Skip to main content

metric_engine/engine/
put.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::HashMap;
16
17use api::helper::ColumnDataTypeWrapper;
18use api::v1::{
19    ColumnSchema, PrimaryKeyEncoding as PrimaryKeyEncodingProto, Row, Rows, SemanticType, Value,
20    WriteHint,
21};
22use common_telemetry::{error, info};
23use fxhash::FxHashMap;
24use snafu::{OptionExt, ResultExt, ensure};
25use store_api::codec::PrimaryKeyEncoding;
26use store_api::metadata::ColumnMetadata;
27use store_api::region_request::{
28    AffectedRows, RegionDeleteRequest, RegionPutRequest, RegionRequest,
29};
30use store_api::storage::{RegionId, TableId};
31
32use crate::engine::MetricEngineInner;
33use crate::error::{
34    ColumnNotFoundSnafu, CreateDefaultSnafu, ForbiddenPhysicalAlterSnafu, InvalidRequestSnafu,
35    LogicalRegionNotFoundSnafu, PhysicalRegionNotFoundSnafu, Result, UnexpectedRequestSnafu,
36    UnsupportedRegionRequestSnafu,
37};
38use crate::metrics::{FORBIDDEN_OPERATION_COUNT, MITO_OPERATION_ELAPSED};
39use crate::row_modifier::{RowsIter, TableIdInput};
40use crate::utils::to_data_region_id;
41
42impl MetricEngineInner {
43    /// Dispatch region put request
44    pub async fn put_region(
45        &self,
46        region_id: RegionId,
47        request: RegionPutRequest,
48    ) -> Result<AffectedRows> {
49        let is_putting_physical_region =
50            self.state.read().unwrap().exist_physical_region(region_id);
51
52        if is_putting_physical_region {
53            info!(
54                "Metric region received put request {request:?} on physical region {region_id:?}"
55            );
56            FORBIDDEN_OPERATION_COUNT.inc();
57
58            ForbiddenPhysicalAlterSnafu.fail()
59        } else {
60            self.put_logical_region(region_id, request).await
61        }
62    }
63
64    /// Batch write multiple logical regions to the same physical region.
65    ///
66    /// Dispatch region put requests in batch.
67    ///
68    /// Requests may span multiple physical regions. We group them by physical
69    /// region and write sequentially. This method fails fast on validation or
70    /// preparation errors within a group and stops at the first failure.
71    /// Writes in earlier physical-region groups are not rolled back if a later
72    /// group fails.
73    pub async fn put_regions_batch(
74        &self,
75        requests: impl ExactSizeIterator<Item = (RegionId, RegionPutRequest)>,
76    ) -> Result<AffectedRows> {
77        let len = requests.len();
78
79        if len == 0 {
80            return Ok(0);
81        }
82
83        let _timer = MITO_OPERATION_ELAPSED
84            .with_label_values(&["put_batch"])
85            .start_timer();
86
87        // Fast path: single request, no batching overhead
88        if len == 1 {
89            let (logical_id, req) = requests.into_iter().next().unwrap();
90            return self.put_logical_region(logical_id, req).await;
91        }
92
93        let mut requests_per_physical: HashMap<RegionId, Vec<(RegionId, RegionPutRequest)>> =
94            HashMap::new();
95        for (logical_region_id, request) in requests {
96            let physical_region_id = self.find_physical_region_id(logical_region_id)?;
97            requests_per_physical
98                .entry(physical_region_id)
99                .or_default()
100                .push((logical_region_id, request));
101        }
102
103        let mut total_affected_rows: AffectedRows = 0;
104        for (physical_region_id, requests) in requests_per_physical {
105            let affected_rows = self
106                .put_regions_batch_single_physical(physical_region_id, requests)
107                .await?;
108            total_affected_rows += affected_rows;
109        }
110
111        Ok(total_affected_rows)
112    }
113
114    /// Write a batch of requests that all belong to the same physical region.
115    ///
116    /// This function orchestrates the batch write process:
117    /// 1. Validates all requests
118    /// 2. Merges requests according to the encoding strategy (sparse or dense)
119    /// 3. Writes the merged batch to the physical region
120    async fn put_regions_batch_single_physical(
121        &self,
122        physical_region_id: RegionId,
123        mut requests: Vec<(RegionId, RegionPutRequest)>,
124    ) -> Result<AffectedRows> {
125        if requests.is_empty() {
126            return Ok(0);
127        }
128
129        let data_region_id = to_data_region_id(physical_region_id);
130        let primary_key_encoding = self.get_primary_key_encoding(data_region_id)?;
131
132        // Validate all requests
133        self.validate_batch_requests(physical_region_id, &mut requests)
134            .await?;
135
136        // Merge requests according to encoding strategy
137        let (merged_request, total_affected_rows) = match primary_key_encoding {
138            PrimaryKeyEncoding::Sparse => self.merge_sparse_batch(physical_region_id, requests)?,
139            PrimaryKeyEncoding::Dense => self.merge_dense_batch(data_region_id, requests)?,
140        };
141
142        // Write once to the physical region
143        self.data_region
144            .write_data(data_region_id, RegionRequest::Put(merged_request))
145            .await?;
146
147        Ok(total_affected_rows)
148    }
149
150    /// Get primary key encoding for a data region.
151    fn get_primary_key_encoding(&self, data_region_id: RegionId) -> Result<PrimaryKeyEncoding> {
152        let state = self.state.read().unwrap();
153        state
154            .get_primary_key_encoding(data_region_id)
155            .context(PhysicalRegionNotFoundSnafu {
156                region_id: data_region_id,
157            })
158    }
159
160    /// Validates all requests in a batch.
161    async fn validate_batch_requests(
162        &self,
163        physical_region_id: RegionId,
164        requests: &mut [(RegionId, RegionPutRequest)],
165    ) -> Result<()> {
166        for (logical_region_id, request) in requests {
167            self.verify_rows(
168                *logical_region_id,
169                physical_region_id,
170                &mut request.rows,
171                true,
172            )
173            .await?;
174        }
175        Ok(())
176    }
177
178    /// Merges multiple requests using sparse primary key encoding.
179    fn merge_sparse_batch(
180        &self,
181        physical_region_id: RegionId,
182        requests: Vec<(RegionId, RegionPutRequest)>,
183    ) -> Result<(RegionPutRequest, AffectedRows)> {
184        let total_rows: usize = requests.iter().map(|(_, req)| req.rows.rows.len()).sum();
185        let mut merged_rows = Vec::with_capacity(total_rows);
186        let mut total_affected_rows: AffectedRows = 0;
187        let mut output_schema: Option<Vec<ColumnSchema>> = None;
188        let mut merged_version: Option<u64> = None;
189
190        // Modify and collect rows from each request
191        for (logical_region_id, mut request) in requests {
192            if let Some(request_version) = request.partition_expr_version {
193                if let Some(merged_version) = merged_version {
194                    ensure!(
195                        merged_version == request_version,
196                        InvalidRequestSnafu {
197                            region_id: physical_region_id,
198                            reason: "inconsistent partition expr version in batch"
199                        }
200                    );
201                } else {
202                    merged_version = Some(request_version);
203                }
204            }
205            self.modify_rows(
206                physical_region_id,
207                logical_region_id.table_id(),
208                &mut request.rows,
209                PrimaryKeyEncoding::Sparse,
210            )?;
211
212            let row_count = request.rows.rows.len();
213            total_affected_rows += row_count as AffectedRows;
214
215            // Capture the output schema from the first modified request
216            if output_schema.is_none() {
217                output_schema = Some(request.rows.schema.clone());
218            }
219
220            merged_rows.extend(request.rows.rows);
221        }
222
223        // Safe to unwrap: requests is guaranteed non-empty by caller
224        let schema = output_schema.unwrap();
225
226        let merged_request = RegionPutRequest {
227            rows: Rows {
228                schema,
229                rows: merged_rows,
230            },
231            hint: Some(WriteHint {
232                primary_key_encoding: PrimaryKeyEncodingProto::Sparse.into(),
233            }),
234            partition_expr_version: merged_version,
235        };
236
237        Ok((merged_request, total_affected_rows))
238    }
239
240    /// Merges multiple requests using dense primary key encoding.
241    ///
242    /// In dense mode, different requests can have different columns.
243    /// We merge all schemas into a union schema, align each row to this schema,
244    /// then batch-modify all rows together (adding __table_id and __tsid).
245    fn merge_dense_batch(
246        &self,
247        data_region_id: RegionId,
248        requests: Vec<(RegionId, RegionPutRequest)>,
249    ) -> Result<(RegionPutRequest, AffectedRows)> {
250        // Build union schema from all requests
251        let merged_schema = Self::build_union_schema(&requests);
252
253        // Align all rows to the merged schema and collect table_ids
254        let (merged_rows, table_ids, merged_version) =
255            Self::align_requests_to_schema(requests, &merged_schema)?;
256
257        // Batch-modify all rows (add __table_id and __tsid columns)
258        let final_rows = {
259            let state = self.state.read().unwrap();
260            let physical_columns = state
261                .physical_region_states()
262                .get(&data_region_id)
263                .with_context(|| PhysicalRegionNotFoundSnafu {
264                    region_id: data_region_id,
265                })?
266                .physical_columns();
267
268            let iter = RowsIter::new(
269                Rows {
270                    schema: merged_schema,
271                    rows: merged_rows,
272                },
273                physical_columns,
274            );
275
276            self.row_modifier.modify_rows(
277                iter,
278                TableIdInput::Batch(&table_ids),
279                PrimaryKeyEncoding::Dense,
280            )?
281        };
282
283        let merged_request = RegionPutRequest {
284            rows: final_rows,
285            hint: None,
286            partition_expr_version: merged_version,
287        };
288
289        Ok((merged_request, table_ids.len() as AffectedRows))
290    }
291
292    /// Builds a union schema containing all columns from all requests.
293    fn build_union_schema(requests: &[(RegionId, RegionPutRequest)]) -> Vec<ColumnSchema> {
294        let mut schema_map: HashMap<&str, ColumnSchema> = HashMap::new();
295        for (_, request) in requests {
296            for col in &request.rows.schema {
297                schema_map
298                    .entry(col.column_name.as_str())
299                    .or_insert_with(|| col.clone());
300            }
301        }
302        schema_map.into_values().collect()
303    }
304
305    fn align_requests_to_schema(
306        requests: Vec<(RegionId, RegionPutRequest)>,
307        merged_schema: &[ColumnSchema],
308    ) -> Result<(Vec<Row>, Vec<TableId>, Option<u64>)> {
309        // Pre-calculate total capacity
310        let total_rows: usize = requests.iter().map(|(_, req)| req.rows.rows.len()).sum();
311        let mut merged_rows = Vec::with_capacity(total_rows);
312        let mut table_ids = Vec::with_capacity(total_rows);
313        let mut merged_version: Option<u64> = None;
314
315        let null_value = Value { value_data: None };
316
317        for (logical_region_id, request) in requests {
318            if let Some(request_version) = request.partition_expr_version {
319                if let Some(merged_version) = merged_version {
320                    ensure!(
321                        merged_version == request_version,
322                        InvalidRequestSnafu {
323                            region_id: logical_region_id,
324                            reason: "inconsistent partition expr version in batch"
325                        }
326                    );
327                } else {
328                    merged_version = Some(request_version);
329                }
330            }
331            let table_id = logical_region_id.table_id();
332
333            // Build column name to index mapping once per request
334            let col_name_to_idx: FxHashMap<&str, usize> = request
335                .rows
336                .schema
337                .iter()
338                .enumerate()
339                .map(|(idx, col)| (col.column_name.as_str(), idx))
340                .collect();
341
342            // Build column mapping array once per request
343            // col_mapping[i] = Some(idx) means merged_schema[i] is at request.schema[idx]
344            // col_mapping[i] = None means merged_schema[i] doesn't exist in request.schema
345            let col_mapping: Vec<Option<usize>> = merged_schema
346                .iter()
347                .map(|merged_col| {
348                    col_name_to_idx
349                        .get(merged_col.column_name.as_str())
350                        .copied()
351                })
352                .collect();
353
354            // Apply the mapping to all rows
355            for mut row in request.rows.rows {
356                let mut aligned_values = Vec::with_capacity(merged_schema.len());
357                for &opt_idx in &col_mapping {
358                    aligned_values.push(match opt_idx {
359                        Some(idx) => std::mem::take(&mut row.values[idx]),
360                        None => null_value.clone(),
361                    });
362                }
363                merged_rows.push(Row {
364                    values: aligned_values,
365                });
366                table_ids.push(table_id);
367            }
368        }
369
370        Ok((merged_rows, table_ids, merged_version))
371    }
372
373    /// Find the physical region id for a logical region.
374    fn find_physical_region_id(&self, logical_region_id: RegionId) -> Result<RegionId> {
375        let state = self.state.read().unwrap();
376        state
377            .logical_regions()
378            .get(&logical_region_id)
379            .copied()
380            .context(LogicalRegionNotFoundSnafu {
381                region_id: logical_region_id,
382            })
383    }
384
385    /// Dispatch region delete request
386    pub async fn delete_region(
387        &self,
388        region_id: RegionId,
389        request: RegionDeleteRequest,
390    ) -> Result<AffectedRows> {
391        if self.is_physical_region(region_id) {
392            info!(
393                "Metric region received delete request {request:?} on physical region {region_id:?}"
394            );
395            FORBIDDEN_OPERATION_COUNT.inc();
396
397            UnsupportedRegionRequestSnafu {
398                request: RegionRequest::Delete(request),
399            }
400            .fail()
401        } else {
402            self.delete_logical_region(region_id, request).await
403        }
404    }
405
406    async fn put_logical_region(
407        &self,
408        logical_region_id: RegionId,
409        mut request: RegionPutRequest,
410    ) -> Result<AffectedRows> {
411        let _timer = MITO_OPERATION_ELAPSED
412            .with_label_values(&["put"])
413            .start_timer();
414
415        let (physical_region_id, data_region_id, primary_key_encoding) =
416            self.find_data_region_meta(logical_region_id)?;
417
418        self.verify_rows(
419            logical_region_id,
420            physical_region_id,
421            &mut request.rows,
422            true,
423        )
424        .await?;
425
426        // write to data region
427        // TODO: retrieve table name
428        self.modify_rows(
429            physical_region_id,
430            logical_region_id.table_id(),
431            &mut request.rows,
432            primary_key_encoding,
433        )?;
434        if primary_key_encoding == PrimaryKeyEncoding::Sparse {
435            request.hint = Some(WriteHint {
436                primary_key_encoding: PrimaryKeyEncodingProto::Sparse.into(),
437            });
438        }
439        self.data_region
440            .write_data(data_region_id, RegionRequest::Put(request))
441            .await
442    }
443
444    async fn delete_logical_region(
445        &self,
446        logical_region_id: RegionId,
447        mut request: RegionDeleteRequest,
448    ) -> Result<AffectedRows> {
449        let _timer = MITO_OPERATION_ELAPSED
450            .with_label_values(&["delete"])
451            .start_timer();
452
453        let (physical_region_id, data_region_id, primary_key_encoding) =
454            self.find_data_region_meta(logical_region_id)?;
455
456        self.verify_rows(
457            logical_region_id,
458            physical_region_id,
459            &mut request.rows,
460            false,
461        )
462        .await?;
463
464        // write to data region
465        // TODO: retrieve table name
466        self.modify_rows(
467            physical_region_id,
468            logical_region_id.table_id(),
469            &mut request.rows,
470            primary_key_encoding,
471        )?;
472        if primary_key_encoding == PrimaryKeyEncoding::Sparse {
473            request.hint = Some(WriteHint {
474                primary_key_encoding: PrimaryKeyEncodingProto::Sparse.into(),
475            });
476        }
477        self.data_region
478            .write_data(data_region_id, RegionRequest::Delete(request))
479            .await
480    }
481
482    pub(crate) fn find_data_region_meta(
483        &self,
484        logical_region_id: RegionId,
485    ) -> Result<(RegionId, RegionId, PrimaryKeyEncoding)> {
486        let state = self.state.read().unwrap();
487        let physical_region_id = *state
488            .logical_regions()
489            .get(&logical_region_id)
490            .with_context(|| LogicalRegionNotFoundSnafu {
491                region_id: logical_region_id,
492            })?;
493        let data_region_id = to_data_region_id(physical_region_id);
494        let primary_key_encoding = state.get_primary_key_encoding(data_region_id).context(
495            PhysicalRegionNotFoundSnafu {
496                region_id: data_region_id,
497            },
498        )?;
499        Ok((physical_region_id, data_region_id, primary_key_encoding))
500    }
501
502    /// Verifies a request for a logical region against its corresponding metadata region.
503    ///
504    /// Includes:
505    /// - Check if the logical region exists
506    /// - Check if every column in the request exists in the physical region
507    /// - Check each column's datatype and semantic type match the physical region's schema
508    /// - Check the time index column is present
509    /// - When `check_fields` is true, check every physical field column is present.
510    ///   Set this to `false` for delete requests, which legitimately carry only
511    ///   the primary key + timestamp.
512    async fn verify_rows(
513        &self,
514        logical_region_id: RegionId,
515        physical_region_id: RegionId,
516        rows: &mut Rows,
517        check_fields: bool,
518    ) -> Result<()> {
519        // Check if the region exists
520        let data_region_id = to_data_region_id(physical_region_id);
521        let state = self.state.read().unwrap();
522        if !state.is_logical_region_exist(logical_region_id) {
523            error!("Trying to write to an nonexistent region {logical_region_id}");
524            return LogicalRegionNotFoundSnafu {
525                region_id: logical_region_id,
526            }
527            .fail();
528        }
529
530        // Type + semantic check on every column in the request schema.
531        let physical_state = state
532            .physical_region_states()
533            .get(&data_region_id)
534            .context(PhysicalRegionNotFoundSnafu {
535                region_id: data_region_id,
536            })?;
537        let physical_columns = physical_state.physical_columns();
538        for col in &rows.schema {
539            let info = physical_columns
540                .get(&col.column_name)
541                .context(ColumnNotFoundSnafu {
542                    name: &col.column_name,
543                    region_id: logical_region_id,
544                })?;
545
546            ensure!(
547                api::helper::is_column_type_value_eq(
548                    col.datatype,
549                    col.datatype_extension.clone(),
550                    &info.column_schema.data_type
551                ),
552                InvalidRequestSnafu {
553                    region_id: logical_region_id,
554                    reason: format!(
555                        "column {} expect type {:?}, given: {}({})",
556                        col.column_name,
557                        info.column_schema.data_type,
558                        api::v1::ColumnDataType::try_from(col.datatype)
559                            .map(|v| v.as_str_name())
560                            .unwrap_or("Unknown"),
561                        col.datatype,
562                    ),
563                }
564            );
565
566            ensure!(
567                api::helper::is_semantic_type_eq(col.semantic_type, info.semantic_type),
568                InvalidRequestSnafu {
569                    region_id: logical_region_id,
570                    reason: format!(
571                        "column {} expect semantic type {:?}, given: {}({})",
572                        col.column_name,
573                        info.semantic_type,
574                        api::v1::SemanticType::try_from(col.semantic_type)
575                            .map(|v| v.as_str_name())
576                            .unwrap_or("Unknown"),
577                        col.semantic_type,
578                    ),
579                }
580            );
581        }
582
583        let ts_name = physical_state.time_index_column_name();
584        ensure!(
585            rows.schema.iter().any(|col| col.column_name == ts_name),
586            InvalidRequestSnafu {
587                region_id: logical_region_id,
588                reason: format!("missing required time index column {ts_name}"),
589            }
590        );
591
592        if check_fields {
593            let field_name = physical_state.field_column_name();
594            if !rows.schema.iter().any(|col| col.column_name == field_name) {
595                let field_meta =
596                    physical_columns
597                        .get(field_name)
598                        .with_context(|| ColumnNotFoundSnafu {
599                            name: field_name,
600                            region_id: logical_region_id,
601                        })?;
602                Self::fill_missing_field_column(logical_region_id, field_name, field_meta, rows)?;
603            }
604        }
605
606        Ok(())
607    }
608
609    fn fill_missing_field_column(
610        logical_region_id: RegionId,
611        field_name: &str,
612        field_meta: &ColumnMetadata,
613        rows: &mut Rows,
614    ) -> Result<()> {
615        ensure!(
616            !field_meta.column_schema.is_default_impure(),
617            UnexpectedRequestSnafu {
618                reason: format!(
619                    "unexpected impure default value with region_id: {logical_region_id}, column: {field_name}, default_value: {:?}",
620                    field_meta.column_schema.default_constraint(),
621                ),
622            }
623        );
624
625        let default_value = field_meta
626            .column_schema
627            .create_default()
628            .context(CreateDefaultSnafu {
629                region_id: logical_region_id,
630                column: field_name,
631            })?
632            .with_context(|| InvalidRequestSnafu {
633                region_id: logical_region_id,
634                reason: format!("missing required field column {field_name}"),
635            })?;
636        let default_value = api::helper::to_grpc_value(default_value);
637        let (datatype, datatype_extension) =
638            ColumnDataTypeWrapper::try_from(field_meta.column_schema.data_type.clone())
639                .map_err(|e| {
640                    InvalidRequestSnafu {
641                        region_id: logical_region_id,
642                        reason: format!(
643                            "no protobuf type for field column {field_name} ({:?}): {e}",
644                            field_meta.column_schema.data_type
645                        ),
646                    }
647                    .build()
648                })?
649                .to_parts();
650
651        rows.schema.push(ColumnSchema {
652            column_name: field_name.to_string(),
653            datatype: datatype as i32,
654            semantic_type: SemanticType::Field as i32,
655            datatype_extension,
656            options: None,
657        });
658
659        for row in &mut rows.rows {
660            row.values.push(default_value.clone());
661        }
662
663        Ok(())
664    }
665
666    /// Perform metric engine specific logic to incoming rows.
667    /// - Add table_id column
668    /// - Generate tsid
669    fn modify_rows(
670        &self,
671        physical_region_id: RegionId,
672        table_id: TableId,
673        rows: &mut Rows,
674        encoding: PrimaryKeyEncoding,
675    ) -> Result<()> {
676        let input = std::mem::take(rows);
677        let iter = {
678            let state = self.state.read().unwrap();
679            let physical_columns = state
680                .physical_region_states()
681                .get(&physical_region_id)
682                .with_context(|| PhysicalRegionNotFoundSnafu {
683                    region_id: physical_region_id,
684                })?
685                .physical_columns();
686            RowsIter::new(input, physical_columns)
687        };
688        let output =
689            self.row_modifier
690                .modify_rows(iter, TableIdInput::Single(table_id), encoding)?;
691        *rows = output;
692        Ok(())
693    }
694}
695
696#[cfg(test)]
697mod tests {
698    use std::collections::HashSet;
699
700    use api::v1::value::ValueData;
701    use api::v1::{ColumnDataType, ColumnSchema as PbColumnSchema};
702    use common_error::ext::ErrorExt;
703    use common_error::status_code::StatusCode;
704    use common_function::utils::partition_expr_version;
705    use common_recordbatch::RecordBatches;
706    use datatypes::prelude::ConcreteDataType;
707    use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema};
708    use datatypes::value::Value as PartitionValue;
709    use partition::expr::col;
710    use store_api::metadata::ColumnMetadata;
711    use store_api::metric_engine_consts::{
712        DATA_SCHEMA_TABLE_ID_COLUMN_NAME, DATA_SCHEMA_TSID_COLUMN_NAME, PRIMARY_KEY_ENCODING,
713    };
714    use store_api::path_utils::table_dir;
715    use store_api::region_engine::RegionEngine;
716    use store_api::region_request::{
717        EnterStagingRequest, RegionRequest, StagingPartitionDirective,
718    };
719    use store_api::storage::ScanRequest;
720    use store_api::storage::consts::PRIMARY_KEY_COLUMN_NAME;
721
722    use super::*;
723    use crate::test_util::{self, TestEnv};
724
725    fn assert_merged_schema(rows: &Rows, expect_sparse: bool) {
726        let column_names: HashSet<String> = rows
727            .schema
728            .iter()
729            .map(|col| col.column_name.clone())
730            .collect();
731
732        if expect_sparse {
733            assert!(
734                column_names.contains(PRIMARY_KEY_COLUMN_NAME),
735                "sparse encoding should include primary key column"
736            );
737            assert!(
738                !column_names.contains(DATA_SCHEMA_TABLE_ID_COLUMN_NAME),
739                "sparse encoding should not include table id column"
740            );
741            assert!(
742                !column_names.contains(DATA_SCHEMA_TSID_COLUMN_NAME),
743                "sparse encoding should not include tsid column"
744            );
745            assert!(
746                !column_names.contains("job"),
747                "sparse encoding should not include tag columns"
748            );
749            assert!(
750                !column_names.contains("instance"),
751                "sparse encoding should not include tag columns"
752            );
753        } else {
754            assert!(
755                !column_names.contains(PRIMARY_KEY_COLUMN_NAME),
756                "dense encoding should not include primary key column"
757            );
758            assert!(
759                column_names.contains(DATA_SCHEMA_TABLE_ID_COLUMN_NAME),
760                "dense encoding should include table id column"
761            );
762            assert!(
763                column_names.contains(DATA_SCHEMA_TSID_COLUMN_NAME),
764                "dense encoding should include tsid column"
765            );
766            assert!(
767                column_names.contains("job"),
768                "dense encoding should keep tag columns"
769            );
770            assert!(
771                column_names.contains("instance"),
772                "dense encoding should keep tag columns"
773            );
774        }
775    }
776
777    fn job_partition_expr_json() -> String {
778        let expr = col("job")
779            .gt_eq(PartitionValue::String("job-0".into()))
780            .and(col("job").lt(PartitionValue::String("job-9".into())));
781        expr.as_json_str().unwrap()
782    }
783
784    async fn create_logical_region_with_tags(
785        env: &TestEnv,
786        physical_region_id: RegionId,
787        logical_region_id: RegionId,
788        tags: &[&str],
789    ) {
790        let region_create_request = test_util::create_logical_region_request(
791            tags,
792            physical_region_id,
793            &table_dir("test", logical_region_id.table_id()),
794        );
795        env.metric()
796            .handle_request(
797                logical_region_id,
798                RegionRequest::Create(region_create_request),
799            )
800            .await
801            .unwrap();
802    }
803
804    async fn run_batch_write_with_schema_variants(
805        env: &TestEnv,
806        physical_region_id: RegionId,
807        options: Vec<(String, String)>,
808        expect_sparse: bool,
809    ) {
810        env.create_physical_region(physical_region_id, &TestEnv::default_table_dir(), options)
811            .await;
812
813        let logical_region_1 = env.default_logical_region_id();
814        let logical_region_2 = RegionId::new(1024, 1);
815
816        create_logical_region_with_tags(env, physical_region_id, logical_region_1, &["job"]).await;
817        create_logical_region_with_tags(
818            env,
819            physical_region_id,
820            logical_region_2,
821            &["job", "instance"],
822        )
823        .await;
824
825        let schema_1 = test_util::row_schema_with_tags(&["job"]);
826        let schema_2 = test_util::row_schema_with_tags(&["job", "instance"]);
827
828        let data_region_id = RegionId::new(physical_region_id.table_id(), 2);
829        let primary_key_encoding = env
830            .metric()
831            .inner
832            .get_primary_key_encoding(data_region_id)
833            .unwrap();
834        assert_eq!(
835            primary_key_encoding,
836            if expect_sparse {
837                PrimaryKeyEncoding::Sparse
838            } else {
839                PrimaryKeyEncoding::Dense
840            }
841        );
842
843        let build_requests = || {
844            let rows_1 = test_util::build_rows(1, 3);
845            let rows_2 = test_util::build_rows(2, 2);
846
847            vec![
848                (
849                    logical_region_1,
850                    RegionPutRequest {
851                        rows: Rows {
852                            schema: schema_1.clone(),
853                            rows: rows_1,
854                        },
855                        hint: None,
856                        partition_expr_version: None,
857                    },
858                ),
859                (
860                    logical_region_2,
861                    RegionPutRequest {
862                        rows: Rows {
863                            schema: schema_2.clone(),
864                            rows: rows_2,
865                        },
866                        hint: None,
867                        partition_expr_version: None,
868                    },
869                ),
870            ]
871        };
872
873        let merged_request = if expect_sparse {
874            let (merged_request, _) = env
875                .metric()
876                .inner
877                .merge_sparse_batch(physical_region_id, build_requests())
878                .unwrap();
879            let hint = merged_request
880                .hint
881                .as_ref()
882                .expect("missing sparse write hint");
883            assert_eq!(
884                hint.primary_key_encoding,
885                PrimaryKeyEncodingProto::Sparse as i32
886            );
887            merged_request
888        } else {
889            let (merged_request, _) = env
890                .metric()
891                .inner
892                .merge_dense_batch(data_region_id, build_requests())
893                .unwrap();
894            assert!(merged_request.hint.is_none());
895            merged_request
896        };
897
898        assert_merged_schema(&merged_request.rows, expect_sparse);
899
900        let affected_rows = env
901            .metric()
902            .inner
903            .put_regions_batch(build_requests().into_iter())
904            .await
905            .unwrap();
906        assert_eq!(affected_rows, 5);
907
908        let request = ScanRequest::default();
909        let stream = env
910            .mito()
911            .scan_to_stream(data_region_id, request)
912            .await
913            .unwrap();
914        let batches = RecordBatches::try_collect(stream).await.unwrap();
915
916        assert_eq!(batches.iter().map(|b| b.num_rows()).sum::<usize>(), 5);
917    }
918
919    #[tokio::test]
920    async fn test_write_logical_region() {
921        let env = TestEnv::new().await;
922        env.init_metric_region().await;
923
924        // prepare data
925        let schema = test_util::row_schema_with_tags(&["job"]);
926        let rows = test_util::build_rows(1, 5);
927        let request = RegionRequest::Put(RegionPutRequest {
928            rows: Rows { schema, rows },
929            hint: None,
930            partition_expr_version: None,
931        });
932
933        // write data
934        let logical_region_id = env.default_logical_region_id();
935        let result = env
936            .metric()
937            .handle_request(logical_region_id, request)
938            .await
939            .unwrap();
940        assert_eq!(result.affected_rows, 5);
941
942        // read data from physical region
943        let physical_region_id = env.default_physical_region_id();
944        let request = ScanRequest::default();
945        let stream = env
946            .metric()
947            .scan_to_stream(physical_region_id, request)
948            .await
949            .unwrap();
950        let batches = RecordBatches::try_collect(stream).await.unwrap();
951        let expected = "\
952+-------------------------+----------------+------------+---------------------+-------+
953| greptime_timestamp      | greptime_value | __table_id | __tsid              | job   |
954+-------------------------+----------------+------------+---------------------+-------+
955| 1970-01-01T00:00:00     | 0.0            | 3          | 2955007454552897459 | tag_0 |
956| 1970-01-01T00:00:00.001 | 1.0            | 3          | 2955007454552897459 | tag_0 |
957| 1970-01-01T00:00:00.002 | 2.0            | 3          | 2955007454552897459 | tag_0 |
958| 1970-01-01T00:00:00.003 | 3.0            | 3          | 2955007454552897459 | tag_0 |
959| 1970-01-01T00:00:00.004 | 4.0            | 3          | 2955007454552897459 | tag_0 |
960+-------------------------+----------------+------------+---------------------+-------+";
961        assert_eq!(expected, batches.pretty_print().unwrap(), "physical region");
962
963        // read data from logical region
964        let request = ScanRequest::default();
965        let stream = env
966            .metric()
967            .scan_to_stream(logical_region_id, request)
968            .await
969            .unwrap();
970        let batches = RecordBatches::try_collect(stream).await.unwrap();
971        let expected = "\
972+-------------------------+----------------+-------+
973| greptime_timestamp      | greptime_value | job   |
974+-------------------------+----------------+-------+
975| 1970-01-01T00:00:00     | 0.0            | tag_0 |
976| 1970-01-01T00:00:00.001 | 1.0            | tag_0 |
977| 1970-01-01T00:00:00.002 | 2.0            | tag_0 |
978| 1970-01-01T00:00:00.003 | 3.0            | tag_0 |
979| 1970-01-01T00:00:00.004 | 4.0            | tag_0 |
980+-------------------------+----------------+-------+";
981        assert_eq!(expected, batches.pretty_print().unwrap(), "logical region");
982    }
983
984    #[tokio::test]
985    async fn test_write_logical_region_row_count() {
986        let env = TestEnv::new().await;
987        env.init_metric_region().await;
988        let engine = env.metric();
989
990        // add columns
991        let logical_region_id = env.default_logical_region_id();
992        let columns = &["odd", "even", "Ev_En"];
993        let alter_request = test_util::alter_logical_region_add_tag_columns(123456, columns);
994        engine
995            .handle_request(logical_region_id, RegionRequest::Alter(alter_request))
996            .await
997            .unwrap();
998
999        // prepare data
1000        let schema = test_util::row_schema_with_tags(columns);
1001        let rows = test_util::build_rows(3, 100);
1002        let request = RegionRequest::Put(RegionPutRequest {
1003            rows: Rows { schema, rows },
1004            hint: None,
1005            partition_expr_version: None,
1006        });
1007
1008        // write data
1009        let result = engine
1010            .handle_request(logical_region_id, request)
1011            .await
1012            .unwrap();
1013        assert_eq!(100, result.affected_rows);
1014    }
1015
1016    #[tokio::test]
1017    async fn test_write_physical_region() {
1018        let env = TestEnv::new().await;
1019        env.init_metric_region().await;
1020        let engine = env.metric();
1021
1022        let physical_region_id = env.default_physical_region_id();
1023        let schema = test_util::row_schema_with_tags(&["abc"]);
1024        let rows = test_util::build_rows(1, 100);
1025        let request = RegionRequest::Put(RegionPutRequest {
1026            rows: Rows { schema, rows },
1027            hint: None,
1028            partition_expr_version: None,
1029        });
1030
1031        engine
1032            .handle_request(physical_region_id, request)
1033            .await
1034            .unwrap_err();
1035    }
1036
1037    #[tokio::test]
1038    async fn test_write_nonexist_logical_region() {
1039        let env = TestEnv::new().await;
1040        env.init_metric_region().await;
1041        let engine = env.metric();
1042
1043        let logical_region_id = RegionId::new(175, 8345);
1044        let schema = test_util::row_schema_with_tags(&["def"]);
1045        let rows = test_util::build_rows(1, 100);
1046        let request = RegionRequest::Put(RegionPutRequest {
1047            rows: Rows { schema, rows },
1048            hint: None,
1049            partition_expr_version: None,
1050        });
1051
1052        engine
1053            .handle_request(logical_region_id, request)
1054            .await
1055            .unwrap_err();
1056    }
1057
1058    #[tokio::test]
1059    async fn test_batch_write_multiple_logical_regions() {
1060        let env = TestEnv::new().await;
1061        env.init_metric_region().await;
1062        let engine = env.metric();
1063
1064        // Create two additional logical regions
1065        let physical_region_id = env.default_physical_region_id();
1066        let logical_region_1 = env.default_logical_region_id();
1067        let logical_region_2 = RegionId::new(1024, 1);
1068        let logical_region_3 = RegionId::new(1024, 2);
1069
1070        env.create_logical_region(physical_region_id, logical_region_2)
1071            .await;
1072        env.create_logical_region(physical_region_id, logical_region_3)
1073            .await;
1074
1075        // Prepare batch requests with non-overlapping timestamps
1076        let schema = test_util::row_schema_with_tags(&["job"]);
1077
1078        // Use build_rows_with_ts to create non-overlapping timestamps
1079        // logical_region_1: ts 0, 1, 2
1080        // logical_region_2: ts 10, 11  (offset to avoid overlap)
1081        // logical_region_3: ts 20, 21, 22, 23, 24  (offset to avoid overlap)
1082        let rows1 = test_util::build_rows(1, 3);
1083        let mut rows2 = test_util::build_rows(1, 2);
1084        let mut rows3 = test_util::build_rows(1, 5);
1085
1086        // Adjust timestamps to avoid conflicts
1087        use api::v1::value::ValueData;
1088        for (i, row) in rows2.iter_mut().enumerate() {
1089            if let Some(ValueData::TimestampMillisecondValue(ts)) =
1090                row.values.get_mut(0).and_then(|v| v.value_data.as_mut())
1091            {
1092                *ts = (10 + i) as i64;
1093            }
1094        }
1095        for (i, row) in rows3.iter_mut().enumerate() {
1096            if let Some(ValueData::TimestampMillisecondValue(ts)) =
1097                row.values.get_mut(0).and_then(|v| v.value_data.as_mut())
1098            {
1099                *ts = (20 + i) as i64;
1100            }
1101        }
1102
1103        let requests = vec![
1104            (
1105                logical_region_1,
1106                RegionPutRequest {
1107                    rows: Rows {
1108                        schema: schema.clone(),
1109                        rows: rows1,
1110                    },
1111                    hint: None,
1112                    partition_expr_version: None,
1113                },
1114            ),
1115            (
1116                logical_region_2,
1117                RegionPutRequest {
1118                    rows: Rows {
1119                        schema: schema.clone(),
1120                        rows: rows2,
1121                    },
1122                    hint: None,
1123                    partition_expr_version: None,
1124                },
1125            ),
1126            (
1127                logical_region_3,
1128                RegionPutRequest {
1129                    rows: Rows {
1130                        schema: schema.clone(),
1131                        rows: rows3,
1132                    },
1133                    hint: None,
1134                    partition_expr_version: None,
1135                },
1136            ),
1137        ];
1138
1139        // Batch write
1140        let affected_rows = engine
1141            .inner
1142            .put_regions_batch(requests.into_iter())
1143            .await
1144            .unwrap();
1145        assert_eq!(affected_rows, 10);
1146
1147        // Verify physical region contains data from all logical regions
1148        let request = ScanRequest::default();
1149        let stream = env
1150            .metric()
1151            .scan_to_stream(physical_region_id, request)
1152            .await
1153            .unwrap();
1154        let batches = RecordBatches::try_collect(stream).await.unwrap();
1155
1156        // Should have 3 + 2 + 5 = 10 rows total
1157        assert_eq!(batches.iter().map(|b| b.num_rows()).sum::<usize>(), 10);
1158    }
1159
1160    #[tokio::test]
1161    async fn test_batch_write_with_partial_failure() {
1162        let env = TestEnv::new().await;
1163        env.init_metric_region().await;
1164        let engine = env.metric();
1165
1166        let physical_region_id = env.default_physical_region_id();
1167        let logical_region_1 = env.default_logical_region_id();
1168        let logical_region_2 = RegionId::new(1024, 1);
1169        let nonexistent_region = RegionId::new(9999, 9999);
1170
1171        env.create_logical_region(physical_region_id, logical_region_2)
1172            .await;
1173
1174        // Prepare batch with one invalid region
1175        let schema = test_util::row_schema_with_tags(&["job"]);
1176        let requests = vec![
1177            (
1178                logical_region_1,
1179                RegionPutRequest {
1180                    rows: Rows {
1181                        schema: schema.clone(),
1182                        rows: test_util::build_rows(1, 3),
1183                    },
1184                    hint: None,
1185                    partition_expr_version: None,
1186                },
1187            ),
1188            (
1189                nonexistent_region,
1190                RegionPutRequest {
1191                    rows: Rows {
1192                        schema: schema.clone(),
1193                        rows: test_util::build_rows(1, 2),
1194                    },
1195                    hint: None,
1196                    partition_expr_version: None,
1197                },
1198            ),
1199            (
1200                logical_region_2,
1201                RegionPutRequest {
1202                    rows: Rows {
1203                        schema: schema.clone(),
1204                        rows: test_util::build_rows(1, 5),
1205                    },
1206                    hint: None,
1207                    partition_expr_version: None,
1208                },
1209            ),
1210        ];
1211
1212        // Batch write
1213        let result = engine.inner.put_regions_batch(requests.into_iter()).await;
1214        assert!(result.is_err());
1215
1216        // Invalid region is detected before any write, so the physical region remains empty.
1217        // Fail-fast is per physical-region group; cross-group partial success is possible.
1218        let request = ScanRequest::default();
1219        let stream = env
1220            .metric()
1221            .scan_to_stream(physical_region_id, request)
1222            .await
1223            .unwrap();
1224        let batches = RecordBatches::try_collect(stream).await.unwrap();
1225
1226        assert_eq!(batches.iter().map(|b| b.num_rows()).sum::<usize>(), 0);
1227    }
1228
1229    #[tokio::test]
1230    async fn test_batch_write_single_request_fast_path() {
1231        let env = TestEnv::new().await;
1232        env.init_metric_region().await;
1233        let engine = env.metric();
1234
1235        let logical_region_id = env.default_logical_region_id();
1236        let schema = test_util::row_schema_with_tags(&["job"]);
1237
1238        // Single request should use fast path
1239        let requests = vec![(
1240            logical_region_id,
1241            RegionPutRequest {
1242                rows: Rows {
1243                    schema,
1244                    rows: test_util::build_rows(1, 5),
1245                },
1246                hint: None,
1247                partition_expr_version: None,
1248            },
1249        )];
1250
1251        let affected_rows = engine
1252            .inner
1253            .put_regions_batch(requests.into_iter())
1254            .await
1255            .unwrap();
1256        assert_eq!(affected_rows, 5);
1257    }
1258
1259    #[tokio::test]
1260    async fn test_batch_write_empty_requests() {
1261        let env = TestEnv::new().await;
1262        env.init_metric_region().await;
1263        let engine = env.metric();
1264
1265        // Empty batch should return zero affected rows
1266        let requests = vec![];
1267        let affected_rows = engine
1268            .inner
1269            .put_regions_batch(requests.into_iter())
1270            .await
1271            .unwrap();
1272
1273        assert_eq!(affected_rows, 0);
1274    }
1275
1276    #[tokio::test]
1277    async fn test_batch_write_sparse_encoding() {
1278        let env = TestEnv::new().await;
1279        let physical_region_id = env.default_physical_region_id();
1280
1281        run_batch_write_with_schema_variants(
1282            &env,
1283            physical_region_id,
1284            vec![(PRIMARY_KEY_ENCODING.to_string(), "sparse".to_string())],
1285            true,
1286        )
1287        .await;
1288    }
1289
1290    #[tokio::test]
1291    async fn test_batch_write_dense_encoding() {
1292        let env = TestEnv::new().await;
1293        let physical_region_id = env.default_physical_region_id();
1294
1295        run_batch_write_with_schema_variants(
1296            &env,
1297            physical_region_id,
1298            vec![(PRIMARY_KEY_ENCODING.to_string(), "dense".to_string())],
1299            false,
1300        )
1301        .await;
1302    }
1303
1304    #[tokio::test]
1305    async fn test_metric_put_rejects_bad_partition_expr_version() {
1306        let env = TestEnv::new().await;
1307        env.init_metric_region().await;
1308
1309        let logical_region_id = env.default_logical_region_id();
1310        let rows = Rows {
1311            schema: test_util::row_schema_with_tags(&["job"]),
1312            rows: test_util::build_rows(1, 3),
1313        };
1314
1315        let err = env
1316            .metric()
1317            .handle_request(
1318                logical_region_id,
1319                RegionRequest::Put(RegionPutRequest {
1320                    rows,
1321                    hint: None,
1322                    partition_expr_version: Some(1),
1323                }),
1324            )
1325            .await
1326            .unwrap_err();
1327
1328        assert_eq!(err.status_code(), StatusCode::InvalidArguments);
1329    }
1330
1331    #[tokio::test]
1332    async fn test_metric_put_respects_staging_partition_expr_version() {
1333        let env = TestEnv::new().await;
1334        env.init_metric_region().await;
1335
1336        let logical_region_id = env.default_logical_region_id();
1337        let physical_region_id = env.default_physical_region_id();
1338        let partition_expr = job_partition_expr_json();
1339        env.metric()
1340            .handle_request(
1341                physical_region_id,
1342                RegionRequest::EnterStaging(EnterStagingRequest {
1343                    partition_directive: StagingPartitionDirective::UpdatePartitionExpr(
1344                        partition_expr.clone(),
1345                    ),
1346                }),
1347            )
1348            .await
1349            .unwrap();
1350
1351        let expected_version = partition_expr_version(Some(&partition_expr));
1352        let rows = Rows {
1353            schema: test_util::row_schema_with_tags(&["job"]),
1354            rows: test_util::build_rows(1, 3),
1355        };
1356
1357        let err = env
1358            .metric()
1359            .handle_request(
1360                logical_region_id,
1361                RegionRequest::Put(RegionPutRequest {
1362                    rows: rows.clone(),
1363                    hint: None,
1364                    partition_expr_version: Some(expected_version.wrapping_add(1)),
1365                }),
1366            )
1367            .await
1368            .unwrap_err();
1369        assert_eq!(err.status_code(), StatusCode::InvalidArguments);
1370
1371        let response = env
1372            .metric()
1373            .handle_request(
1374                logical_region_id,
1375                RegionRequest::Put(RegionPutRequest {
1376                    rows: rows.clone(),
1377                    hint: None,
1378                    partition_expr_version: None,
1379                }),
1380            )
1381            .await
1382            .unwrap();
1383        assert_eq!(response.affected_rows, 3);
1384
1385        let response = env
1386            .metric()
1387            .handle_request(
1388                logical_region_id,
1389                RegionRequest::Put(RegionPutRequest {
1390                    rows,
1391                    hint: None,
1392                    partition_expr_version: Some(expected_version),
1393                }),
1394            )
1395            .await
1396            .unwrap();
1397        assert_eq!(response.affected_rows, 3);
1398    }
1399
1400    /// Regression test for issue #7990: the metric engine must reject a row
1401    /// whose timestamp column carries a non-timestamp datatype, rather than
1402    /// letting it panic inside mito's `ValueBuilder::push`.
1403    #[tokio::test]
1404    async fn test_verify_rows_rejects_wrong_type() {
1405        use api::v1::value::ValueData;
1406        use api::v1::{ColumnDataType, ColumnSchema as PbColumnSchema, SemanticType};
1407        use common_query::prelude::{greptime_timestamp, greptime_value};
1408
1409        let env = TestEnv::new().await;
1410        env.init_metric_region().await;
1411
1412        let logical_region_id = env.default_logical_region_id();
1413
1414        // Timestamp column is declared as String — the very payload that
1415        // caused #7990. It should surface a typed error rather than panic.
1416        let schema = vec![
1417            PbColumnSchema {
1418                column_name: greptime_timestamp().to_string(),
1419                datatype: ColumnDataType::String as i32,
1420                semantic_type: SemanticType::Timestamp as _,
1421                datatype_extension: None,
1422                options: None,
1423            },
1424            PbColumnSchema {
1425                column_name: greptime_value().to_string(),
1426                datatype: ColumnDataType::Float64 as i32,
1427                semantic_type: SemanticType::Field as _,
1428                datatype_extension: None,
1429                options: None,
1430            },
1431            PbColumnSchema {
1432                column_name: "job".to_string(),
1433                datatype: ColumnDataType::String as i32,
1434                semantic_type: SemanticType::Tag as _,
1435                datatype_extension: None,
1436                options: None,
1437            },
1438        ];
1439        let rows = vec![Row {
1440            values: vec![
1441                Value {
1442                    value_data: Some(ValueData::StringValue("not-a-timestamp".to_string())),
1443                },
1444                Value {
1445                    value_data: Some(ValueData::F64Value(1.0)),
1446                },
1447                Value {
1448                    value_data: Some(ValueData::StringValue("tag_0".to_string())),
1449                },
1450            ],
1451        }];
1452
1453        let err = env
1454            .metric()
1455            .handle_request(
1456                logical_region_id,
1457                RegionRequest::Put(RegionPutRequest {
1458                    rows: Rows { schema, rows },
1459                    hint: None,
1460                    partition_expr_version: None,
1461                }),
1462            )
1463            .await
1464            .unwrap_err();
1465        assert_eq!(err.status_code(), StatusCode::InvalidArguments);
1466    }
1467
1468    /// The completeness check must reject requests that omit the time index
1469    /// column, since mito cannot default-fill a `TimeIndex` column and would
1470    /// previously panic on the empty builder.
1471    #[tokio::test]
1472    async fn test_verify_rows_rejects_missing_time_index() {
1473        use api::v1::{ColumnDataType, ColumnSchema as PbColumnSchema, SemanticType};
1474        use common_query::prelude::greptime_value;
1475
1476        let env = TestEnv::new().await;
1477        env.init_metric_region().await;
1478
1479        let logical_region_id = env.default_logical_region_id();
1480
1481        // Payload only carries the field and a tag — no timestamp column.
1482        let schema = vec![
1483            PbColumnSchema {
1484                column_name: greptime_value().to_string(),
1485                datatype: ColumnDataType::Float64 as i32,
1486                semantic_type: SemanticType::Field as _,
1487                datatype_extension: None,
1488                options: None,
1489            },
1490            PbColumnSchema {
1491                column_name: "job".to_string(),
1492                datatype: ColumnDataType::String as i32,
1493                semantic_type: SemanticType::Tag as _,
1494                datatype_extension: None,
1495                options: None,
1496            },
1497        ];
1498        let rows = vec![Row {
1499            values: vec![
1500                Value {
1501                    value_data: Some(api::v1::value::ValueData::F64Value(1.0)),
1502                },
1503                Value {
1504                    value_data: Some(api::v1::value::ValueData::StringValue("tag_0".to_string())),
1505                },
1506            ],
1507        }];
1508
1509        let err = env
1510            .metric()
1511            .handle_request(
1512                logical_region_id,
1513                RegionRequest::Put(RegionPutRequest {
1514                    rows: Rows { schema, rows },
1515                    hint: None,
1516                    partition_expr_version: None,
1517                }),
1518            )
1519            .await
1520            .unwrap_err();
1521        assert_eq!(err.status_code(), StatusCode::InvalidArguments);
1522    }
1523
1524    #[tokio::test]
1525    async fn test_verify_rows_rejects_missing_field() {
1526        use api::v1::value::ValueData;
1527        use api::v1::{ColumnDataType, ColumnSchema as PbColumnSchema, SemanticType};
1528        use common_query::prelude::greptime_timestamp;
1529
1530        let env = TestEnv::new().await;
1531        env.init_metric_region().await;
1532
1533        let logical_region_id = env.default_logical_region_id();
1534
1535        // Schema has timestamp + tag but no field column.
1536        let schema = vec![
1537            PbColumnSchema {
1538                column_name: greptime_timestamp().to_string(),
1539                datatype: ColumnDataType::TimestampMillisecond as i32,
1540                semantic_type: SemanticType::Timestamp as _,
1541                datatype_extension: None,
1542                options: None,
1543            },
1544            PbColumnSchema {
1545                column_name: "job".to_string(),
1546                datatype: ColumnDataType::String as i32,
1547                semantic_type: SemanticType::Tag as _,
1548                datatype_extension: None,
1549                options: None,
1550            },
1551        ];
1552        let rows = vec![Row {
1553            values: vec![
1554                Value {
1555                    value_data: Some(ValueData::TimestampMillisecondValue(0)),
1556                },
1557                Value {
1558                    value_data: Some(ValueData::StringValue("tag_0".to_string())),
1559                },
1560            ],
1561        }];
1562
1563        let err = env
1564            .metric()
1565            .handle_request(
1566                logical_region_id,
1567                RegionRequest::Put(RegionPutRequest {
1568                    rows: Rows { schema, rows },
1569                    hint: None,
1570                    partition_expr_version: None,
1571                }),
1572            )
1573            .await
1574            .unwrap_err();
1575        let message = err.to_string();
1576        assert!(
1577            message.contains("missing required field column"),
1578            "expected field-completeness rejection, got: {message}"
1579        );
1580        assert_eq!(err.status_code(), StatusCode::InvalidArguments);
1581    }
1582
1583    #[test]
1584    fn test_fill_missing_field_column_nullable_no_default() {
1585        let field_meta = ColumnMetadata {
1586            column_id: 1,
1587            semantic_type: SemanticType::Field,
1588            column_schema: ColumnSchema::new(
1589                "greptime_value".to_string(),
1590                ConcreteDataType::float64_datatype(),
1591                true, // nullable, no default
1592            ),
1593        };
1594        let mut rows = Rows {
1595            schema: vec![PbColumnSchema {
1596                column_name: "ts".to_string(),
1597                datatype: ColumnDataType::TimestampMillisecond as i32,
1598                semantic_type: SemanticType::Timestamp as _,
1599                datatype_extension: None,
1600                options: None,
1601            }],
1602            rows: vec![Row {
1603                values: vec![Value {
1604                    value_data: Some(ValueData::TimestampMillisecondValue(0)),
1605                }],
1606            }],
1607        };
1608
1609        MetricEngineInner::fill_missing_field_column(
1610            RegionId::new(1, 1),
1611            "greptime_value",
1612            &field_meta,
1613            &mut rows,
1614        )
1615        .unwrap();
1616
1617        assert_eq!(rows.schema.len(), 2);
1618        assert_eq!(rows.schema[1].column_name, "greptime_value");
1619        assert_eq!(rows.rows[0].values.len(), 2);
1620        assert!(
1621            rows.rows[0].values[1].value_data.is_none(),
1622            "missing nullable field should be filled with null"
1623        );
1624    }
1625
1626    #[test]
1627    fn test_fill_missing_field_column_rejects_impure_default() {
1628        let field_meta = ColumnMetadata {
1629            column_id: 1,
1630            semantic_type: SemanticType::Field,
1631            column_schema: ColumnSchema::new(
1632                "greptime_value".to_string(),
1633                ConcreteDataType::timestamp_millisecond_datatype(),
1634                false,
1635            )
1636            .with_default_constraint(Some(ColumnDefaultConstraint::Function("now()".to_string())))
1637            .unwrap(),
1638        };
1639        let mut rows = Rows {
1640            schema: vec![PbColumnSchema {
1641                column_name: "ts".to_string(),
1642                datatype: api::v1::ColumnDataType::TimestampMillisecond as i32,
1643                semantic_type: SemanticType::Timestamp as _,
1644                datatype_extension: None,
1645                options: None,
1646            }],
1647            rows: vec![Row {
1648                values: vec![Value {
1649                    value_data: Some(ValueData::TimestampMillisecondValue(0)),
1650                }],
1651            }],
1652        };
1653
1654        let err = MetricEngineInner::fill_missing_field_column(
1655            RegionId::new(1, 1),
1656            "greptime_value",
1657            &field_meta,
1658            &mut rows,
1659        )
1660        .unwrap_err();
1661        assert!(
1662            err.to_string().contains("impure default value"),
1663            "expected impure-default rejection, got: {err}"
1664        );
1665    }
1666}