Skip to main content

metric_engine/engine/
bulk_insert.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::HashSet;
16
17use api::v1::{ArrowIpc, SemanticType};
18use bytes::Bytes;
19use common_grpc::flight::{FlightEncoder, FlightMessage};
20use datatypes::arrow::record_batch::RecordBatch;
21use snafu::{OptionExt, ResultExt, ensure};
22use store_api::codec::PrimaryKeyEncoding;
23use store_api::metadata::RegionMetadataRef;
24use store_api::region_engine::RegionEngine;
25use store_api::region_request::{AffectedRows, RegionBulkInsertsRequest, RegionRequest};
26use store_api::storage::RegionId;
27
28use crate::batch_modifier::{TagColumnInfo, modify_batch_sparse};
29use crate::engine::MetricEngineInner;
30use crate::error;
31use crate::error::Result;
32use crate::metrics::MITO_OPERATION_ELAPSED;
33
34impl MetricEngineInner {
35    /// Bulk-inserts rows into a metric region.
36    ///
37    /// **Logical region path:** The request payload is a logical `RecordBatch`
38    /// (timestamp, value and tag columns). It is transformed to physical format
39    /// via `modify_batch_sparse`, encoded to Arrow IPC, and forwarded as a
40    /// `BulkInserts` request to the data region.
41    ///
42    /// **Physical region path:** The request payload is already in physical format
43    /// (produced by the batcher's `flush_batch_physical`). It is forwarded directly
44    /// to the data region with no transformation.
45    ///
46    /// Returns the number of affected rows, or `0` if the input batch is empty.
47    pub async fn bulk_insert_region(
48        &self,
49        region_id: RegionId,
50        request: RegionBulkInsertsRequest,
51    ) -> Result<AffectedRows> {
52        if request.payload.num_rows() == 0 {
53            return Ok(0);
54        }
55        if self.is_physical_region(region_id) {
56            let _timer = MITO_OPERATION_ELAPSED
57                .with_label_values(&["bulk_insert_physical"])
58                .start_timer();
59            return self.bulk_insert_physical_region(region_id, request).await;
60        }
61
62        let _timer = MITO_OPERATION_ELAPSED
63            .with_label_values(&["bulk_insert_logical"])
64            .start_timer();
65        self.bulk_insert_logical_region(region_id, request).await
66    }
67
68    /// Passthrough for bulk inserts targeting a physical data region.
69    ///
70    /// The batch is already in physical format (with `__primary_key`, timestamp,
71    /// value columns), so no logical-to-physical transformation is needed.
72    async fn bulk_insert_physical_region(
73        &self,
74        region_id: RegionId,
75        mut request: RegionBulkInsertsRequest,
76    ) -> Result<AffectedRows> {
77        // Simply set the aligned schema to the data region schema version to avoid filling missing columns
78        // because that schema should be constant and callers have ensured request has the same schema.
79        request.aligned_schema_version = Some(self.physical_schema_version(region_id).await?);
80        self.data_region
81            .write_data(region_id, RegionRequest::BulkInserts(request))
82            .await
83    }
84
85    /// Bulk-inserts logical rows, transforming them to physical format first.
86    async fn bulk_insert_logical_region(
87        &self,
88        region_id: RegionId,
89        request: RegionBulkInsertsRequest,
90    ) -> Result<AffectedRows> {
91        let (physical_region_id, data_region_id, primary_key_encoding) =
92            self.find_data_region_meta(region_id)?;
93
94        if primary_key_encoding != PrimaryKeyEncoding::Sparse {
95            return error::UnsupportedRegionRequestSnafu {
96                request: RegionRequest::BulkInserts(request),
97            }
98            .fail();
99        }
100
101        let batch = request.payload;
102        if batch.num_rows() == 0 {
103            return Ok(0);
104        }
105
106        let logical_metadata = self
107            .logical_region_metadata(physical_region_id, region_id)
108            .await?;
109        let (tag_columns, non_tag_indices) = self.resolve_tag_columns_from_metadata(
110            region_id,
111            data_region_id,
112            &batch,
113            &logical_metadata,
114        )?;
115        let modified_batch = modify_batch_sparse(
116            batch.clone(),
117            region_id.table_id(),
118            &tag_columns,
119            &non_tag_indices,
120        )?;
121        let (schema, data_header, payload) = record_batch_to_ipc(&modified_batch)?;
122
123        let partition_expr_version = request.partition_expr_version;
124        let aligned_schema_version = Some(self.physical_schema_version(data_region_id).await?);
125
126        let request = RegionBulkInsertsRequest {
127            region_id: data_region_id,
128            payload: modified_batch,
129            raw_data: ArrowIpc {
130                schema,
131                data_header,
132                payload,
133            },
134            partition_expr_version,
135            aligned_schema_version,
136        };
137        self.data_region
138            .write_data(data_region_id, RegionRequest::BulkInserts(request))
139            .await
140    }
141
142    async fn physical_schema_version(&self, region_id: RegionId) -> Result<u64> {
143        Ok(self
144            .mito
145            .get_metadata(region_id)
146            .await
147            .context(error::MitoReadOperationSnafu)?
148            .schema_version)
149    }
150
151    fn resolve_tag_columns_from_metadata(
152        &self,
153        logical_region_id: RegionId,
154        data_region_id: RegionId,
155        batch: &RecordBatch,
156        logical_metadata: &RegionMetadataRef,
157    ) -> Result<(Vec<TagColumnInfo>, Vec<usize>)> {
158        let tag_names: HashSet<&str> = logical_metadata
159            .column_metadatas
160            .iter()
161            .filter_map(|column| {
162                if column.semantic_type == SemanticType::Tag {
163                    Some(column.column_schema.name.as_str())
164                } else {
165                    None
166                }
167            })
168            .collect();
169
170        let mut tag_columns = Vec::new();
171        let mut non_tag_indices = Vec::new();
172        {
173            let state = self.state.read().unwrap();
174            let physical_columns = state
175                .physical_region_states()
176                .get(&data_region_id)
177                .context(error::PhysicalRegionNotFoundSnafu {
178                    region_id: data_region_id,
179                })?
180                .physical_columns();
181
182            for (index, field) in batch.schema().fields().iter().enumerate() {
183                let name = field.name();
184                let column_id = physical_columns
185                    .get(name)
186                    .map(|info| info.column_id)
187                    .with_context(|| error::ColumnNotFoundSnafu {
188                        name: name.clone(),
189                        region_id: logical_region_id,
190                    })?;
191                if tag_names.contains(name.as_str()) {
192                    tag_columns.push(TagColumnInfo {
193                        name: name.clone(),
194                        index,
195                        column_id,
196                    });
197                } else {
198                    non_tag_indices.push(index);
199                }
200            }
201        }
202
203        tag_columns.sort_by(|a, b| a.name.cmp(&b.name));
204        Ok((tag_columns, non_tag_indices))
205    }
206}
207
208fn record_batch_to_ipc(record_batch: &RecordBatch) -> Result<(Bytes, Bytes, Bytes)> {
209    let mut encoder = FlightEncoder::default();
210    let schema = encoder.encode_schema(record_batch.schema().as_ref());
211    let mut iter = encoder
212        .encode(FlightMessage::RecordBatch(record_batch.clone()))
213        .into_iter();
214
215    let Some(flight_data) = iter.next() else {
216        return error::UnexpectedRequestSnafu {
217            reason: "Failed to encode empty flight data",
218        }
219        .fail();
220    };
221    ensure!(
222        iter.next().is_none(),
223        error::UnexpectedRequestSnafu {
224            reason: "Bulk insert RecordBatch with dictionary arrays is unsupported".to_string(),
225        }
226    );
227
228    Ok((
229        schema.data_header,
230        flight_data.data_header,
231        flight_data.data_body,
232    ))
233}
234
#[cfg(test)]
mod tests {
    use std::assert_matches;
    use std::sync::Arc;

    use api::v1::ArrowIpc;
    use common_error::ext::ErrorExt;
    use common_query::prelude::{greptime_timestamp, greptime_value};
    use common_recordbatch::RecordBatches;
    use datatypes::arrow::array::{Float64Array, StringArray, TimestampMillisecondArray};
    use datatypes::arrow::datatypes::{DataType, Field, Schema as ArrowSchema, TimeUnit};
    use datatypes::arrow::record_batch::RecordBatch;
    use mito2::config::MitoConfig;
    use store_api::metric_engine_consts::PRIMARY_KEY_ENCODING;
    use store_api::path_utils::table_dir;
    use store_api::region_engine::RegionEngine;
    use store_api::region_request::{
        AffectedRows, RegionBulkInsertsRequest, RegionPutRequest, RegionRequest,
    };
    use store_api::storage::{RegionId, ScanRequest};

    use super::record_batch_to_ipc;
    use crate::batch_modifier::{TagColumnInfo, modify_batch_sparse};
    use crate::error::Error;
    use crate::test_util::{self, TestEnv};

    /// Arrow schema of a logical metric batch: a non-null millisecond timestamp,
    /// a nullable float value, then one nullable string column per tag name.
    fn metric_schema(tag_names: &[&str]) -> Arc<ArrowSchema> {
        let mut fields = vec![
            Field::new(
                greptime_timestamp(),
                DataType::Timestamp(TimeUnit::Millisecond, None),
                false,
            ),
            Field::new(greptime_value(), DataType::Float64, true),
        ];
        fields.extend(
            tag_names
                .iter()
                .map(|tag| Field::new(*tag, DataType::Utf8, true)),
        );
        Arc::new(ArrowSchema::new(fields))
    }

    /// Builds a logical batch of `rows` rows whose timestamps and values count up
    /// from `start`; every row carries the tag `job = "tag_0"`.
    fn build_logical_batch(start: usize, rows: usize) -> RecordBatch {
        let row_ids = start..start + rows;
        let ts: Vec<i64> = row_ids.clone().map(|i| i as i64).collect();
        let values: Vec<f64> = row_ids.map(|i| i as f64).collect();
        let tags = vec!["tag_0".to_string(); rows];

        RecordBatch::try_new(
            metric_schema(&["job"]),
            vec![
                Arc::new(TimestampMillisecondArray::from(ts)),
                Arc::new(Float64Array::from(values)),
                Arc::new(StringArray::from(tags)),
            ],
        )
        .unwrap()
    }

    /// Wraps `batch` in a `BulkInserts` request for `logical_region_id`, with the
    /// Arrow IPC encoding of the same batch as `raw_data`.
    fn build_bulk_request(logical_region_id: RegionId, batch: RecordBatch) -> RegionRequest {
        let (schema, data_header, payload) = record_batch_to_ipc(&batch).unwrap();
        let raw_data = ArrowIpc {
            schema,
            data_header,
            payload,
        };
        RegionRequest::BulkInserts(RegionBulkInsertsRequest {
            region_id: logical_region_id,
            payload: batch,
            raw_data,
            partition_expr_version: None,
            aligned_schema_version: None,
        })
    }

    /// Sends `batch` as a bulk insert to `region_id` and returns the affected row
    /// count, panicking if the request fails.
    async fn bulk_insert(env: &TestEnv, region_id: RegionId, batch: RecordBatch) -> AffectedRows {
        let request = build_bulk_request(region_id, batch);
        env.metric()
            .handle_request(region_id, request)
            .await
            .unwrap()
            .affected_rows
    }

    /// Scans `region_id` with a default scan request and returns the total row count.
    async fn scanned_row_count(env: &TestEnv, region_id: RegionId) -> usize {
        let stream = env
            .metric()
            .scan_to_stream(region_id, ScanRequest::default())
            .await
            .unwrap();
        let batches = RecordBatches::try_collect(stream).await.unwrap();
        batches.iter().map(|b| b.num_rows()).sum::<usize>()
    }

    /// Test environment whose mito engine defaults to the flat format, so that
    /// BulkMemtable is used (supports write_bulk).
    async fn flat_format_env() -> TestEnv {
        let mito_config = MitoConfig {
            default_flat_format: true,
            ..Default::default()
        };
        TestEnv::with_mito_config("", mito_config, Default::default()).await
    }

    /// Creates a physical region with dense primary-key encoding plus a logical
    /// region with a `job` tag on top of it; returns the logical region id.
    async fn init_dense_metric_region(env: &TestEnv) -> RegionId {
        let physical_region_id = env.default_physical_region_id();
        let options = vec![(PRIMARY_KEY_ENCODING.to_string(), "dense".to_string())];
        env.create_physical_region(physical_region_id, &TestEnv::default_table_dir(), options)
            .await;

        let logical_region_id = env.default_logical_region_id();
        let create_request = test_util::create_logical_region_request(
            &["job"],
            physical_region_id,
            &table_dir("test", logical_region_id.table_id()),
        );
        env.metric()
            .handle_request(logical_region_id, RegionRequest::Create(create_request))
            .await
            .unwrap();
        logical_region_id
    }

    #[tokio::test]
    async fn test_bulk_insert_empty_batch_returns_zero() {
        let env = TestEnv::new().await;
        env.init_metric_region().await;
        let logical_region_id = env.default_logical_region_id();

        // The request is assembled by hand with a default (empty) IPC payload:
        // an empty batch must be answered without looking at `raw_data`.
        let request = RegionRequest::BulkInserts(RegionBulkInsertsRequest {
            region_id: logical_region_id,
            payload: build_logical_batch(0, 0),
            raw_data: ArrowIpc::default(),
            partition_expr_version: None,
            aligned_schema_version: None,
        });
        let response = env
            .metric()
            .handle_request(logical_region_id, request)
            .await
            .unwrap();
        assert_eq!(response.affected_rows, 0);
    }

    #[tokio::test]
    async fn test_bulk_insert_physical_region_passthrough() {
        let env = flat_format_env().await;
        env.init_metric_region().await;
        let physical_region_id = env.default_physical_region_id();
        let logical_region_id = env.default_logical_region_id();

        // First, do a normal logical bulk insert so we can compare results.
        let affected = bulk_insert(&env, logical_region_id, build_logical_batch(0, 3)).await;
        assert_eq!(affected, 3);

        // Now build a physical-format batch using modify_batch_sparse (simulating
        // what the batcher's flush_batch_physical does) and send it directly to
        // the physical region.
        let job_tag = TagColumnInfo {
            name: "job".to_string(),
            index: 2,
            column_id: 2, // column_id for "job" in the physical table
        };
        let physical_batch = modify_batch_sparse(
            build_logical_batch(3, 3),
            logical_region_id.table_id(),
            &[job_tag],
            &[0, 1], // timestamp, value
        )
        .unwrap();
        let affected = bulk_insert(&env, physical_region_id, physical_batch).await;
        assert_eq!(affected, 3);

        // Verify all 6 rows are readable from the logical region.
        assert_eq!(scanned_row_count(&env, logical_region_id).await, 6);
    }

    #[tokio::test]
    async fn test_bulk_insert_physical_region_empty_batch() {
        let env = flat_format_env().await;
        env.init_metric_region().await;
        let physical_region_id = env.default_physical_region_id();

        let affected = bulk_insert(&env, physical_region_id, build_logical_batch(0, 0)).await;
        assert_eq!(affected, 0);
    }

    #[tokio::test]
    async fn test_bulk_insert_unknown_column_errors() {
        let env = TestEnv::new().await;
        env.init_metric_region().await;
        let logical_region_id = env.default_logical_region_id();

        let batch = RecordBatch::try_new(
            metric_schema(&["nonexistent_column"]),
            vec![
                Arc::new(TimestampMillisecondArray::from(vec![0i64])),
                Arc::new(Float64Array::from(vec![1.0])),
                Arc::new(StringArray::from(vec!["val"])),
            ],
        )
        .unwrap();

        let err = env
            .metric()
            .handle_request(logical_region_id, build_bulk_request(logical_region_id, batch))
            .await
            .unwrap_err();
        match err.as_any().downcast_ref::<Error>() {
            Some(err) => assert_matches!(err, Error::ColumnNotFound { .. }),
            None => panic!("unexpected error type"),
        }
    }

    #[tokio::test]
    async fn test_bulk_insert_multiple_tag_columns() {
        let env = TestEnv::new().await;
        let physical_region_id = env.default_physical_region_id();
        env.create_physical_region(physical_region_id, &TestEnv::default_table_dir(), vec![])
            .await;
        let logical_region_id = env.default_logical_region_id();
        let create_request = test_util::create_logical_region_request(
            &["host", "region"],
            physical_region_id,
            &table_dir("test", logical_region_id.table_id()),
        );
        env.metric()
            .handle_request(logical_region_id, RegionRequest::Create(create_request))
            .await
            .unwrap();

        let batch = RecordBatch::try_new(
            metric_schema(&["host", "region"]),
            vec![
                Arc::new(TimestampMillisecondArray::from(vec![0i64, 1, 2])),
                Arc::new(Float64Array::from(vec![10.0, 20.0, 30.0])),
                Arc::new(StringArray::from(vec!["h1", "h2", "h1"])),
                Arc::new(StringArray::from(vec!["us-east", "us-west", "eu-west"])),
            ],
        )
        .unwrap();

        assert_eq!(bulk_insert(&env, logical_region_id, batch).await, 3);
        assert_eq!(scanned_row_count(&env, logical_region_id).await, 3);
    }

    #[tokio::test]
    async fn test_bulk_insert_accumulates_rows() {
        let env = TestEnv::new().await;
        env.init_metric_region().await;
        let logical_region_id = env.default_logical_region_id();

        let first = bulk_insert(&env, logical_region_id, build_logical_batch(0, 3)).await;
        assert_eq!(first, 3);

        let second = bulk_insert(&env, logical_region_id, build_logical_batch(3, 5)).await;
        assert_eq!(second, 5);

        assert_eq!(scanned_row_count(&env, logical_region_id).await, 8);
    }

    #[tokio::test]
    async fn test_bulk_insert_sparse_encoding() {
        let env = TestEnv::new().await;
        env.init_metric_region().await;
        let logical_region_id = env.default_logical_region_id();

        let affected = bulk_insert(&env, logical_region_id, build_logical_batch(0, 4)).await;
        assert_eq!(affected, 4);

        assert_eq!(scanned_row_count(&env, logical_region_id).await, 4);
    }

    #[tokio::test]
    async fn test_bulk_insert_dense_encoding_rejected() {
        let env = TestEnv::new().await;
        let logical_region_id = init_dense_metric_region(&env).await;

        let request = build_bulk_request(logical_region_id, build_logical_batch(0, 2));
        let err = env
            .metric()
            .handle_request(logical_region_id, request)
            .await
            .unwrap_err();
        match err.as_any().downcast_ref::<Error>() {
            Some(err) => assert_matches!(err, Error::UnsupportedRegionRequest { .. }),
            None => panic!("unexpected error type"),
        }
    }

    #[tokio::test]
    async fn test_bulk_insert_matches_put() {
        // Reference environment: write five rows through the regular put path.
        let env_put = TestEnv::new().await;
        env_put.init_metric_region().await;
        let logical_region_id = env_put.default_logical_region_id();
        let schema = test_util::row_schema_with_tags(&["job"]);
        let rows = test_util::build_rows(1, 5);
        let put_request = RegionRequest::Put(RegionPutRequest {
            rows: api::v1::Rows { schema, rows },
            hint: None,
            partition_expr_version: None,
        });
        env_put
            .metric()
            .handle_request(logical_region_id, put_request)
            .await
            .unwrap();
        let put_stream = env_put
            .metric()
            .scan_to_stream(logical_region_id, ScanRequest::default())
            .await
            .unwrap();
        let put_output = RecordBatches::try_collect(put_stream)
            .await
            .unwrap()
            .pretty_print()
            .unwrap();

        // Second environment: write five rows through the bulk insert path.
        let env_bulk = TestEnv::new().await;
        env_bulk.init_metric_region().await;
        bulk_insert(&env_bulk, logical_region_id, build_logical_batch(0, 5)).await;
        let bulk_stream = env_bulk
            .metric()
            .scan_to_stream(logical_region_id, ScanRequest::default())
            .await
            .unwrap();
        let bulk_output = RecordBatches::try_collect(bulk_stream)
            .await
            .unwrap()
            .pretty_print()
            .unwrap();

        assert_eq!(put_output, bulk_output);
    }
}
635}