1use std::collections::HashSet;
16
17use api::v1::{ArrowIpc, SemanticType};
18use bytes::Bytes;
19use common_grpc::flight::{FlightEncoder, FlightMessage};
20use datatypes::arrow::record_batch::RecordBatch;
21use snafu::{OptionExt, ResultExt, ensure};
22use store_api::codec::PrimaryKeyEncoding;
23use store_api::metadata::RegionMetadataRef;
24use store_api::region_engine::RegionEngine;
25use store_api::region_request::{AffectedRows, RegionBulkInsertsRequest, RegionRequest};
26use store_api::storage::RegionId;
27
28use crate::batch_modifier::{TagColumnInfo, modify_batch_sparse};
29use crate::engine::MetricEngineInner;
30use crate::error;
31use crate::error::Result;
32use crate::metrics::MITO_OPERATION_ELAPSED;
33
34impl MetricEngineInner {
35 pub async fn bulk_insert_region(
48 &self,
49 region_id: RegionId,
50 request: RegionBulkInsertsRequest,
51 ) -> Result<AffectedRows> {
52 if request.payload.num_rows() == 0 {
53 return Ok(0);
54 }
55 if self.is_physical_region(region_id) {
56 let _timer = MITO_OPERATION_ELAPSED
57 .with_label_values(&["bulk_insert_physical"])
58 .start_timer();
59 return self.bulk_insert_physical_region(region_id, request).await;
60 }
61
62 let _timer = MITO_OPERATION_ELAPSED
63 .with_label_values(&["bulk_insert_logical"])
64 .start_timer();
65 self.bulk_insert_logical_region(region_id, request).await
66 }
67
68 async fn bulk_insert_physical_region(
73 &self,
74 region_id: RegionId,
75 mut request: RegionBulkInsertsRequest,
76 ) -> Result<AffectedRows> {
77 request.aligned_schema_version = Some(self.physical_schema_version(region_id).await?);
80 self.data_region
81 .write_data(region_id, RegionRequest::BulkInserts(request))
82 .await
83 }
84
85 async fn bulk_insert_logical_region(
87 &self,
88 region_id: RegionId,
89 request: RegionBulkInsertsRequest,
90 ) -> Result<AffectedRows> {
91 let (physical_region_id, data_region_id, primary_key_encoding) =
92 self.find_data_region_meta(region_id)?;
93
94 if primary_key_encoding != PrimaryKeyEncoding::Sparse {
95 return error::UnsupportedRegionRequestSnafu {
96 request: RegionRequest::BulkInserts(request),
97 }
98 .fail();
99 }
100
101 let batch = request.payload;
102 if batch.num_rows() == 0 {
103 return Ok(0);
104 }
105
106 let logical_metadata = self
107 .logical_region_metadata(physical_region_id, region_id)
108 .await?;
109 let (tag_columns, non_tag_indices) = self.resolve_tag_columns_from_metadata(
110 region_id,
111 data_region_id,
112 &batch,
113 &logical_metadata,
114 )?;
115 let modified_batch = modify_batch_sparse(
116 batch.clone(),
117 region_id.table_id(),
118 &tag_columns,
119 &non_tag_indices,
120 )?;
121 let (schema, data_header, payload) = record_batch_to_ipc(&modified_batch)?;
122
123 let partition_expr_version = request.partition_expr_version;
124 let aligned_schema_version = Some(self.physical_schema_version(data_region_id).await?);
125
126 let request = RegionBulkInsertsRequest {
127 region_id: data_region_id,
128 payload: modified_batch,
129 raw_data: ArrowIpc {
130 schema,
131 data_header,
132 payload,
133 },
134 partition_expr_version,
135 aligned_schema_version,
136 };
137 self.data_region
138 .write_data(data_region_id, RegionRequest::BulkInserts(request))
139 .await
140 }
141
142 async fn physical_schema_version(&self, region_id: RegionId) -> Result<u64> {
143 Ok(self
144 .mito
145 .get_metadata(region_id)
146 .await
147 .context(error::MitoReadOperationSnafu)?
148 .schema_version)
149 }
150
151 fn resolve_tag_columns_from_metadata(
152 &self,
153 logical_region_id: RegionId,
154 data_region_id: RegionId,
155 batch: &RecordBatch,
156 logical_metadata: &RegionMetadataRef,
157 ) -> Result<(Vec<TagColumnInfo>, Vec<usize>)> {
158 let tag_names: HashSet<&str> = logical_metadata
159 .column_metadatas
160 .iter()
161 .filter_map(|column| {
162 if column.semantic_type == SemanticType::Tag {
163 Some(column.column_schema.name.as_str())
164 } else {
165 None
166 }
167 })
168 .collect();
169
170 let mut tag_columns = Vec::new();
171 let mut non_tag_indices = Vec::new();
172 {
173 let state = self.state.read().unwrap();
174 let physical_columns = state
175 .physical_region_states()
176 .get(&data_region_id)
177 .context(error::PhysicalRegionNotFoundSnafu {
178 region_id: data_region_id,
179 })?
180 .physical_columns();
181
182 for (index, field) in batch.schema().fields().iter().enumerate() {
183 let name = field.name();
184 let column_id = physical_columns
185 .get(name)
186 .map(|info| info.column_id)
187 .with_context(|| error::ColumnNotFoundSnafu {
188 name: name.clone(),
189 region_id: logical_region_id,
190 })?;
191 if tag_names.contains(name.as_str()) {
192 tag_columns.push(TagColumnInfo {
193 name: name.clone(),
194 index,
195 column_id,
196 });
197 } else {
198 non_tag_indices.push(index);
199 }
200 }
201 }
202
203 tag_columns.sort_by(|a, b| a.name.cmp(&b.name));
204 Ok((tag_columns, non_tag_indices))
205 }
206}
207
208fn record_batch_to_ipc(record_batch: &RecordBatch) -> Result<(Bytes, Bytes, Bytes)> {
209 let mut encoder = FlightEncoder::default();
210 let schema = encoder.encode_schema(record_batch.schema().as_ref());
211 let mut iter = encoder
212 .encode(FlightMessage::RecordBatch(record_batch.clone()))
213 .into_iter();
214
215 let Some(flight_data) = iter.next() else {
216 return error::UnexpectedRequestSnafu {
217 reason: "Failed to encode empty flight data",
218 }
219 .fail();
220 };
221 ensure!(
222 iter.next().is_none(),
223 error::UnexpectedRequestSnafu {
224 reason: "Bulk insert RecordBatch with dictionary arrays is unsupported".to_string(),
225 }
226 );
227
228 Ok((
229 schema.data_header,
230 flight_data.data_header,
231 flight_data.data_body,
232 ))
233}
234
#[cfg(test)]
mod tests {
    use std::assert_matches;
    use std::sync::Arc;

    use api::v1::ArrowIpc;
    use common_error::ext::ErrorExt;
    use common_query::prelude::{greptime_timestamp, greptime_value};
    use common_recordbatch::RecordBatches;
    use datatypes::arrow::array::{Float64Array, StringArray, TimestampMillisecondArray};
    use datatypes::arrow::datatypes::{DataType, Field, Schema as ArrowSchema, TimeUnit};
    use datatypes::arrow::record_batch::RecordBatch;
    use mito2::config::MitoConfig;
    use store_api::metric_engine_consts::PRIMARY_KEY_ENCODING;
    use store_api::path_utils::table_dir;
    use store_api::region_engine::RegionEngine;
    use store_api::region_request::{RegionBulkInsertsRequest, RegionPutRequest, RegionRequest};
    use store_api::storage::{RegionId, ScanRequest};

    use super::record_batch_to_ipc;
    use crate::batch_modifier::{TagColumnInfo, modify_batch_sparse};
    use crate::error::Error;
    use crate::test_util::{self, TestEnv};

    /// Arrow schema made of the timestamp column, the value column and one
    /// nullable Utf8 column per entry of `tags`, in that order.
    fn schema_with_tags(tags: &[&str]) -> Arc<ArrowSchema> {
        let mut fields = vec![
            Field::new(
                greptime_timestamp(),
                DataType::Timestamp(TimeUnit::Millisecond, None),
                false,
            ),
            Field::new(greptime_value(), DataType::Float64, true),
        ];
        fields.extend(tags.iter().map(|tag| Field::new(*tag, DataType::Utf8, true)));
        Arc::new(ArrowSchema::new(fields))
    }

    /// Builds `rows` rows with a single `job` tag fixed to `"tag_0"`; timestamps
    /// and values both count up from `start`.
    fn build_logical_batch(start: usize, rows: usize) -> RecordBatch {
        let range = start..start + rows;
        let ts: Vec<i64> = range.clone().map(|i| i as i64).collect();
        let values: Vec<f64> = range.map(|i| i as f64).collect();
        let tags = vec!["tag_0".to_string(); rows];

        RecordBatch::try_new(
            schema_with_tags(&["job"]),
            vec![
                Arc::new(TimestampMillisecondArray::from(ts)),
                Arc::new(Float64Array::from(values)),
                Arc::new(StringArray::from(tags)),
            ],
        )
        .unwrap()
    }

    /// Wraps `batch` and its IPC encoding into a bulk insert request for
    /// `logical_region_id`.
    fn build_bulk_request(logical_region_id: RegionId, batch: RecordBatch) -> RegionRequest {
        let (schema, data_header, payload) = record_batch_to_ipc(&batch).unwrap();
        let raw_data = ArrowIpc {
            schema,
            data_header,
            payload,
        };
        RegionRequest::BulkInserts(RegionBulkInsertsRequest {
            region_id: logical_region_id,
            payload: batch,
            raw_data,
            partition_expr_version: None,
            aligned_schema_version: None,
        })
    }

    /// Bulk inserts `batch` into `region_id` and returns the reported number of
    /// affected rows; panics if the request fails.
    async fn bulk_insert(env: &TestEnv, region_id: RegionId, batch: RecordBatch) -> usize {
        let request = build_bulk_request(region_id, batch);
        env.metric()
            .handle_request(region_id, request)
            .await
            .unwrap()
            .affected_rows
    }

    /// Scans `region_id` with a default `ScanRequest` and collects every batch.
    async fn scan_all(env: &TestEnv, region_id: RegionId) -> RecordBatches {
        let stream = env
            .metric()
            .scan_to_stream(region_id, ScanRequest::default())
            .await
            .unwrap();
        RecordBatches::try_collect(stream).await.unwrap()
    }

    /// Total number of rows returned by a default scan of `region_id`.
    async fn scanned_rows(env: &TestEnv, region_id: RegionId) -> usize {
        let batches = scan_all(env, region_id).await;
        batches.iter().map(|batch| batch.num_rows()).sum::<usize>()
    }

    /// Creates the default logical region with the given tag columns on top of
    /// `physical_region_id` and returns its id.
    async fn create_logical_region(
        env: &TestEnv,
        physical_region_id: RegionId,
        tags: &[&str],
    ) -> RegionId {
        let logical_region_id = env.default_logical_region_id();
        let request = test_util::create_logical_region_request(
            tags,
            physical_region_id,
            &table_dir("test", logical_region_id.table_id()),
        );
        env.metric()
            .handle_request(logical_region_id, RegionRequest::Create(request))
            .await
            .unwrap();
        logical_region_id
    }

    /// Sets up a physical region created with the `dense` primary key encoding
    /// option plus a logical region with a `job` tag on top of it.
    async fn init_dense_metric_region(env: &TestEnv) -> RegionId {
        let physical_region_id = env.default_physical_region_id();
        env.create_physical_region(
            physical_region_id,
            &TestEnv::default_table_dir(),
            vec![(PRIMARY_KEY_ENCODING.to_string(), "dense".to_string())],
        )
        .await;

        create_logical_region(env, physical_region_id, &["job"]).await
    }

    /// Environment whose mito config enables `default_flat_format`, with the
    /// default metric region already initialised.
    async fn flat_format_env() -> TestEnv {
        let mito_config = MitoConfig {
            default_flat_format: true,
            ..Default::default()
        };
        let env = TestEnv::with_mito_config("", mito_config, Default::default()).await;
        env.init_metric_region().await;
        env
    }

    #[tokio::test]
    async fn test_bulk_insert_empty_batch_returns_zero() {
        let env = TestEnv::new().await;
        env.init_metric_region().await;
        let logical_region_id = env.default_logical_region_id();

        // The request is built by hand so that `raw_data` stays at its default
        // value instead of going through `record_batch_to_ipc`.
        let request = RegionRequest::BulkInserts(RegionBulkInsertsRequest {
            region_id: logical_region_id,
            payload: build_logical_batch(0, 0),
            raw_data: ArrowIpc::default(),
            partition_expr_version: None,
            aligned_schema_version: None,
        });
        let response = env
            .metric()
            .handle_request(logical_region_id, request)
            .await
            .unwrap();
        assert_eq!(response.affected_rows, 0);
    }

    #[tokio::test]
    async fn test_bulk_insert_physical_region_passthrough() {
        let env = flat_format_env().await;
        let physical_region_id = env.default_physical_region_id();
        let logical_region_id = env.default_logical_region_id();

        // First write goes through the logical region.
        let affected = bulk_insert(&env, logical_region_id, build_logical_batch(0, 3)).await;
        assert_eq!(affected, 3);

        // Second write is rewritten by hand and sent straight to the physical
        // region.
        // NOTE(review): `column_id: 2` is presumably the id assigned to `job`
        // by the fixture — confirm against `TestEnv::init_metric_region`.
        let tag_columns = vec![TagColumnInfo {
            name: "job".to_string(),
            index: 2,
            column_id: 2,
        }];
        let non_tag_indices = vec![0, 1];
        let physical_batch = modify_batch_sparse(
            build_logical_batch(3, 3),
            logical_region_id.table_id(),
            &tag_columns,
            &non_tag_indices,
        )
        .unwrap();
        let affected = bulk_insert(&env, physical_region_id, physical_batch).await;
        assert_eq!(affected, 3);

        assert_eq!(scanned_rows(&env, logical_region_id).await, 6);
    }

    #[tokio::test]
    async fn test_bulk_insert_physical_region_empty_batch() {
        let env = flat_format_env().await;
        let physical_region_id = env.default_physical_region_id();

        let affected = bulk_insert(&env, physical_region_id, build_logical_batch(0, 0)).await;
        assert_eq!(affected, 0);
    }

    #[tokio::test]
    async fn test_bulk_insert_unknown_column_errors() {
        let env = TestEnv::new().await;
        env.init_metric_region().await;
        let logical_region_id = env.default_logical_region_id();

        let batch = RecordBatch::try_new(
            schema_with_tags(&["nonexistent_column"]),
            vec![
                Arc::new(TimestampMillisecondArray::from(vec![0i64])),
                Arc::new(Float64Array::from(vec![1.0])),
                Arc::new(StringArray::from(vec!["val"])),
            ],
        )
        .unwrap();

        let request = build_bulk_request(logical_region_id, batch);
        let boxed = env
            .metric()
            .handle_request(logical_region_id, request)
            .await
            .unwrap_err();
        let err = boxed
            .as_any()
            .downcast_ref::<Error>()
            .expect("unexpected error type");
        assert_matches!(err, Error::ColumnNotFound { .. });
    }

    #[tokio::test]
    async fn test_bulk_insert_multiple_tag_columns() {
        let env = TestEnv::new().await;
        let physical_region_id = env.default_physical_region_id();
        env.create_physical_region(physical_region_id, &TestEnv::default_table_dir(), vec![])
            .await;
        let logical_region_id =
            create_logical_region(&env, physical_region_id, &["host", "region"]).await;

        let batch = RecordBatch::try_new(
            schema_with_tags(&["host", "region"]),
            vec![
                Arc::new(TimestampMillisecondArray::from(vec![0i64, 1, 2])),
                Arc::new(Float64Array::from(vec![10.0, 20.0, 30.0])),
                Arc::new(StringArray::from(vec!["h1", "h2", "h1"])),
                Arc::new(StringArray::from(vec!["us-east", "us-west", "eu-west"])),
            ],
        )
        .unwrap();

        assert_eq!(bulk_insert(&env, logical_region_id, batch).await, 3);
        assert_eq!(scanned_rows(&env, logical_region_id).await, 3);
    }

    #[tokio::test]
    async fn test_bulk_insert_accumulates_rows() {
        let env = TestEnv::new().await;
        env.init_metric_region().await;
        let logical_region_id = env.default_logical_region_id();

        let first = bulk_insert(&env, logical_region_id, build_logical_batch(0, 3)).await;
        assert_eq!(first, 3);

        let second = bulk_insert(&env, logical_region_id, build_logical_batch(3, 5)).await;
        assert_eq!(second, 5);

        assert_eq!(scanned_rows(&env, logical_region_id).await, 8);
    }

    #[tokio::test]
    async fn test_bulk_insert_sparse_encoding() {
        let env = TestEnv::new().await;
        env.init_metric_region().await;
        let logical_region_id = env.default_logical_region_id();

        let affected = bulk_insert(&env, logical_region_id, build_logical_batch(0, 4)).await;
        assert_eq!(affected, 4);

        assert_eq!(scanned_rows(&env, logical_region_id).await, 4);
    }

    #[tokio::test]
    async fn test_bulk_insert_dense_encoding_rejected() {
        let env = TestEnv::new().await;
        let logical_region_id = init_dense_metric_region(&env).await;

        let request = build_bulk_request(logical_region_id, build_logical_batch(0, 2));
        let boxed = env
            .metric()
            .handle_request(logical_region_id, request)
            .await
            .unwrap_err();
        let err = boxed
            .as_any()
            .downcast_ref::<Error>()
            .expect("unexpected error type");
        assert_matches!(err, Error::UnsupportedRegionRequest { .. });
    }

    #[tokio::test]
    async fn test_bulk_insert_matches_put() {
        // Reference result: the same rows written through a regular put.
        let env_put = TestEnv::new().await;
        env_put.init_metric_region().await;
        let logical_region_id = env_put.default_logical_region_id();
        let put_request = RegionRequest::Put(RegionPutRequest {
            rows: api::v1::Rows {
                schema: test_util::row_schema_with_tags(&["job"]),
                rows: test_util::build_rows(1, 5),
            },
            hint: None,
            partition_expr_version: None,
        });
        env_put
            .metric()
            .handle_request(logical_region_id, put_request)
            .await
            .unwrap();
        let put_output = scan_all(&env_put, logical_region_id)
            .await
            .pretty_print()
            .unwrap();

        // Same data written through bulk insert into a fresh environment.
        let env_bulk = TestEnv::new().await;
        env_bulk.init_metric_region().await;
        bulk_insert(&env_bulk, logical_region_id, build_logical_batch(0, 5)).await;
        let bulk_output = scan_all(&env_bulk, logical_region_id)
            .await
            .pretty_print()
            .unwrap();

        assert_eq!(put_output, bulk_output);
    }
}