Skip to main content

mito2/memtable/bulk/
part_reader.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::VecDeque;
16use std::time::Instant;
17
18use datatypes::arrow::array::BooleanArray;
19use datatypes::arrow::record_batch::RecordBatch;
20use mito_codec::row_converter::PrimaryKeyFilter;
21use parquet::arrow::ProjectionMask;
22use parquet::arrow::arrow_reader::ParquetRecordBatchReader;
23use snafu::ResultExt;
24use store_api::storage::SequenceRange;
25
26use crate::error::{self, ComputeArrowSnafu, DecodeArrowRowGroupSnafu};
27use crate::memtable::bulk::context::{BulkIterContext, BulkIterContextRef};
28use crate::memtable::bulk::part::EncodedBulkPart;
29use crate::memtable::bulk::row_group_reader::MemtableRowGroupReaderBuilder;
30use crate::memtable::{MemScanMetrics, MemScanMetricsData};
31use crate::metrics::{READ_ROWS_TOTAL, READ_STAGE_ELAPSED};
32use crate::sst::parquet::file_range::TagDecodeState;
33use crate::sst::parquet::flat_format::{primary_key_column_index, sequence_column_index};
34use crate::sst::parquet::prefilter::{CachedPrimaryKeyFilter, prefilter_flat_batch_by_primary_key};
35
/// Iterator for reading data inside a bulk part.
///
/// Row groups are read one at a time through readers created by `builder`.
/// Every decoded batch goes through [apply_combined_filters] and only
/// non-empty results are yielded.
pub struct EncodedBulkPartIter {
    /// Iterator context providing the read format, pre-filter mode and region id.
    context: BulkIterContextRef,
    /// Indices of the row groups that have not been opened yet, consumed from the front.
    row_groups_to_read: VecDeque<usize>,
    /// Reader of the row group currently being consumed.
    /// `None` if the iterator was created without any row group to read.
    current_reader: Option<ParquetRecordBatchReader>,
    /// Builder that creates a reader for a given row group index.
    builder: MemtableRowGroupReaderBuilder,
    /// Sequence number filter.
    sequence: Option<SequenceRange>,
    /// Cached skip_fields for current row group.
    /// Refreshed from the context's pre-filter mode each time a row group reader is built.
    current_skip_fields: bool,
    /// Primary key filter for prefiltering before convert_batch.
    /// `None` if the context does not build one.
    pk_filter: Option<CachedPrimaryKeyFilter>,
    /// Metrics for this iterator.
    metrics: MemScanMetricsData,
    /// Optional memory scan metrics to report to.
    /// Taken when the metrics are reported so they are merged at most once.
    mem_scan_metrics: Option<MemScanMetrics>,
}
53
54impl EncodedBulkPartIter {
55    /// Creates a new [BulkPartIter].
56    pub fn try_new(
57        encoded_part: &EncodedBulkPart,
58        context: BulkIterContextRef,
59        mut row_groups_to_read: VecDeque<usize>,
60        sequence: Option<SequenceRange>,
61        mem_scan_metrics: Option<MemScanMetrics>,
62    ) -> error::Result<Self> {
63        let parquet_meta = encoded_part.metadata().parquet_metadata.clone();
64        let data = encoded_part.data().clone();
65        let series_count = encoded_part.metadata().num_series as usize;
66
67        let projection_mask = ProjectionMask::roots(
68            parquet_meta.file_metadata().schema_descr(),
69            context.read_format().projection_indices().iter().copied(),
70        );
71        let builder =
72            MemtableRowGroupReaderBuilder::try_new(&context, projection_mask, parquet_meta, data)?;
73
74        // Build PK filter if applicable (flat format with dictionary-encoded PKs).
75        let pk_filter = context.build_pk_filter();
76
77        let (init_reader, current_skip_fields) = match row_groups_to_read.pop_front() {
78            Some(first_row_group) => {
79                let skip_fields = context.pre_filter_mode().skip_fields();
80                let reader = builder.build_row_group_reader(first_row_group, None)?;
81                (Some(reader), skip_fields)
82            }
83            None => (None, false),
84        };
85
86        Ok(Self {
87            context,
88            row_groups_to_read,
89            current_reader: init_reader,
90            builder,
91            sequence,
92            current_skip_fields,
93            pk_filter,
94            metrics: MemScanMetricsData {
95                total_series: series_count,
96                ..Default::default()
97            },
98            mem_scan_metrics,
99        })
100    }
101
102    fn report_mem_scan_metrics(&mut self) {
103        if let Some(mem_scan_metrics) = self.mem_scan_metrics.take() {
104            mem_scan_metrics.merge_inner(&self.metrics);
105        }
106    }
107
108    /// Fetches next non-empty record batch.
109    pub(crate) fn next_record_batch(&mut self) -> error::Result<Option<RecordBatch>> {
110        let start = Instant::now();
111
112        let Some(current) = &mut self.current_reader else {
113            // All row group exhausted.
114            self.metrics.scan_cost += start.elapsed();
115            return Ok(None);
116        };
117
118        for batch in current {
119            let batch = batch.context(DecodeArrowRowGroupSnafu)?;
120            if let Some(batch) = apply_combined_filters(
121                &self.context,
122                &self.sequence,
123                batch,
124                self.current_skip_fields,
125                self.pk_filter
126                    .as_mut()
127                    .map(|f| f as &mut dyn PrimaryKeyFilter),
128                &mut self.metrics,
129            )? {
130                // Update metrics
131                self.metrics.num_batches += 1;
132                self.metrics.num_rows += batch.num_rows();
133                self.metrics.scan_cost += start.elapsed();
134                return Ok(Some(batch));
135            }
136        }
137
138        // Previous row group exhausted, read next row group
139        while let Some(next_row_group) = self.row_groups_to_read.pop_front() {
140            // Compute skip_fields for this row group
141            self.current_skip_fields = self.context.pre_filter_mode().skip_fields();
142
143            let next_reader = self.builder.build_row_group_reader(next_row_group, None)?;
144            let current = self.current_reader.insert(next_reader);
145
146            for batch in current {
147                let batch = batch.context(DecodeArrowRowGroupSnafu)?;
148                if let Some(batch) = apply_combined_filters(
149                    &self.context,
150                    &self.sequence,
151                    batch,
152                    self.current_skip_fields,
153                    self.pk_filter
154                        .as_mut()
155                        .map(|f| f as &mut dyn PrimaryKeyFilter),
156                    &mut self.metrics,
157                )? {
158                    // Update metrics
159                    self.metrics.num_batches += 1;
160                    self.metrics.num_rows += batch.num_rows();
161                    self.metrics.scan_cost += start.elapsed();
162                    return Ok(Some(batch));
163                }
164            }
165        }
166
167        self.metrics.scan_cost += start.elapsed();
168        Ok(None)
169    }
170}
171
172impl Iterator for EncodedBulkPartIter {
173    type Item = error::Result<RecordBatch>;
174
175    fn next(&mut self) -> Option<Self::Item> {
176        let result = self.next_record_batch().transpose();
177
178        // Report metrics when iteration is complete
179        if result.is_none() {
180            self.report_mem_scan_metrics();
181        }
182
183        result
184    }
185}
186
187impl Drop for EncodedBulkPartIter {
188    fn drop(&mut self) {
189        common_telemetry::debug!(
190            "EncodedBulkPartIter region: {}, metrics: total_series={}, num_rows={}, num_batches={}, scan_cost={:?}, prefilter_cost={:?}, prefilter_rows_filtered={}",
191            self.context.region_id(),
192            self.metrics.total_series,
193            self.metrics.num_rows,
194            self.metrics.num_batches,
195            self.metrics.scan_cost,
196            self.metrics.prefilter_cost,
197            self.metrics.prefilter_rows_filtered
198        );
199
200        // Report MemScanMetrics if not already reported
201        self.report_mem_scan_metrics();
202
203        READ_ROWS_TOTAL
204            .with_label_values(&["bulk_memtable"])
205            .inc_by(self.metrics.num_rows as u64);
206        READ_STAGE_ELAPSED
207            .with_label_values(&["scan_memtable"])
208            .observe(self.metrics.scan_cost.as_secs_f64());
209    }
210}
211
/// Iterator for reading record batches from a bulk part.
///
/// Iterates through one or more RecordBatches, applying filters and projections.
/// Batches that end up empty after filtering are skipped.
pub struct BulkPartBatchIter {
    /// Queue of RecordBatches to process, consumed from the front.
    batches: VecDeque<RecordBatch>,
    /// Iterator context for filtering and projection.
    context: BulkIterContextRef,
    /// Sequence number filter.
    sequence: Option<SequenceRange>,
    /// Primary key filter for prefiltering before convert_batch.
    /// `None` if the context does not build one.
    pk_filter: Option<CachedPrimaryKeyFilter>,
    /// Metrics for this iterator.
    metrics: MemScanMetricsData,
    /// Optional memory scan metrics to report to.
    /// Taken when the metrics are reported so they are merged at most once.
    mem_scan_metrics: Option<MemScanMetrics>,
}
229
230impl BulkPartBatchIter {
231    /// Creates a new [BulkPartBatchIter] from multiple RecordBatches.
232    pub fn new(
233        batches: Vec<RecordBatch>,
234        context: BulkIterContextRef,
235        sequence: Option<SequenceRange>,
236        series_count: usize,
237        mem_scan_metrics: Option<MemScanMetrics>,
238    ) -> Self {
239        let pk_filter = context.build_pk_filter();
240
241        Self {
242            batches: VecDeque::from(batches),
243            context,
244            sequence,
245            pk_filter,
246            metrics: MemScanMetricsData {
247                total_series: series_count,
248                ..Default::default()
249            },
250            mem_scan_metrics,
251        }
252    }
253
254    /// Creates a new [BulkPartBatchIter] from a single RecordBatch.
255    pub fn from_single(
256        record_batch: RecordBatch,
257        context: BulkIterContextRef,
258        sequence: Option<SequenceRange>,
259        series_count: usize,
260        mem_scan_metrics: Option<MemScanMetrics>,
261    ) -> Self {
262        Self::new(
263            vec![record_batch],
264            context,
265            sequence,
266            series_count,
267            mem_scan_metrics,
268        )
269    }
270
271    fn report_mem_scan_metrics(&mut self) {
272        if let Some(mem_scan_metrics) = self.mem_scan_metrics.take() {
273            mem_scan_metrics.merge_inner(&self.metrics);
274        }
275    }
276
277    /// Applies projection to the RecordBatch if needed.
278    fn apply_projection(&self, record_batch: RecordBatch) -> error::Result<RecordBatch> {
279        let projection_indices = self.context.read_format().projection_indices();
280        if projection_indices.len() == record_batch.num_columns() {
281            return Ok(record_batch);
282        }
283
284        record_batch
285            .project(projection_indices)
286            .context(ComputeArrowSnafu)
287    }
288
289    fn process_batch(&mut self, record_batch: RecordBatch) -> error::Result<Option<RecordBatch>> {
290        let start = Instant::now();
291
292        // Apply projection first.
293        let projected_batch = self.apply_projection(record_batch)?;
294
295        // Apply combined filtering (both predicate and sequence filters)
296        let skip_fields = self.context.pre_filter_mode().skip_fields();
297
298        let Some(filtered_batch) = apply_combined_filters(
299            &self.context,
300            &self.sequence,
301            projected_batch,
302            skip_fields,
303            self.pk_filter
304                .as_mut()
305                .map(|f| f as &mut dyn PrimaryKeyFilter),
306            &mut self.metrics,
307        )?
308        else {
309            self.metrics.scan_cost += start.elapsed();
310            return Ok(None);
311        };
312
313        // Update metrics
314        self.metrics.num_batches += 1;
315        self.metrics.num_rows += filtered_batch.num_rows();
316        self.metrics.scan_cost += start.elapsed();
317
318        Ok(Some(filtered_batch))
319    }
320}
321
322impl Iterator for BulkPartBatchIter {
323    type Item = error::Result<RecordBatch>;
324
325    fn next(&mut self) -> Option<Self::Item> {
326        // Process batches until we find a non-empty one or run out
327        while let Some(batch) = self.batches.pop_front() {
328            match self.process_batch(batch) {
329                Ok(Some(result)) => return Some(Ok(result)),
330                Ok(None) => continue, // This batch was filtered out, try next
331                Err(e) => {
332                    self.report_mem_scan_metrics();
333                    return Some(Err(e));
334                }
335            }
336        }
337
338        // No more batches
339        self.report_mem_scan_metrics();
340        None
341    }
342}
343
344impl Drop for BulkPartBatchIter {
345    fn drop(&mut self) {
346        common_telemetry::debug!(
347            "BulkPartBatchIter region: {}, metrics: total_series={}, num_rows={}, num_batches={}, scan_cost={:?}, prefilter_cost={:?}, prefilter_rows_filtered={}",
348            self.context.region_id(),
349            self.metrics.total_series,
350            self.metrics.num_rows,
351            self.metrics.num_batches,
352            self.metrics.scan_cost,
353            self.metrics.prefilter_cost,
354            self.metrics.prefilter_rows_filtered
355        );
356
357        // Report MemScanMetrics if not already reported
358        self.report_mem_scan_metrics();
359
360        READ_ROWS_TOTAL
361            .with_label_values(&["bulk_memtable"])
362            .inc_by(self.metrics.num_rows as u64);
363        READ_STAGE_ELAPSED
364            .with_label_values(&["scan_memtable"])
365            .observe(self.metrics.scan_cost.as_secs_f64());
366    }
367}
368
369/// Applies both predicate filtering and sequence filtering in a single pass.
370/// Returns None if the filtered batch is empty.
371///
372/// # Panics
373/// Panics if the format is not flat.
374fn apply_combined_filters(
375    context: &BulkIterContext,
376    sequence: &Option<SequenceRange>,
377    record_batch: RecordBatch,
378    skip_fields: bool,
379    pk_filter: Option<&mut dyn PrimaryKeyFilter>,
380    metrics: &mut MemScanMetricsData,
381) -> error::Result<Option<RecordBatch>> {
382    // Apply PK prefilter on raw batch before convert_batch to reduce conversion overhead.
383    let has_pk_prefilter = pk_filter.is_some();
384    let record_batch = if let Some(pk_filter) = pk_filter {
385        let rows_before = record_batch.num_rows();
386        let prefilter_start = Instant::now();
387        let pk_col_idx = primary_key_column_index(record_batch.num_columns());
388        match prefilter_flat_batch_by_primary_key(record_batch, pk_col_idx, pk_filter)? {
389            Some(batch) => {
390                metrics.prefilter_cost += prefilter_start.elapsed();
391                metrics.prefilter_rows_filtered += rows_before - batch.num_rows();
392                batch
393            }
394            None => {
395                metrics.prefilter_cost += prefilter_start.elapsed();
396                metrics.prefilter_rows_filtered += rows_before;
397                return Ok(None);
398            }
399        }
400    } else {
401        record_batch
402    };
403
404    // Converts the format to the flat format.
405    let record_batch = context.read_format().convert_batch(record_batch, None)?;
406
407    let num_rows = record_batch.num_rows();
408    let mut combined_filter = None;
409    let mut tag_decode_state = TagDecodeState::new();
410
411    // Apply predicate filters using the shared method.
412    if !context.base.filters.is_empty() {
413        let predicate_mask = context.base.compute_filter_mask_flat(
414            &record_batch,
415            skip_fields,
416            has_pk_prefilter,
417            &mut tag_decode_state,
418        )?;
419        // If predicate filters out the entire batch, return None early
420        let Some(mask) = predicate_mask else {
421            return Ok(None);
422        };
423        combined_filter = Some(BooleanArray::from(mask));
424    }
425
426    // Filters rows by the given `sequence`. Only preserves rows with sequence less than or equal to `sequence`.
427    if let Some(sequence) = sequence {
428        let sequence_column =
429            record_batch.column(sequence_column_index(record_batch.num_columns()));
430        let sequence_filter = sequence
431            .filter(&sequence_column)
432            .context(ComputeArrowSnafu)?;
433        // Combine with existing filter using AND operation
434        combined_filter = match combined_filter {
435            None => Some(sequence_filter),
436            Some(existing_filter) => {
437                let and_result = datatypes::arrow::compute::and(&existing_filter, &sequence_filter)
438                    .context(ComputeArrowSnafu)?;
439                Some(and_result)
440            }
441        };
442    }
443
444    // Apply the combined filter if any filters were applied
445    let Some(filter_array) = combined_filter else {
446        // No filters applied, return original batch
447        return Ok(Some(record_batch));
448    };
449    let select_count = filter_array.true_count();
450    if select_count == 0 {
451        return Ok(None);
452    }
453    if select_count == num_rows {
454        return Ok(Some(record_batch));
455    }
456    let filtered_batch =
457        datatypes::arrow::compute::filter_record_batch(&record_batch, &filter_array)
458            .context(ComputeArrowSnafu)?;
459
460    Ok(Some(filtered_batch))
461}
462
#[cfg(test)]
mod tests {
    use std::ops::RangeInclusive;
    use std::sync::Arc;

    use api::v1::SemanticType;
    use datafusion_expr::{col, lit};
    use datatypes::arrow::array::{
        ArrayRef, BinaryArray, DictionaryArray, Int64Array, StringArray,
        TimestampMillisecondArray, UInt8Array, UInt32Array, UInt64Array,
    };
    use datatypes::arrow::datatypes::{DataType, Field, Schema, TimeUnit};
    use datatypes::data_type::ConcreteDataType;
    use datatypes::schema::ColumnSchema;
    use store_api::metadata::{ColumnMetadata, RegionMetadataBuilder, RegionMetadataRef};
    use store_api::storage::RegionId;
    use table::predicate::Predicate;

    use super::*;
    use crate::memtable::bulk::context::BulkIterContext;
    use crate::test_util::sst_util::new_primary_key;

    /// Arrow schema shared by the tests: one tag, one field, the time index
    /// and the three internal columns.
    fn new_test_schema() -> Arc<Schema> {
        Arc::new(Schema::new(vec![
            Field::new("key1", DataType::Utf8, false),
            Field::new("field1", DataType::Int64, false),
            Field::new(
                "timestamp",
                DataType::Timestamp(TimeUnit::Millisecond, None),
                false,
            ),
            Field::new(
                "__primary_key",
                DataType::Dictionary(Box::new(DataType::UInt32), Box::new(DataType::Binary)),
                false,
            ),
            Field::new("__sequence", DataType::UInt64, false),
            Field::new("__op_type", DataType::UInt8, false),
        ]))
    }

    /// Minimal region metadata matching [new_test_schema].
    fn new_test_region_metadata() -> RegionMetadataRef {
        let mut builder = RegionMetadataBuilder::new(RegionId::new(1, 1));
        builder
            .push_column_metadata(ColumnMetadata {
                column_schema: ColumnSchema::new(
                    "key1",
                    ConcreteDataType::string_datatype(),
                    false,
                ),
                semantic_type: SemanticType::Tag,
                column_id: 0,
            })
            .push_column_metadata(ColumnMetadata {
                column_schema: ColumnSchema::new(
                    "field1",
                    ConcreteDataType::int64_datatype(),
                    false,
                ),
                semantic_type: SemanticType::Field,
                column_id: 1,
            })
            .push_column_metadata(ColumnMetadata {
                column_schema: ColumnSchema::new(
                    "timestamp",
                    ConcreteDataType::timestamp_millisecond_datatype(),
                    false,
                ),
                semantic_type: SemanticType::Timestamp,
                column_id: 2,
            })
            .primary_key(vec![0]);

        Arc::new(builder.build().unwrap())
    }

    /// Builds a batch with one row per id in `rows`.
    ///
    /// Row `i` has tag `key{i}`, field `10 + i`, timestamp `1000 * i`,
    /// sequence `i` and a PUT op type.
    fn new_test_batch(schema: &Arc<Schema>, rows: RangeInclusive<u32>) -> RecordBatch {
        let tags: Vec<String> = rows.clone().map(|i| format!("key{i}")).collect();
        let num_rows = tags.len();

        let key1 = Arc::new(StringArray::from_iter_values(tags.iter()));
        let field1 = Arc::new(Int64Array::from_iter_values(
            rows.clone().map(|i| 10 + i64::from(i)),
        ));
        let timestamp = Arc::new(TimestampMillisecondArray::from_iter_values(
            rows.clone().map(|i| 1000 * i64::from(i)),
        ));

        // Primary key dictionary array with properly encoded PKs, one entry per row.
        let pks: Vec<Vec<u8>> = tags
            .iter()
            .map(|tag| new_primary_key(&[tag.as_str()]))
            .collect();
        let pk_values = Arc::new(BinaryArray::from_iter_values(
            pks.iter().map(Vec::as_slice),
        ));
        let pk_keys = UInt32Array::from_iter_values(0..num_rows as u32);
        let primary_key = Arc::new(DictionaryArray::new(pk_keys, pk_values));

        let sequence = Arc::new(UInt64Array::from_iter_values(rows.map(u64::from)));
        let op_type = Arc::new(UInt8Array::from(vec![1u8; num_rows])); // PUT operations

        RecordBatch::try_new(
            schema.clone(),
            vec![key1, field1, timestamp, primary_key, sequence, op_type],
        )
        .unwrap()
    }

    #[test]
    fn test_bulk_part_batch_iter() {
        let schema = new_test_schema();
        let record_batch = new_test_batch(&schema, 1..=3);
        let region_metadata = new_test_region_metadata();

        // Create context
        let context = Arc::new(
            BulkIterContext::new(
                region_metadata.clone(),
                None, // No projection
                None, // No predicate
                false,
            )
            .unwrap(),
        );
        // Iterates all rows.
        let iter =
            BulkPartBatchIter::from_single(record_batch.clone(), context.clone(), None, 0, None);
        let result: Vec<_> = iter.map(|rb| rb.unwrap()).collect();
        assert_eq!(1, result.len());
        assert_eq!(3, result[0].num_rows());
        assert_eq!(6, result[0].num_columns());

        // Creates iter with sequence filter (only include sequences <= 2)
        let iter = BulkPartBatchIter::from_single(
            record_batch.clone(),
            context,
            Some(SequenceRange::LtEq { max: 2 }),
            0,
            None,
        );
        let result: Vec<_> = iter.map(|rb| rb.unwrap()).collect();
        assert_eq!(1, result.len());
        let expect_sequence = Arc::new(UInt64Array::from(vec![1, 2])) as ArrayRef;
        assert_eq!(
            &expect_sequence,
            result[0].column(result[0].num_columns() - 2)
        );
        assert_eq!(6, result[0].num_columns());

        let context = Arc::new(
            BulkIterContext::new(
                region_metadata,
                Some(&[0, 2]),
                Some(Predicate::new(vec![col("key1").eq(lit("key2"))])),
                false,
            )
            .unwrap(),
        );
        // Creates iter with projection and predicate.
        let iter =
            BulkPartBatchIter::from_single(record_batch.clone(), context.clone(), None, 0, None);
        let result: Vec<_> = iter.map(|rb| rb.unwrap()).collect();
        assert_eq!(1, result.len());
        assert_eq!(1, result[0].num_rows());
        assert_eq!(5, result[0].num_columns());
        let expect_sequence = Arc::new(UInt64Array::from(vec![2])) as ArrayRef;
        assert_eq!(
            &expect_sequence,
            result[0].column(result[0].num_columns() - 2)
        );
    }

    #[test]
    fn test_bulk_part_batch_iter_multiple_batches() {
        let schema = new_test_schema();
        // First batch with 2 rows, second batch with 3 rows.
        let batch1 = new_test_batch(&schema, 1..=2);
        let batch2 = new_test_batch(&schema, 3..=5);

        // Create context
        let context = Arc::new(
            BulkIterContext::new(
                new_test_region_metadata(),
                None, // No projection
                None, // No predicate
                false,
            )
            .unwrap(),
        );

        // Create iterator with multiple batches
        let expect_batches = vec![batch1, batch2];
        let iter = BulkPartBatchIter::new(expect_batches.clone(), context.clone(), None, 0, None);

        // Collect all results
        let result: Vec<_> = iter.map(|rb| rb.unwrap()).collect();
        assert_eq!(expect_batches, result);
    }
}