Skip to main content

mito2/read/
seq_scan.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Sequential scan.
16
17use std::fmt;
18use std::sync::Arc;
19use std::time::Instant;
20
21use async_stream::try_stream;
22use common_error::ext::BoxedError;
23use common_recordbatch::util::ChainedRecordBatchStream;
24use common_recordbatch::{RecordBatchStreamWrapper, SendableRecordBatchStream};
25use common_telemetry::tracing;
26use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
27use datafusion::physical_plan::{DisplayAs, DisplayFormatType};
28use datatypes::schema::SchemaRef;
29use futures::{StreamExt, TryStreamExt};
30use snafu::ensure;
31use store_api::metadata::RegionMetadataRef;
32use store_api::region_engine::{
33    PartitionRange, PrepareRequest, QueryScanContext, RegionScanner, ScannerProperties,
34};
35use store_api::storage::TimeSeriesRowSelector;
36use tokio::sync::Semaphore;
37
38use crate::error::{PartitionOutOfRangeSnafu, Result, TooManyFilesToReadSnafu};
39use crate::read::flat_dedup::{FlatDedupReader, FlatLastNonNull, FlatLastRow};
40use crate::read::flat_merge::FlatMergeReader;
41use crate::read::last_row::FlatLastRowReader;
42use crate::read::pruner::{PartitionPruner, Pruner};
43use crate::read::range::RangeMeta;
44use crate::read::range_cache::{
45    build_range_cache_key, cache_flat_range_stream, cached_flat_range_stream,
46};
47use crate::read::scan_region::{ScanInput, StreamContext};
48use crate::read::scan_util::{
49    PartitionMetrics, PartitionMetricsList, SplitRecordBatchStream, compute_parallel_channel_size,
50    scan_flat_file_ranges, scan_flat_mem_ranges, should_split_flat_batches_for_merge,
51};
52use crate::read::stream::{ConvertBatchStream, ScanBatch, ScanBatchStream};
53use crate::read::{BoxedRecordBatchStream, ScannerMetrics, scan_util};
54use crate::region::options::MergeMode;
55use crate::sst::parquet::DEFAULT_READ_BATCH_SIZE;
56
/// Scans a region and returns rows in a sorted sequence.
///
/// The output order is always `order by primary keys, time index` inside every
/// [`PartitionRange`]. Each "partition" may contain many [`PartitionRange`]s.
pub struct SeqScan {
    /// Properties of the scanner, including the partition ranges assigned to
    /// each partition.
    properties: ScannerProperties,
    /// Context of streams. It is shared with the pruner and with every stream
    /// built by this scanner.
    stream_ctx: Arc<StreamContext>,
    /// Shared pruner for file range building. It is created once in `new()` and
    /// cloned for every partition scan.
    pruner: Arc<Pruner>,
    /// Metrics for each partition.
    /// The scanner only sets entries when it serves a query and keeps the list
    /// empty during compaction.
    metrics_list: PartitionMetricsList,
}
72
73impl SeqScan {
74    /// Creates a new [SeqScan] with the given input.
75    /// If `input.compaction` is true, the scanner will not attempt to split ranges.
76    pub(crate) fn new(input: ScanInput) -> Self {
77        let mut properties = ScannerProperties::default()
78            .with_append_mode(input.append_mode)
79            .with_total_rows(input.total_rows());
80        let stream_ctx = Arc::new(StreamContext::seq_scan_ctx(input));
81        properties.partitions = vec![stream_ctx.partition_ranges()];
82
83        // Create the shared pruner with number of workers equal to CPU cores.
84        let num_workers = common_stat::get_total_cpu_cores().max(1);
85        let pruner = Arc::new(Pruner::new(stream_ctx.clone(), num_workers));
86
87        Self {
88            properties,
89            stream_ctx,
90            pruner,
91            metrics_list: PartitionMetricsList::default(),
92        }
93    }
94
95    /// Builds a stream for the query.
96    ///
97    /// The returned stream is not partitioned and will contains all the data. If want
98    /// partitioned scan, use [`RegionScanner::scan_partition`].
99    #[tracing::instrument(
100        skip_all,
101        fields(region_id = %self.stream_ctx.input.mapper.metadata().region_id)
102    )]
103    pub fn build_stream(&self) -> Result<SendableRecordBatchStream, BoxedError> {
104        let metrics_set = ExecutionPlanMetricsSet::new();
105        let streams = (0..self.properties.partitions.len())
106            .map(|partition: usize| {
107                self.scan_partition(&QueryScanContext::default(), &metrics_set, partition)
108            })
109            .collect::<Result<Vec<_>, _>>()?;
110
111        let aggr_stream = ChainedRecordBatchStream::new(streams).map_err(BoxedError::new)?;
112        Ok(Box::pin(aggr_stream))
113    }
114
115    /// Scan [`Batch`] in all partitions one by one.
116    pub(crate) fn scan_all_partitions(&self) -> Result<ScanBatchStream> {
117        let metrics_set = ExecutionPlanMetricsSet::new();
118
119        let streams = (0..self.properties.partitions.len())
120            .map(|partition| {
121                let metrics = self.new_partition_metrics(false, &metrics_set, partition);
122                self.scan_flat_batch_in_partition(partition, metrics)
123            })
124            .collect::<Result<Vec<_>>>()?;
125
126        Ok(Box::pin(futures::stream::iter(streams).flatten()))
127    }
128
129    /// Builds a [BoxedRecordBatchStream] from sequential scan for flat format compaction.
130    ///
131    /// # Panics
132    /// Panics if the compaction flag is not set.
133    pub async fn build_flat_reader_for_compaction(&self) -> Result<BoxedRecordBatchStream> {
134        assert!(self.stream_ctx.input.compaction);
135
136        let metrics_set = ExecutionPlanMetricsSet::new();
137        let part_metrics = self.new_partition_metrics(false, &metrics_set, 0);
138        debug_assert_eq!(1, self.properties.partitions.len());
139        let partition_ranges = &self.properties.partitions[0];
140
141        let reader = Self::merge_all_flat_ranges_for_compaction(
142            &self.stream_ctx,
143            partition_ranges,
144            &part_metrics,
145            self.pruner.clone(),
146        )
147        .await?;
148        Ok(reader)
149    }
150
151    /// Builds a merge reader that reads all flat ranges.
152    /// Callers MUST not split ranges before calling this method.
153    async fn merge_all_flat_ranges_for_compaction(
154        stream_ctx: &Arc<StreamContext>,
155        partition_ranges: &[PartitionRange],
156        part_metrics: &PartitionMetrics,
157        pruner: Arc<Pruner>,
158    ) -> Result<BoxedRecordBatchStream> {
159        pruner.add_partition_ranges(partition_ranges);
160        let partition_pruner = Arc::new(PartitionPruner::new(pruner, partition_ranges));
161
162        let mut sources = Vec::new();
163        for part_range in partition_ranges {
164            build_flat_sources(
165                stream_ctx,
166                part_range,
167                true,
168                part_metrics,
169                partition_pruner.clone(),
170                &mut sources,
171                None,
172            )
173            .await?;
174        }
175
176        common_telemetry::debug!(
177            "Build flat reader to read all parts, region_id: {}, num_part_ranges: {}, num_sources: {}",
178            stream_ctx.input.mapper.metadata().region_id,
179            partition_ranges.len(),
180            sources.len()
181        );
182        Self::build_flat_reader_from_sources(
183            stream_ctx,
184            sources,
185            None,
186            None,
187            false,
188            compute_parallel_channel_size(DEFAULT_READ_BATCH_SIZE),
189        )
190        .await
191    }
192
193    /// Builds a flat reader to read sources that returns RecordBatch.
194    /// If `semaphore` is provided, reads sources in parallel if possible.
195    /// If `skip_dedup` is true, the merged stream is returned without applying flat dedup.
196    #[tracing::instrument(level = tracing::Level::DEBUG, skip_all)]
197    pub(crate) async fn build_flat_reader_from_sources(
198        stream_ctx: &StreamContext,
199        mut sources: Vec<BoxedRecordBatchStream>,
200        semaphore: Option<Arc<Semaphore>>,
201        part_metrics: Option<&PartitionMetrics>,
202        skip_dedup: bool,
203        channel_size: usize,
204    ) -> Result<BoxedRecordBatchStream> {
205        if let Some(semaphore) = semaphore.as_ref() {
206            // Read sources in parallel.
207            if sources.len() > 1 {
208                sources = stream_ctx.input.create_parallel_flat_sources(
209                    sources,
210                    semaphore.clone(),
211                    channel_size,
212                )?;
213            }
214        }
215
216        let mapper = stream_ctx.input.mapper.as_flat().unwrap();
217        let schema = mapper.input_arrow_schema(stream_ctx.input.compaction);
218
219        let metrics_reporter = part_metrics.map(|m| m.merge_metrics_reporter());
220        let reader =
221            FlatMergeReader::new(schema, sources, DEFAULT_READ_BATCH_SIZE, metrics_reporter)
222                .await?;
223
224        let dedup = !skip_dedup && !stream_ctx.input.append_mode;
225        let dedup_metrics_reporter = part_metrics.map(|m| m.dedup_metrics_reporter());
226        let reader = if dedup {
227            match stream_ctx.input.merge_mode {
228                MergeMode::LastRow => Box::pin(
229                    FlatDedupReader::new(
230                        reader.into_stream().boxed(),
231                        FlatLastRow::new(stream_ctx.input.filter_deleted),
232                        dedup_metrics_reporter,
233                    )
234                    .into_stream(),
235                ) as _,
236                MergeMode::LastNonNull => Box::pin(
237                    FlatDedupReader::new(
238                        reader.into_stream().boxed(),
239                        FlatLastNonNull::new(
240                            mapper.field_column_start(),
241                            stream_ctx.input.filter_deleted,
242                        ),
243                        dedup_metrics_reporter,
244                    )
245                    .into_stream(),
246                ) as _,
247            }
248        } else {
249            Box::pin(reader.into_stream()) as _
250        };
251
252        let reader = match &stream_ctx.input.series_row_selector {
253            Some(TimeSeriesRowSelector::LastRow) => {
254                Box::pin(FlatLastRowReader::new(reader).into_stream()) as _
255            }
256            None => reader,
257        };
258
259        Ok(reader)
260    }
261
262    /// Builds a flat read stream for one partition range.
263    pub(crate) async fn build_flat_partition_range_read(
264        stream_ctx: &Arc<StreamContext>,
265        part_range: &PartitionRange,
266        compaction: bool,
267        part_metrics: &PartitionMetrics,
268        partition_pruner: Arc<PartitionPruner>,
269        file_scan_semaphore: Option<Arc<Semaphore>>,
270        merge_semaphore: Option<Arc<Semaphore>>,
271    ) -> Result<(BoxedRecordBatchStream, usize)> {
272        let cache_key = build_range_cache_key(stream_ctx, part_range);
273
274        if let Some(key) = cache_key.as_ref() {
275            if let Some(value) = stream_ctx.input.cache_strategy.get_range_result(key) {
276                part_metrics.inc_range_cache_hit();
277                return Ok((cached_flat_range_stream(value), DEFAULT_READ_BATCH_SIZE));
278            }
279            part_metrics.inc_range_cache_miss();
280        }
281
282        let mut sources = Vec::new();
283        let split_batch_size = build_flat_sources(
284            stream_ctx,
285            part_range,
286            compaction,
287            part_metrics,
288            partition_pruner,
289            &mut sources,
290            file_scan_semaphore,
291        )
292        .await?;
293        let estimated_rows_per_batch = split_batch_size.unwrap_or(DEFAULT_READ_BATCH_SIZE);
294        let channel_size = compute_parallel_channel_size(estimated_rows_per_batch);
295        let stream = Self::build_flat_reader_from_sources(
296            stream_ctx,
297            sources,
298            merge_semaphore,
299            Some(part_metrics),
300            false,
301            channel_size,
302        )
303        .await?;
304
305        let stream = match cache_key {
306            Some(key) => cache_flat_range_stream(
307                stream,
308                stream_ctx.input.cache_strategy.clone(),
309                key,
310                part_metrics.clone(),
311            ),
312            None => stream,
313        };
314
315        Ok((stream, estimated_rows_per_batch))
316    }
317
318    /// Scans the given partition when the part list is set properly.
319    /// Otherwise the returned stream might not contains any data.
320    fn scan_partition_impl(
321        &self,
322        ctx: &QueryScanContext,
323        metrics_set: &ExecutionPlanMetricsSet,
324        partition: usize,
325    ) -> Result<SendableRecordBatchStream> {
326        if ctx.explain_verbose {
327            common_telemetry::info!(
328                "SeqScan partition {}, region_id: {}",
329                partition,
330                self.stream_ctx.input.region_metadata().region_id
331            );
332        }
333
334        let metrics = self.new_partition_metrics(ctx.explain_verbose, metrics_set, partition);
335        let input = &self.stream_ctx.input;
336
337        let batch_stream = self.scan_flat_batch_in_partition(partition, metrics.clone())?;
338        let record_batch_stream = ConvertBatchStream::new(
339            batch_stream,
340            input.mapper.clone(),
341            input.cache_strategy.clone(),
342            metrics,
343        );
344
345        Ok(Box::pin(RecordBatchStreamWrapper::new(
346            input.mapper.output_schema(),
347            Box::pin(record_batch_stream),
348        )))
349    }
350
    /// Builds a stream that scans every partition range of `partition` in order
    /// and yields the record batches as [`ScanBatch::RecordBatch`].
    ///
    /// The partition ranges are registered with the shared pruner eagerly, i.e.
    /// when this method is called and not when the stream is first polled.
    /// Scan metrics are collected per partition range and merged into
    /// `part_metrics` after each range is exhausted.
    ///
    /// # Errors
    /// Returns a `PartitionOutOfRange` error if `partition` is not a valid
    /// partition index. Errors raised while building or reading a range are
    /// yielded by the returned stream.
    #[tracing::instrument(
        skip_all,
        fields(
            region_id = %self.stream_ctx.input.mapper.metadata().region_id,
            partition = partition
        )
    )]
    fn scan_flat_batch_in_partition(
        &self,
        partition: usize,
        part_metrics: PartitionMetrics,
    ) -> Result<ScanBatchStream> {
        ensure!(
            partition < self.properties.partitions.len(),
            PartitionOutOfRangeSnafu {
                given: partition,
                all: self.properties.partitions.len(),
            }
        );

        // Nothing to scan, so skip the pruner registration and metrics callbacks.
        if self.properties.partitions[partition].is_empty() {
            return Ok(Box::pin(futures::stream::empty()));
        }

        let stream_ctx = self.stream_ctx.clone();
        let semaphore = self.new_semaphore();
        let partition_ranges = self.properties.partitions[partition].clone();
        let compaction = self.stream_ctx.input.compaction;
        // Compaction never builds file sources in parallel; queries reuse the
        // same semaphore for building file sources and for merging.
        let file_scan_semaphore = if compaction { None } else { semaphore.clone() };
        let pruner = self.pruner.clone();
        // Initializes ref counts for the pruner.
        // If we call scan_flat_batch_in_partition() multiple times but don't read all batches from the stream,
        // then the ref count won't be decremented.
        // This is a rare case and keeping all remaining entries still uses less memory than a per partition cache.
        pruner.add_partition_ranges(&partition_ranges);
        let partition_pruner = Arc::new(PartitionPruner::new(pruner, &partition_ranges));

        let stream = try_stream! {
            part_metrics.on_first_poll();
            // Start fetch time before building sources so scan cost contains
            // build part cost.
            let mut fetch_start = Instant::now();

            // Scans each part.
            for part_range in partition_ranges {
                let (mut reader, _) = Self::build_flat_partition_range_read(
                    &stream_ctx,
                    &part_range,
                    compaction,
                    &part_metrics,
                    partition_pruner.clone(),
                    file_scan_semaphore.clone(),
                    semaphore.clone(),
                )
                .await?;

                // The time spent building the reader is the initial scan cost of this range.
                let mut metrics = ScannerMetrics {
                    scan_cost: fetch_start.elapsed(),
                    ..Default::default()
                };
                fetch_start = Instant::now();

                while let Some(record_batch) = reader.try_next().await? {
                    metrics.scan_cost += fetch_start.elapsed();
                    metrics.num_batches += 1;
                    metrics.num_rows += record_batch.num_rows();

                    // Readers are expected to return non-empty batches. Release builds
                    // skip an empty batch instead of yielding it.
                    debug_assert!(record_batch.num_rows() > 0);
                    if record_batch.num_rows() == 0 {
                        fetch_start = Instant::now();
                        continue;
                    }

                    // Time spent suspended at the yield point is tracked separately
                    // from the scan cost.
                    let yield_start = Instant::now();
                    yield ScanBatch::RecordBatch(record_batch);
                    metrics.yield_cost += yield_start.elapsed();

                    fetch_start = Instant::now();
                }

                // Account for the final poll that returned `None`, then restart the
                // timer so the next range includes its build cost.
                metrics.scan_cost += fetch_start.elapsed();
                fetch_start = Instant::now();
                part_metrics.merge_metrics(&metrics);
            }

            // Only reached when every range was read to the end without an error.
            part_metrics.on_finish();
        };
        Ok(Box::pin(stream))
    }
440
441    fn new_semaphore(&self) -> Option<Arc<Semaphore>> {
442        if self.properties.target_partitions() > self.properties.num_partitions() {
443            // We can use additional tasks to read the data if we have more target partitions than actual partitions.
444            // This semaphore is partition level.
445            // We don't use a global semaphore to avoid a partition waiting for others. The final concurrency
446            // of tasks usually won't exceed the target partitions a lot as compaction can reduce the number of
447            // files in a part range.
448            Some(Arc::new(Semaphore::new(
449                self.properties.target_partitions() - self.properties.num_partitions() + 1,
450            )))
451        } else {
452            None
453        }
454    }
455
456    /// Creates a new partition metrics instance.
457    /// Sets the partition metrics for the given partition if it is not for compaction.
458    fn new_partition_metrics(
459        &self,
460        explain_verbose: bool,
461        metrics_set: &ExecutionPlanMetricsSet,
462        partition: usize,
463    ) -> PartitionMetrics {
464        let metrics = PartitionMetrics::new(
465            self.stream_ctx.input.mapper.metadata().region_id,
466            partition,
467            get_scanner_type(self.stream_ctx.input.compaction),
468            self.stream_ctx.query_start,
469            explain_verbose,
470            metrics_set,
471        );
472
473        if !self.stream_ctx.input.compaction {
474            self.metrics_list.set(partition, metrics.clone());
475        }
476
477        metrics
478    }
479
480    /// Finds the maximum number of files to read in a single partition range.
481    fn max_files_in_partition(ranges: &[RangeMeta], partition_ranges: &[PartitionRange]) -> usize {
482        partition_ranges
483            .iter()
484            .map(|part_range| {
485                let range_meta = &ranges[part_range.identifier];
486                range_meta.indices.len()
487            })
488            .max()
489            .unwrap_or(0)
490    }
491
492    /// Checks resource limit for the scanner.
493    pub(crate) fn check_scan_limit(&self) -> Result<()> {
494        // Check max file count limit for all partitions since we scan them in parallel.
495        let total_max_files: usize = self
496            .properties
497            .partitions
498            .iter()
499            .map(|partition| Self::max_files_in_partition(&self.stream_ctx.ranges, partition))
500            .sum();
501
502        let max_concurrent_files = self.stream_ctx.input.max_concurrent_scan_files;
503        if total_max_files > max_concurrent_files {
504            return TooManyFilesToReadSnafu {
505                actual: total_max_files,
506                max: max_concurrent_files,
507            }
508            .fail();
509        }
510
511        Ok(())
512    }
513}
514
515impl RegionScanner for SeqScan {
516    fn name(&self) -> &str {
517        "SeqScan"
518    }
519
520    fn properties(&self) -> &ScannerProperties {
521        &self.properties
522    }
523
524    fn schema(&self) -> SchemaRef {
525        self.stream_ctx.input.mapper.output_schema()
526    }
527
528    fn metadata(&self) -> RegionMetadataRef {
529        self.stream_ctx.input.mapper.metadata().clone()
530    }
531
532    fn scan_partition(
533        &self,
534        ctx: &QueryScanContext,
535        metrics_set: &ExecutionPlanMetricsSet,
536        partition: usize,
537    ) -> Result<SendableRecordBatchStream, BoxedError> {
538        self.scan_partition_impl(ctx, metrics_set, partition)
539            .map_err(BoxedError::new)
540    }
541
542    fn prepare(&mut self, request: PrepareRequest) -> Result<(), BoxedError> {
543        self.properties.prepare(request);
544
545        self.check_scan_limit().map_err(BoxedError::new)?;
546
547        Ok(())
548    }
549
550    fn has_predicate_without_region(&self) -> bool {
551        let predicate = self
552            .stream_ctx
553            .input
554            .predicate_group()
555            .predicate_without_region();
556        predicate.is_some()
557    }
558
559    fn add_dyn_filter_to_predicate(
560        &mut self,
561        filter_exprs: Vec<Arc<dyn datafusion::physical_plan::PhysicalExpr>>,
562    ) -> Vec<bool> {
563        self.stream_ctx.add_dyn_filter_to_predicate(filter_exprs)
564    }
565
566    fn set_logical_region(&mut self, logical_region: bool) {
567        self.properties.set_logical_region(logical_region);
568    }
569}
570
571impl DisplayAs for SeqScan {
572    fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter) -> fmt::Result {
573        write!(
574            f,
575            "SeqScan: region={}, ",
576            self.stream_ctx.input.mapper.metadata().region_id
577        )?;
578        match t {
579            // TODO(LFC): Implement all the "TreeRender" display format.
580            DisplayFormatType::Default | DisplayFormatType::TreeRender => {
581                self.stream_ctx.format_for_explain(false, f)
582            }
583            DisplayFormatType::Verbose => {
584                self.stream_ctx.format_for_explain(true, f)?;
585                self.metrics_list.format_verbose_metrics(f)
586            }
587        }
588    }
589}
590
591impl fmt::Debug for SeqScan {
592    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
593        f.debug_struct("SeqScan")
594            .field("num_ranges", &self.stream_ctx.ranges.len())
595            .finish()
596    }
597}
598
/// Builds flat sources for the partition range and pushes them to the `sources` vector.
///
/// One source is built per row group index of the range. Sources are pushed in
/// the order of the row group indices, even when file sources are built in
/// parallel. Indices that produce no stream are skipped.
///
/// - `compaction`: selects the read type passed to file scans; debug builds
///   also assert that the range has not been split.
/// - `semaphore`: if provided, file sources are built concurrently and each
///   build holds one permit; otherwise they are built one after another.
///
/// Returns the estimated rows per batch after splitting if splitting is applied, or `None`.
/// Also returns `None` without touching `sources` when the range has no row group index.
pub(crate) async fn build_flat_sources(
    stream_ctx: &Arc<StreamContext>,
    part_range: &PartitionRange,
    compaction: bool,
    part_metrics: &PartitionMetrics,
    partition_pruner: Arc<PartitionPruner>,
    sources: &mut Vec<BoxedRecordBatchStream>,
    semaphore: Option<Arc<Semaphore>>,
) -> Result<Option<usize>> {
    // Gets range meta.
    let range_meta = &stream_ctx.ranges[part_range.identifier];
    #[cfg(debug_assertions)]
    if compaction {
        // Compaction expects that input sources have not been split.
        debug_assert_eq!(range_meta.indices.len(), range_meta.row_group_indices.len());
        for (i, row_group_idx) in range_meta.row_group_indices.iter().enumerate() {
            // It should scan all row groups.
            debug_assert_eq!(
                -1, row_group_idx.row_group_index,
                "Expect {} range scan all row groups, given: {}",
                i, row_group_idx.row_group_index,
            );
        }
    }

    let read_type = if compaction {
        "compaction"
    } else {
        "seq_scan_files"
    };
    let num_indices = range_meta.row_group_indices.len();
    if num_indices == 0 {
        return Ok(None);
    }

    let split_batch_size = should_split_flat_batches_for_merge(stream_ctx, range_meta);
    let should_split = split_batch_size.is_some();
    sources.reserve(num_indices);
    // One slot per row group index, so streams built out of order can still be
    // pushed in index order.
    let mut ordered_sources = Vec::with_capacity(num_indices);
    ordered_sources.resize_with(num_indices, || None);
    // Pending file scans; only used when a semaphore is provided.
    let mut file_scan_tasks = Vec::new();

    for (position, index) in range_meta.row_group_indices.iter().enumerate() {
        if stream_ctx.is_mem_range_index(*index) {
            let stream = scan_flat_mem_ranges(
                stream_ctx.clone(),
                part_metrics.clone(),
                *index,
                range_meta.time_range,
            );
            ordered_sources[position] = Some(Box::pin(stream) as _);
        } else if stream_ctx.is_file_range_index(*index) {
            if let Some(semaphore_ref) = semaphore.as_ref() {
                // run in parallel, controlled by semaphore
                let stream_ctx = stream_ctx.clone();
                let part_metrics = part_metrics.clone();
                let partition_pruner = partition_pruner.clone();
                let semaphore = Arc::clone(semaphore_ref);
                let row_group_index = *index;
                file_scan_tasks.push(async move {
                    // The permit is released when this task finishes building the stream.
                    let _permit = semaphore.acquire().await.unwrap();
                    let stream = scan_flat_file_ranges(
                        stream_ctx,
                        part_metrics,
                        row_group_index,
                        read_type,
                        partition_pruner,
                    )
                    .await?;
                    // Return the position so the stream goes back to its slot.
                    Ok((position, Box::pin(stream) as _))
                });
            } else {
                // no semaphore, run sequentially
                let stream = scan_flat_file_ranges(
                    stream_ctx.clone(),
                    part_metrics.clone(),
                    *index,
                    read_type,
                    partition_pruner.clone(),
                )
                .await?;
                ordered_sources[position] = Some(Box::pin(stream) as _);
            }
        } else {
            // Neither a memory range nor a file range.
            let stream =
                scan_util::maybe_scan_flat_other_ranges(stream_ctx, *index, part_metrics).await?;
            ordered_sources[position] = Some(stream);
        }
    }

    // Wait for all pending file scans; the first error aborts the whole build.
    if !file_scan_tasks.is_empty() {
        let results = futures::future::try_join_all(file_scan_tasks).await?;
        for (position, stream) in results {
            ordered_sources[position] = Some(stream);
        }
    }

    // Push the sources in index order, skipping slots that were never filled.
    for stream in ordered_sources.into_iter().flatten() {
        if should_split {
            sources.push(Box::pin(SplitRecordBatchStream::new(stream)));
        } else {
            sources.push(stream);
        }
    }

    if should_split {
        common_telemetry::debug!(
            "Splitting record batches, region: {}, sources: {}, part_range: {:?}",
            stream_ctx.input.region_metadata().region_id,
            sources.len(),
            part_range,
        );
    }

    Ok(split_batch_size)
}
717
#[cfg(test)]
impl SeqScan {
    /// Returns the scan input held by the stream context (test only).
    pub(crate) fn input(&self) -> &ScanInput {
        let stream_ctx: &StreamContext = &self.stream_ctx;
        &stream_ctx.input
    }
}
725
/// Returns the scanner type label used for partition metrics.
///
/// The label is `"SeqScan(compaction)"` when `compaction` is true and
/// `"SeqScan"` otherwise.
fn get_scanner_type(compaction: bool) -> &'static str {
    match compaction {
        true => "SeqScan(compaction)",
        false => "SeqScan",
    }
}