// cmd/datanode/scanbench.rs
1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::HashMap;
16use std::path::PathBuf;
17use std::sync::Arc;
18use std::time::Instant;
19
20use clap::Parser;
21use colored::Colorize;
22use common_base::Plugins;
23use common_error::ext::{BoxedError, PlainError};
24use common_error::status_code::StatusCode;
25use common_meta::cache::{new_schema_cache, new_table_schema_cache};
26use common_meta::key::SchemaMetadataManager;
27use common_meta::kv_backend::memory::MemoryKvBackend;
28use common_wal::config::DatanodeWalConfig;
29use datafusion::execution::SessionStateBuilder;
30use datafusion::logical_expr::{BinaryExpr, Expr as DfExpr, ExprSchemable, Operator};
31use datafusion_common::tree_node::{Transformed, TreeNodeRewriter};
32use datafusion_common::{DFSchemaRef, ScalarValue, ToDFSchema};
33use datafusion_physical_plan::metrics::ExecutionPlanMetricsSet;
34use datatypes::arrow::compute;
35use futures::StreamExt;
36use futures::stream::FuturesUnordered;
37use log_store::kafka::log_store::KafkaLogStore;
38use log_store::noop::log_store::NoopLogStore;
39use log_store::raft_engine::log_store::RaftEngineLogStore;
40use mito2::config::MitoConfig;
41use mito2::engine::MitoEngine;
42use mito2::sst::file_ref::FileReferenceManager;
43use moka::future::CacheBuilder;
44use object_store::manager::ObjectStoreManager;
45use object_store::util::normalize_dir;
46use query::optimizer::parallelize_scan::ParallelizeScan;
47use serde::Deserialize;
48use snafu::{OptionExt, ResultExt};
49use sqlparser::ast::ExprWithAlias as SqlExprWithAlias;
50use sqlparser::dialect::GenericDialect;
51use sqlparser::parser::Parser as SqlParser;
52use store_api::metadata::RegionMetadata;
53use store_api::path_utils::WAL_DIR;
54use store_api::region_engine::{PrepareRequest, QueryScanContext, RegionEngine};
55use store_api::region_request::{PathType, RegionOpenRequest, RegionRequest};
56use store_api::storage::{RegionId, ScanRequest, TimeSeriesDistribution, TimeSeriesRowSelector};
57use tokio::fs;
58
59use crate::datanode::objbench::{build_object_store, parse_config};
60use crate::error;
61
/// Scan benchmark command - benchmarks scanning a region directly from storage.
// NOTE: the `///` comments on each field below double as the clap-generated CLI
// help text, so they are written in user-facing language and must stay stable.
#[derive(Debug, Parser)]
pub struct ScanbenchCommand {
    /// Path to config TOML file (same format as standalone/datanode config)
    #[clap(long, value_name = "FILE")]
    config: PathBuf,

    /// Region ID: either numeric u64 (e.g. "4398046511104") or "table_id:region_num" (e.g. "1024:0")
    #[clap(long)]
    region_id: String,

    /// Table directory relative to data home (e.g. "data/greptime/public/1024/")
    #[clap(long)]
    table_dir: String,

    /// Scanner type: seq, unordered, series
    #[clap(long, default_value = "seq")]
    scanner: String,

    /// Path to scan request JSON config file (optional)
    #[clap(long, value_name = "FILE")]
    scan_config: Option<PathBuf>,

    /// Number of partitions for parallel scan (simulates parallelism)
    #[clap(long, default_value = "1")]
    parallelism: usize,

    /// Number of iterations for benchmarking
    #[clap(long, default_value = "1")]
    iterations: usize,

    /// Path type for the region: bare, data, metadata
    #[clap(long, default_value = "bare")]
    path_type: String,

    /// Verbose output
    #[clap(short, long, default_value_t = false)]
    verbose: bool,

    /// Output pprof flamegraph
    #[clap(long, value_name = "FILE")]
    pprof_file: Option<PathBuf>,

    /// Force reading the region in flat format.
    #[clap(long, default_value_t = false)]
    force_flat_format: bool,

    /// Enable WAL replay when opening the region.
    #[clap(long, default_value_t = false)]
    enable_wal: bool,

    /// Start pprof after the first iteration (use first iteration as warmup).
    #[clap(long, default_value_t = false)]
    pprof_after_warmup: bool,
}
117
/// JSON config for scan request parameters.
///
/// Deserialized from the file passed via `--scan-config`; all fields are
/// optional and default to `None` (no projection, no filters, no selector).
#[derive(Debug, Deserialize, Default)]
struct ScanConfig {
    /// Column indexes to project; mutually exclusive with `projection_names`.
    projection: Option<Vec<usize>>,
    /// Column names to project, resolved against region metadata;
    /// mutually exclusive with `projection`.
    projection_names: Option<Vec<String>>,
    /// Filter expressions written as SQL predicates (e.g. `"host = 'a'"`).
    filters: Option<Vec<String>>,
    /// Per-series row selector; currently only `"last_row"` is recognized.
    series_row_selector: Option<String>,
}
126
127fn resolve_projection(
128    scan_config: &ScanConfig,
129    metadata: Option<&RegionMetadata>,
130) -> error::Result<Option<Vec<usize>>> {
131    if scan_config.projection.is_some() && scan_config.projection_names.is_some() {
132        return Err(error::IllegalConfigSnafu {
133            msg: "scan config cannot contain both 'projection' and 'projection_names'".to_string(),
134        }
135        .build());
136    }
137
138    if let Some(projection) = &scan_config.projection {
139        return Ok(Some(projection.clone()));
140    }
141
142    if let Some(projection_names) = &scan_config.projection_names {
143        let metadata = metadata.context(error::IllegalConfigSnafu {
144            msg: "Missing region metadata while resolving 'projection_names'".to_string(),
145        })?;
146        let available_columns = metadata
147            .column_metadatas
148            .iter()
149            .map(|column| column.column_schema.name.as_str())
150            .collect::<Vec<_>>()
151            .join(", ");
152        let projection = projection_names
153            .iter()
154            .map(|name| {
155                metadata
156                    .column_index_by_name(name)
157                    .with_context(|| error::IllegalConfigSnafu {
158                        msg: format!(
159                            "Unknown column '{}' in projection_names, available columns: [{}]",
160                            name, available_columns
161                        ),
162                    })
163            })
164            .collect::<error::Result<Vec<_>>>()?;
165        return Ok(Some(projection));
166    }
167
168    Ok(None)
169}
170
/// Renders a byte count as a human-readable string using binary units
/// (B / KiB / MiB / GiB), with two decimal places above the 1 KiB threshold.
fn format_bytes(bytes: u64) -> String {
    const KIB: u64 = 1 << 10;
    const MIB: u64 = 1 << 20;
    const GIB: u64 = 1 << 30;
    // Pick the largest unit that the value reaches.
    match bytes {
        b if b >= GIB => format!("{:.2} GiB", b as f64 / GIB as f64),
        b if b >= MIB => format!("{:.2} MiB", b as f64 / MIB as f64),
        b if b >= KIB => format!("{:.2} KiB", b as f64 / KIB as f64),
        b => format!("{} B", b),
    }
}
185
186fn parse_region_id(s: &str) -> error::Result<RegionId> {
187    if s.contains(':') {
188        let parts: Vec<&str> = s.splitn(2, ':').collect();
189        let table_id: u32 = parts[0].parse().map_err(|e| {
190            error::IllegalConfigSnafu {
191                msg: format!("invalid table_id in region_id '{}': {}", s, e),
192            }
193            .build()
194        })?;
195        let region_num: u32 = parts[1].parse().map_err(|e| {
196            error::IllegalConfigSnafu {
197                msg: format!("invalid region_num in region_id '{}': {}", s, e),
198            }
199            .build()
200        })?;
201        Ok(RegionId::new(table_id, region_num))
202    } else {
203        let id: u64 = s.parse().map_err(|e| {
204            error::IllegalConfigSnafu {
205                msg: format!("invalid region_id '{}': {}", s, e),
206            }
207            .build()
208        })?;
209        Ok(RegionId::from_u64(id))
210    }
211}
212
213fn parse_path_type(s: &str) -> error::Result<PathType> {
214    match s.to_lowercase().as_str() {
215        "bare" => Ok(PathType::Bare),
216        "data" => Ok(PathType::Data),
217        "metadata" => Ok(PathType::Metadata),
218        _ => Err(error::IllegalConfigSnafu {
219            msg: format!("invalid path_type '{}', expected: bare, data, metadata", s),
220        }
221        .build()),
222    }
223}
224
/// Rewrites literal values in comparison expressions to match the column's arrow type.
struct LiteralTypeCaster {
    /// Schema used to look up the arrow data type of each referenced column.
    schema: DFSchemaRef,
}
229
impl TreeNodeRewriter for LiteralTypeCaster {
    type Node = DfExpr;

    /// Bottom-up rewrite: for a comparison between a column and a literal,
    /// cast the literal to the column's arrow type so downstream filter
    /// evaluation compares values of identical types.
    fn f_up(&mut self, expr: DfExpr) -> datafusion_common::Result<Transformed<DfExpr>> {
        // Only binary expressions are candidates; everything else passes through.
        let DfExpr::BinaryExpr(BinaryExpr { left, op, right }) = &expr else {
            return Ok(Transformed::no(expr));
        };

        // Only comparison operators — arithmetic/logical ops are left untouched.
        if !matches!(
            op,
            Operator::Eq
                | Operator::NotEq
                | Operator::Lt
                | Operator::LtEq
                | Operator::Gt
                | Operator::GtEq
        ) {
            return Ok(Transformed::no(expr));
        }

        // Accept either `col <op> lit` or `lit <op> col`; remember which side
        // the column was on so the operand order is preserved in the rewrite.
        let (col_expr, lit_expr, col_left) = match (left.as_ref(), right.as_ref()) {
            (col @ DfExpr::Column(_), lit @ DfExpr::Literal(_, _)) => (col, lit, true),
            (lit @ DfExpr::Literal(_, _), col @ DfExpr::Column(_)) => (col, lit, false),
            _ => return Ok(Transformed::no(expr)),
        };

        let col_type = col_expr.get_type(self.schema.as_ref())?;
        // The match above guarantees `lit_expr` is a Literal.
        let DfExpr::Literal(scalar, _) = lit_expr else {
            unreachable!()
        };

        // Already the right type — nothing to do.
        if scalar.data_type() == col_type {
            return Ok(Transformed::no(expr));
        }

        // Cast via a one-element arrow array, then extract the scalar back out.
        let lit_array = scalar.to_array()?;
        let casted = compute::cast(lit_array.as_ref(), &col_type).map_err(|e| {
            datafusion_common::DataFusionError::Internal(format!(
                "Failed to cast literal {:?} to {:?}: {}",
                scalar, col_type, e
            ))
        })?;
        let casted_scalar = ScalarValue::try_from_array(&casted, 0)?;

        // Rebuild the comparison with the casted literal on the original side.
        let new_lit = DfExpr::Literal(casted_scalar, None);
        let (new_left, new_right) = if col_left {
            (left.clone(), Box::new(new_lit))
        } else {
            (Box::new(new_lit), right.clone())
        };

        Ok(Transformed::yes(DfExpr::BinaryExpr(BinaryExpr {
            left: new_left,
            op: *op,
            right: new_right,
        })))
    }
}
288
289fn convert_literal_types(
290    exprs: Vec<DfExpr>,
291    schema: &DFSchemaRef,
292) -> datafusion_common::Result<Vec<DfExpr>> {
293    use datafusion_common::tree_node::TreeNode;
294
295    let mut caster = LiteralTypeCaster {
296        schema: schema.clone(),
297    };
298    exprs
299        .into_iter()
300        .map(|e| e.rewrite(&mut caster).map(|x| x.data))
301        .collect()
302}
303
/// Parses the scan config's SQL filter strings into DataFusion logical
/// expressions against the region's schema.
///
/// Pipeline per filter: sqlparser (generic dialect) -> SQL AST expression ->
/// DataFusion logical expr -> literal type casting (see
/// [`convert_literal_types`]). Returns an empty vec when no filters are set.
fn resolve_filters(
    scan_config: &ScanConfig,
    metadata: &RegionMetadata,
) -> error::Result<Vec<DfExpr>> {
    let Some(filters) = &scan_config.filters else {
        return Ok(Vec::new());
    };

    // Build a DataFusion schema from the region's arrow schema so column
    // references in the filters can be resolved and type-checked.
    let df_schema = metadata
        .schema
        .arrow_schema()
        .clone()
        .to_dfschema()
        .map_err(|e| {
            error::IllegalConfigSnafu {
                msg: format!("Failed to convert region schema to DataFusion schema: {e}"),
            }
            .build()
        })?;

    // Minimal session state; only used for SQL-expr -> logical-expr planning.
    let state = SessionStateBuilder::new()
        .with_config(Default::default())
        .with_runtime_env(Default::default())
        .with_default_features()
        .build();

    let exprs: Vec<DfExpr> = filters
        .iter()
        .enumerate()
        .map(|(idx, filter)| {
            // Each filter string is parsed as a single standalone expression.
            let mut parser = SqlParser::new(&GenericDialect {})
                .try_with_sql(filter)
                .map_err(|e| {
                    error::IllegalConfigSnafu {
                        msg: format!("Invalid filter at index {idx} ('{filter}'): {e}"),
                    }
                    .build()
                })?;

            let sql_expr = parser.parse_expr().map_err(|e| {
                error::IllegalConfigSnafu {
                    msg: format!("Invalid filter at index {idx} ('{filter}'): {e}"),
                }
                .build()
            })?;

            state
                .create_logical_expr_from_sql_expr(
                    SqlExprWithAlias {
                        expr: sql_expr,
                        alias: None,
                    },
                    &df_schema,
                )
                .map_err(|e| {
                    error::IllegalConfigSnafu {
                        msg: format!(
                            "Failed to convert filter at index {idx} ('{filter}') to logical expr: {e}"
                        ),
                    }
                    .build()
                })
        })
        .collect::<error::Result<Vec<_>>>()?;

    // Align literal types with column types so filters compare like-for-like.
    let df_schema_ref = Arc::new(df_schema);
    convert_literal_types(exprs, &df_schema_ref).map_err(|e| {
        error::IllegalConfigSnafu {
            msg: format!("Failed to convert filter expression types: {e}"),
        }
        .build()
    })
}
377
378fn noop_partition_expr_fetcher() -> mito2::region::opener::PartitionExprFetcherRef {
379    struct NoopPartitionExprFetcher;
380
381    #[async_trait::async_trait]
382    impl mito2::region::opener::PartitionExprFetcher for NoopPartitionExprFetcher {
383        async fn fetch_expr(&self, _region_id: RegionId) -> Option<String> {
384            None
385        }
386    }
387
388    Arc::new(NoopPartitionExprFetcher)
389}
390
/// Bundles everything needed to construct a [`MitoEngine`] except the log
/// store, so the WAL-backend choice can be deferred (see [`EngineComponents::build`]).
struct EngineComponents {
    /// Data home directory from the storage config.
    data_home: String,
    /// Mito engine configuration parsed from the config file.
    mito_config: MitoConfig,
    /// Object store manager wrapping the configured object store.
    object_store_manager: Arc<ObjectStoreManager>,
    /// Schema metadata manager (mocked in this benchmark; see
    /// `mock_schema_metadata_manager`).
    schema_metadata_manager: Arc<SchemaMetadataManager>,
    /// Tracks SST file references.
    file_ref_manager: Arc<FileReferenceManager>,
    /// Partition expression fetcher (a no-op stub here).
    partition_expr_fetcher: mito2::region::opener::PartitionExprFetcherRef,
}
399
impl EngineComponents {
    /// Consumes the components and builds a [`MitoEngine`] on top of the
    /// given log store. Generic over the log-store type so the caller can
    /// pick raft-engine, Kafka, or the no-op store at runtime.
    ///
    /// # Errors
    /// Returns a CLI-build error if engine construction fails.
    async fn build<S: store_api::logstore::LogStore>(
        self,
        log_store: Arc<S>,
    ) -> error::Result<MitoEngine> {
        // Argument order mirrors `MitoEngine::new`'s signature.
        MitoEngine::new(
            &self.data_home,
            self.mito_config,
            log_store,
            self.object_store_manager,
            self.schema_metadata_manager,
            self.file_ref_manager,
            self.partition_expr_fetcher,
            Plugins::default(),
        )
        .await
        .map_err(BoxedError::new)
        .context(error::BuildCliSnafu)
    }
}
420
421fn mock_schema_metadata_manager() -> Arc<SchemaMetadataManager> {
422    let kv_backend = Arc::new(MemoryKvBackend::new());
423    let table_schema_cache = Arc::new(new_table_schema_cache(
424        "table_schema_name_cache".to_string(),
425        CacheBuilder::default().build(),
426        kv_backend.clone(),
427    ));
428    let schema_cache = Arc::new(new_schema_cache(
429        "schema_cache".to_string(),
430        CacheBuilder::default().build(),
431        kv_backend.clone(),
432    ));
433    Arc::new(SchemaMetadataManager::new(table_schema_cache, schema_cache))
434}
435
impl ScanbenchCommand {
    /// Runs the scan benchmark end to end:
    /// 1. parse CLI inputs and the config file, build the object store;
    /// 2. construct a `MitoEngine` with the chosen WAL backend (or no-op);
    /// 3. open the target region and resolve the scan request
    ///    (projection / filters / scanner type / row selector);
    /// 4. run `iterations` scans, optionally re-partitioned for parallelism,
    ///    counting rows and memory sizes per iteration;
    /// 5. optionally capture a pprof flamegraph (unix only) and print a summary.
    ///
    /// # Errors
    /// Returns an error for invalid CLI/config values or any engine failure.
    pub async fn run(&self) -> error::Result<()> {
        if self.verbose {
            common_telemetry::init_default_ut_logging();
        }

        println!("{}", "Starting scanbench...".cyan().bold());

        let region_id = parse_region_id(&self.region_id)?;
        let path_type = parse_path_type(&self.path_type)?;
        println!(
            "{} Region ID: {} (u64: {})",
            "✓".green(),
            self.region_id,
            region_id.as_u64()
        );

        // Parse config and build object store
        let (store_cfg, mito_config, wal_config) = parse_config(&self.config)?;
        println!("{} Config parsed", "✓".green());

        let object_store = build_object_store(&store_cfg).await?;
        println!("{} Object store initialized", "✓".green());

        let object_store_manager =
            Arc::new(ObjectStoreManager::new("default", object_store.clone()));

        // Create mock dependencies
        let schema_metadata_manager = mock_schema_metadata_manager();
        let file_ref_manager = Arc::new(FileReferenceManager::new(None));
        let partition_expr_fetcher = noop_partition_expr_fetcher();

        // Create MitoEngine with appropriate log store
        let components = EngineComponents {
            data_home: store_cfg.data_home.clone(),
            mito_config,
            object_store_manager,
            schema_metadata_manager,
            file_ref_manager,
            partition_expr_fetcher,
        };

        // Pick the log store from the WAL config. Real WAL backends are only
        // used when --enable-wal is set; otherwise a no-op store is enough
        // because WAL replay is skipped on open.
        let engine = match &wal_config {
            DatanodeWalConfig::RaftEngine(raft_engine_config) if self.enable_wal => {
                let data_home = normalize_dir(&store_cfg.data_home);
                // Default WAL dir lives under the data home unless overridden.
                let wal_dir = match &raft_engine_config.dir {
                    Some(dir) => dir.clone(),
                    None => format!("{}{WAL_DIR}", data_home),
                };
                fs::create_dir_all(&wal_dir).await.map_err(|e| {
                    error::IllegalConfigSnafu {
                        msg: format!("failed to create WAL directory {}: {e}", wal_dir),
                    }
                    .build()
                })?;
                let log_store = Arc::new(
                    RaftEngineLogStore::try_new(wal_dir, raft_engine_config)
                        .await
                        .map_err(BoxedError::new)
                        .context(error::BuildCliSnafu)?,
                );
                println!("{} Using RaftEngine WAL", "✓".green());
                components.build(log_store).await?
            }
            DatanodeWalConfig::Kafka(kafka_config) if self.enable_wal => {
                let log_store = Arc::new(
                    KafkaLogStore::try_new(kafka_config, None)
                        .await
                        .map_err(BoxedError::new)
                        .context(error::BuildCliSnafu)?,
                );
                println!("{} Using Kafka WAL", "✓".green());
                components.build(log_store).await?
            }
            _ => {
                let log_store = Arc::new(NoopLogStore);
                println!(
                    "{} Using NoopLogStore (enable_wal={})",
                    "✓".green(),
                    self.enable_wal
                );
                components.build(log_store).await?
            }
        };

        // Open region
        let open_request = RegionOpenRequest {
            engine: "mito".to_string(),
            table_dir: self.table_dir.clone(),
            path_type,
            options: HashMap::default(),
            // Replay is only meaningful when a real WAL backend was selected.
            skip_wal_replay: !self.enable_wal,
            checkpoint: None,
        };

        engine
            .handle_request(region_id, RegionRequest::Open(open_request))
            .await
            .map_err(BoxedError::new)
            .context(error::BuildCliSnafu)?;
        println!("{} Region opened", "✓".green());

        // Load scan config
        let scan_config = if let Some(path) = &self.scan_config {
            let content = tokio::fs::read_to_string(path)
                .await
                .context(error::FileIoSnafu)?;
            serde_json::from_str::<ScanConfig>(&content).context(error::SerdeJsonSnafu)?
        } else {
            ScanConfig::default()
        };
        let metadata = engine
            .get_metadata(region_id)
            .await
            .map_err(BoxedError::new)
            .context(error::BuildCliSnafu)?;
        let projection = resolve_projection(&scan_config, Some(&metadata))?;
        let filters = resolve_filters(&scan_config, &metadata)?;

        // Build scan request
        let distribution = match self.scanner.as_str() {
            "seq" => None,
            "unordered" => Some(TimeSeriesDistribution::TimeWindowed),
            "series" => Some(TimeSeriesDistribution::PerSeries),
            other => {
                return Err(error::IllegalConfigSnafu {
                    msg: format!(
                        "Unknown scanner type '{}', expected: seq, unordered, series",
                        other
                    ),
                }
                .build());
            }
        };

        let series_row_selector = match scan_config.series_row_selector.as_deref() {
            Some("last_row") => Some(TimeSeriesRowSelector::LastRow),
            Some(other) => {
                return Err(error::IllegalConfigSnafu {
                    msg: format!("Unknown series_row_selector '{}'", other),
                }
                .build());
            }
            None => None,
        };

        println!(
            "{} Scanner: {}, Parallelism: {}, Iterations: {}, Force flat format: {}",
            "ℹ".blue(),
            self.scanner,
            self.parallelism,
            self.iterations,
            self.force_flat_format,
        );

        // Start profiling if pprof_file is specified (unless pprof_after_warmup is set)
        #[cfg(unix)]
        let mut profiler_guard = if self.pprof_file.is_some() && !self.pprof_after_warmup {
            println!("{} Starting profiling...", "⚡".yellow());
            Some(
                pprof::ProfilerGuardBuilder::default()
                    .frequency(99)
                    // Skip low-level runtime frames that only add noise.
                    .blocklist(&["libc", "libgcc", "pthread", "vdso"])
                    .build()
                    .map_err(|e| {
                        BoxedError::new(PlainError::new(
                            format!("Failed to start profiler: {e}"),
                            StatusCode::Unexpected,
                        ))
                    })
                    .context(error::BuildCliSnafu)?,
            )
        } else {
            None
        };

        #[cfg(not(unix))]
        if self.pprof_file.is_some() {
            eprintln!(
                "{}: Profiling is not supported on this platform",
                "Warning".yellow()
            );
        }

        let mut total_rows_all = 0u64;
        let mut total_elapsed_all = std::time::Duration::ZERO;

        for iteration in 0..self.iterations {
            // A fresh request per iteration: `handle_query` consumes it.
            let request = ScanRequest {
                projection: projection.clone(),
                filters: filters.clone(),
                series_row_selector,
                distribution,
                force_flat_format: self.force_flat_format,
                ..Default::default()
            };

            let start = Instant::now();

            // Get scanner
            let mut scanner = engine
                .handle_query(region_id, request)
                .await
                .map_err(BoxedError::new)
                .context(error::BuildCliSnafu)?;

            // Get partition ranges and apply parallelism
            let original_partitions = scanner.properties().partitions.clone();
            let total_ranges: usize = original_partitions.iter().map(|p| p.len()).sum();

            if self.verbose {
                println!(
                    "  {} Original partitions: {}, total ranges: {}",
                    "ℹ".blue(),
                    original_partitions.len(),
                    total_ranges
                );
            }

            if self.parallelism > 1 {
                // Flatten all ranges
                let all_ranges: Vec<_> = original_partitions.into_iter().flatten().collect();

                // Distribute ranges across partitions
                let mut partitions =
                    ParallelizeScan::assign_partition_range(all_ranges, self.parallelism);

                // Sort ranges within each partition by start time ascending
                for partition in &mut partitions {
                    partition.sort_by_key(|a| a.start);
                }

                scanner
                    .prepare(
                        PrepareRequest::default()
                            .with_ranges(partitions)
                            .with_target_partitions(self.parallelism),
                    )
                    .map_err(BoxedError::new)
                    .context(error::BuildCliSnafu)?;
            }

            // Scan all partitions
            let num_partitions = scanner.properties().partitions.len();
            let ctx = QueryScanContext {
                explain_verbose: self.verbose,
            };
            let metrics_set = ExecutionPlanMetricsSet::new();

            // One spawned task per partition; FuturesUnordered lets us drain
            // results in completion order.
            let mut scan_futures = FuturesUnordered::new();

            for partition_idx in 0..num_partitions {
                let mut stream = scanner
                    .scan_partition(&ctx, &metrics_set, partition_idx)
                    .map_err(BoxedError::new)
                    .context(error::BuildCliSnafu)?;

                scan_futures.push(tokio::spawn(async move {
                    let mut rows = 0u64;
                    let mut array_mem_size = 0u64;
                    let mut estimated_size = 0u64;
                    // Drain the partition stream, accumulating row and size counters.
                    while let Some(batch_result) = stream.next().await {
                        match batch_result {
                            Ok(batch) => {
                                rows += batch.num_rows() as u64;
                                let df_batch = batch.df_record_batch();
                                array_mem_size += df_batch.get_array_memory_size() as u64;
                                estimated_size +=
                                    mito2::memtable::record_batch_estimated_size(df_batch) as u64;
                            }
                            Err(e) => {
                                return Err(BoxedError::new(e));
                            }
                        }
                    }
                    Ok::<(u64, u64, u64), BoxedError>((rows, array_mem_size, estimated_size))
                }));
            }

            let mut total_rows = 0u64;
            let mut total_array_mem_size = 0u64;
            let mut total_estimated_size = 0u64;
            while let Some(task) = scan_futures.next().await {
                // Outer error: the spawned task panicked or was cancelled.
                let result = task
                    .map_err(|e| {
                        BoxedError::new(PlainError::new(
                            format!("scan task failed: {e}"),
                            StatusCode::Unexpected,
                        ))
                    })
                    .context(error::BuildCliSnafu)?;
                // Inner error: the scan stream itself returned an error.
                let (rows, array_mem_size, estimated_size) =
                    result.context(error::BuildCliSnafu)?;
                total_rows += rows;
                total_array_mem_size += array_mem_size;
                total_estimated_size += estimated_size;
            }

            let elapsed = start.elapsed();
            total_rows_all += total_rows;
            total_elapsed_all += elapsed;

            println!(
                "  [iter {}] {} rows in {:?} ({} partitions), array_mem_size: {}, estimated_size: {}",
                iteration + 1,
                total_rows.to_string().cyan(),
                elapsed,
                num_partitions,
                format_bytes(total_array_mem_size),
                format_bytes(total_estimated_size),
            );

            // Start profiling after the first iteration (warmup) if pprof_after_warmup is set
            #[cfg(unix)]
            if iteration == 0
                && self.pprof_after_warmup
                && self.pprof_file.is_some()
                && profiler_guard.is_none()
            {
                println!(
                    "{} Starting profiling after warmup iteration...",
                    "⚡".yellow()
                );
                profiler_guard = Some(
                    pprof::ProfilerGuardBuilder::default()
                        .frequency(99)
                        .blocklist(&["libc", "libgcc", "pthread", "vdso"])
                        .build()
                        .map_err(|e| {
                            BoxedError::new(PlainError::new(
                                format!("Failed to start profiler: {e}"),
                                StatusCode::Unexpected,
                            ))
                        })
                        .context(error::BuildCliSnafu)?,
                );
            }
        }

        // Stop profiling and generate flamegraph if enabled
        #[cfg(unix)]
        if let (Some(guard), Some(pprof_file)) = (profiler_guard, &self.pprof_file) {
            println!("{} Generating flamegraph...", "🔥".yellow());
            // Flamegraph failures are reported but do not fail the benchmark.
            match guard.report().build() {
                Ok(report) => {
                    let mut flamegraph_data = Vec::new();
                    if let Err(e) = report.flamegraph(&mut flamegraph_data) {
                        println!("{}: Failed to generate flamegraph: {}", "Error".red(), e);
                    } else if let Err(e) = std::fs::write(pprof_file, flamegraph_data) {
                        println!(
                            "{}: Failed to write flamegraph to {}: {}",
                            "Error".red(),
                            pprof_file.display(),
                            e
                        );
                    } else {
                        println!(
                            "{} Flamegraph saved to {}",
                            "✓".green(),
                            pprof_file.display().to_string().cyan()
                        );
                    }
                }
                Err(e) => {
                    println!("{}: Failed to generate pprof report: {}", "Error".red(), e);
                }
            }
        }

        // Summary
        if self.iterations > 1 {
            let avg_elapsed = total_elapsed_all / self.iterations as u32;
            let avg_rows = total_rows_all / self.iterations as u64;
            println!(
                "\n{} Average: {} rows in {:?} over {} iterations",
                "Summary".green().bold(),
                avg_rows.to_string().cyan(),
                avg_elapsed,
                self.iterations,
            );
        }

        println!("\n{}", "Benchmark completed!".green().bold());
        Ok(())
    }
}
822
#[cfg(test)]
mod tests {
    use datatypes::prelude::ConcreteDataType;
    use datatypes::schema::ColumnSchema;
    use sqlparser::ast::{BinaryOperator, Expr};
    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::Parser;
    use store_api::metadata::{ColumnMetadata, RegionMetadataBuilder};
    use store_api::storage::RegionId;

    use super::{ScanConfig, resolve_filters, resolve_projection};
    use crate::error;

    /// Deserializing a config with only `projection_names` populates that
    /// field and leaves the index-based `projection` unset.
    #[test]
    fn test_parse_scan_config_projection_names() {
        let input = r#"{"projection_names":["host","ts"]}"#;
        let parsed: ScanConfig = serde_json::from_str(input).unwrap();

        assert_eq!(parsed.projection, None);
        assert_eq!(
            parsed.projection_names,
            Some(vec!["host".to_string(), "ts".to_string()])
        );
    }

    /// Index-based projection is passed through as-is and needs no metadata.
    #[test]
    fn test_resolve_projection_by_indexes() -> error::Result<()> {
        let cfg = ScanConfig {
            projection: Some(vec![0, 2]),
            projection_names: None,
            filters: None,
            series_row_selector: None,
        };

        assert_eq!(resolve_projection(&cfg, None)?, Some(vec![0, 2]));
        Ok(())
    }

    /// Name-based projection requires region metadata to map names to column
    /// indexes; resolving without it must fail with a descriptive error.
    #[test]
    fn test_resolve_projection_by_names_without_metadata() {
        let cfg = ScanConfig {
            projection: None,
            projection_names: Some(vec!["cpu".to_string(), "host".to_string()]),
            filters: None,
            series_row_selector: None,
        };

        let message = resolve_projection(&cfg, None).unwrap_err().to_string();
        assert!(message.contains("Missing region metadata while resolving 'projection_names'"));
    }

    /// Supplying both `projection` and `projection_names` is rejected, and the
    /// error message mentions both conflicting fields.
    #[test]
    fn test_resolve_projection_conflict_fields() {
        let cfg = ScanConfig {
            projection: Some(vec![0]),
            projection_names: Some(vec!["host".to_string()]),
            filters: None,
            series_row_selector: None,
        };

        let message = resolve_projection(&cfg, None).unwrap_err().to_string();
        assert!(message.contains("projection"));
        assert!(message.contains("projection_names"));
    }

    /// sqlparser accepts a bare (non-statement) expression string; the
    /// top-level operator of `a AND b` parses as `BinaryOperator::And`.
    #[test]
    fn test_sqlparser_parse_expr_string() {
        let dialect = GenericDialect {};
        let parsed = Parser::new(&dialect)
            .try_with_sql("host = 'web-1' AND cpu > 80")
            .unwrap()
            .parse_expr()
            .unwrap();

        match parsed {
            Expr::BinaryOp { op, .. } => assert_eq!(op, BinaryOperator::And),
            other => panic!("expected BinaryOp, got: {other:?}"),
        }
    }

    /// A filter comparing a UInt32 tag column to a bare integer literal is
    /// rewritten so the literal carries the column's concrete type.
    #[test]
    fn test_resolve_filters_uint32_type_conversion() {
        use api::v1::SemanticType;

        // Region schema: one UInt32 tag column plus the mandatory time index.
        let tag = ColumnMetadata {
            column_schema: ColumnSchema::new(
                "table_id",
                ConcreteDataType::uint32_datatype(),
                false,
            ),
            semantic_type: SemanticType::Tag,
            column_id: 1,
        };
        let time_index = ColumnMetadata {
            column_schema: ColumnSchema::new(
                "ts",
                ConcreteDataType::timestamp_millisecond_datatype(),
                false,
            ),
            semantic_type: SemanticType::Timestamp,
            column_id: 2,
        };

        let mut builder = RegionMetadataBuilder::new(RegionId::new(1, 0));
        builder
            .push_column_metadata(tag)
            .push_column_metadata(time_index)
            .primary_key(vec![1]);
        let metadata = builder.build().unwrap();

        let cfg = ScanConfig {
            projection: None,
            projection_names: None,
            filters: Some(vec!["table_id = 1117".to_string()]),
            series_row_selector: None,
        };

        let exprs = resolve_filters(&cfg, &metadata).unwrap();
        assert_eq!(exprs.len(), 1);
        // After conversion the literal must render with the column's type.
        let expr_str = exprs[0].to_string();
        assert!(
            expr_str.contains("UInt32(1117)"),
            "Expected UInt32(1117) in expression, got: {expr_str}"
        );
    }

    /// Deserializing a config with a `filters` array round-trips the raw
    /// filter strings unchanged.
    #[test]
    fn test_parse_scan_config_filters() {
        let input = r#"{"filters":["host = 'web-1'","cpu > 80"]}"#;
        let parsed: ScanConfig = serde_json::from_str(input).unwrap();

        let expected = vec!["host = 'web-1'".to_string(), "cpu > 80".to_string()];
        assert_eq!(parsed.filters, Some(expected));
    }
}