1use std::collections::HashMap;
16use std::path::PathBuf;
17use std::sync::Arc;
18use std::time::Instant;
19
20use clap::Parser;
21use colored::Colorize;
22use common_base::Plugins;
23use common_error::ext::{BoxedError, PlainError};
24use common_error::status_code::StatusCode;
25use common_meta::cache::{new_schema_cache, new_table_schema_cache};
26use common_meta::key::SchemaMetadataManager;
27use common_meta::kv_backend::memory::MemoryKvBackend;
28use common_wal::config::DatanodeWalConfig;
29use datafusion::execution::SessionStateBuilder;
30use datafusion::logical_expr::{BinaryExpr, Expr as DfExpr, ExprSchemable, Operator};
31use datafusion_common::tree_node::{Transformed, TreeNodeRewriter};
32use datafusion_common::{DFSchemaRef, ScalarValue, ToDFSchema};
33use datafusion_physical_plan::metrics::ExecutionPlanMetricsSet;
34use datatypes::arrow::compute;
35use futures::StreamExt;
36use futures::stream::FuturesUnordered;
37use log_store::kafka::log_store::KafkaLogStore;
38use log_store::noop::log_store::NoopLogStore;
39use log_store::raft_engine::log_store::RaftEngineLogStore;
40use mito2::config::MitoConfig;
41use mito2::engine::MitoEngine;
42use mito2::sst::file_ref::FileReferenceManager;
43use moka::future::CacheBuilder;
44use object_store::manager::ObjectStoreManager;
45use object_store::util::normalize_dir;
46use query::optimizer::parallelize_scan::ParallelizeScan;
47use serde::Deserialize;
48use snafu::{OptionExt, ResultExt};
49use sqlparser::ast::ExprWithAlias as SqlExprWithAlias;
50use sqlparser::dialect::GenericDialect;
51use sqlparser::parser::Parser as SqlParser;
52use store_api::metadata::RegionMetadata;
53use store_api::path_utils::WAL_DIR;
54use store_api::region_engine::{PrepareRequest, QueryScanContext, RegionEngine};
55use store_api::region_request::{PathType, RegionOpenRequest, RegionRequest};
56use store_api::storage::{RegionId, ScanRequest, TimeSeriesDistribution, TimeSeriesRowSelector};
57use tokio::fs;
58
59use crate::datanode::objbench::{build_object_store, parse_config};
60use crate::error;
61
// CLI command that opens a single mito region and benchmarks scanning it.
// NOTE(review): plain `//` comments are used deliberately — `///` doc comments
// on a clap-derived struct/fields become `--help` text and would change the
// CLI's observable output.
#[derive(Debug, Parser)]
pub struct ScanbenchCommand {
    // Path to the config file consumed by `parse_config` (store/mito/WAL config).
    #[clap(long, value_name = "FILE")]
    config: PathBuf,

    // Region to scan: either "table_id:region_num" or a plain u64
    // (see `parse_region_id`).
    #[clap(long)]
    region_id: String,

    // Table directory passed through to `RegionOpenRequest::table_dir`.
    #[clap(long)]
    table_dir: String,

    // Scanner kind: "seq", "unordered", or "series" (validated in `run`).
    #[clap(long, default_value = "seq")]
    scanner: String,

    // Optional JSON file deserialized into `ScanConfig`.
    #[clap(long, value_name = "FILE")]
    scan_config: Option<PathBuf>,

    // Target number of scan partitions; >1 triggers range re-assignment.
    #[clap(long, default_value = "1")]
    parallelism: usize,

    // Number of benchmark iterations to run.
    #[clap(long, default_value = "1")]
    iterations: usize,

    // Path layout of the region: "bare", "data", or "metadata"
    // (see `parse_path_type`).
    #[clap(long, default_value = "bare")]
    path_type: String,

    // Enables debug logging and extra per-iteration output.
    #[clap(short, long, default_value_t = false)]
    verbose: bool,

    // If set, a pprof flamegraph (SVG) is written to this file (unix only).
    #[clap(long, value_name = "FILE")]
    pprof_file: Option<PathBuf>,

    // Forces the flat record-batch format on the scan request.
    #[clap(long, default_value_t = false)]
    force_flat_format: bool,

    // Builds a real WAL log store and replays it on open; otherwise a
    // NoopLogStore is used and WAL replay is skipped.
    #[clap(long, default_value_t = false)]
    enable_wal: bool,

    // Delays starting the profiler until after the first (warmup) iteration.
    #[clap(long, default_value_t = false)]
    pprof_after_warmup: bool,
}
117
/// Scan options loaded from the optional `--scan-config` JSON file.
#[derive(Debug, Deserialize, Default)]
struct ScanConfig {
    // Column indexes to project; mutually exclusive with `projection_names`.
    projection: Option<Vec<usize>>,
    // Column names to project; resolved against region metadata.
    projection_names: Option<Vec<String>>,
    // SQL expression strings compiled into scan filters (see `resolve_filters`).
    filters: Option<Vec<String>>,
    // Per-series row selector; only "last_row" is accepted (checked in `run`).
    series_row_selector: Option<String>,
}
126
127fn resolve_projection(
128 scan_config: &ScanConfig,
129 metadata: Option<&RegionMetadata>,
130) -> error::Result<Option<Vec<usize>>> {
131 if scan_config.projection.is_some() && scan_config.projection_names.is_some() {
132 return Err(error::IllegalConfigSnafu {
133 msg: "scan config cannot contain both 'projection' and 'projection_names'".to_string(),
134 }
135 .build());
136 }
137
138 if let Some(projection) = &scan_config.projection {
139 return Ok(Some(projection.clone()));
140 }
141
142 if let Some(projection_names) = &scan_config.projection_names {
143 let metadata = metadata.context(error::IllegalConfigSnafu {
144 msg: "Missing region metadata while resolving 'projection_names'".to_string(),
145 })?;
146 let available_columns = metadata
147 .column_metadatas
148 .iter()
149 .map(|column| column.column_schema.name.as_str())
150 .collect::<Vec<_>>()
151 .join(", ");
152 let projection = projection_names
153 .iter()
154 .map(|name| {
155 metadata
156 .column_index_by_name(name)
157 .with_context(|| error::IllegalConfigSnafu {
158 msg: format!(
159 "Unknown column '{}' in projection_names, available columns: [{}]",
160 name, available_columns
161 ),
162 })
163 })
164 .collect::<error::Result<Vec<_>>>()?;
165 return Ok(Some(projection));
166 }
167
168 Ok(None)
169}
170
/// Renders a byte count as a human-readable string with binary units
/// (B, KiB, MiB, GiB); scaled units get two decimal places.
fn format_bytes(bytes: u64) -> String {
    const KIB: u64 = 1024;
    const MIB: u64 = 1024 * KIB;
    const GIB: u64 = 1024 * MIB;
    // Pick the largest unit the value reaches; raw bytes are printed unscaled.
    let (divisor, unit) = match bytes {
        b if b >= GIB => (GIB, "GiB"),
        b if b >= MIB => (MIB, "MiB"),
        b if b >= KIB => (KIB, "KiB"),
        _ => return format!("{} B", bytes),
    };
    format!("{:.2} {}", bytes as f64 / divisor as f64, unit)
}
185
186fn parse_region_id(s: &str) -> error::Result<RegionId> {
187 if s.contains(':') {
188 let parts: Vec<&str> = s.splitn(2, ':').collect();
189 let table_id: u32 = parts[0].parse().map_err(|e| {
190 error::IllegalConfigSnafu {
191 msg: format!("invalid table_id in region_id '{}': {}", s, e),
192 }
193 .build()
194 })?;
195 let region_num: u32 = parts[1].parse().map_err(|e| {
196 error::IllegalConfigSnafu {
197 msg: format!("invalid region_num in region_id '{}': {}", s, e),
198 }
199 .build()
200 })?;
201 Ok(RegionId::new(table_id, region_num))
202 } else {
203 let id: u64 = s.parse().map_err(|e| {
204 error::IllegalConfigSnafu {
205 msg: format!("invalid region_id '{}': {}", s, e),
206 }
207 .build()
208 })?;
209 Ok(RegionId::from_u64(id))
210 }
211}
212
213fn parse_path_type(s: &str) -> error::Result<PathType> {
214 match s.to_lowercase().as_str() {
215 "bare" => Ok(PathType::Bare),
216 "data" => Ok(PathType::Data),
217 "metadata" => Ok(PathType::Metadata),
218 _ => Err(error::IllegalConfigSnafu {
219 msg: format!("invalid path_type '{}', expected: bare, data, metadata", s),
220 }
221 .build()),
222 }
223}
224
/// Tree node rewriter that casts literal operands of binary comparisons to
/// the compared column's data type (see the `TreeNodeRewriter` impl).
struct LiteralTypeCaster {
    // Schema used to look up the column's data type during rewriting.
    schema: DFSchemaRef,
}
229
impl TreeNodeRewriter for LiteralTypeCaster {
    type Node = DfExpr;

    /// Rewrites `column <op> literal` (either operand order) so the literal is
    /// cast to the column's data type; all other expressions pass through
    /// unchanged.
    fn f_up(&mut self, expr: DfExpr) -> datafusion_common::Result<Transformed<DfExpr>> {
        // Only binary expressions are candidates.
        let DfExpr::BinaryExpr(BinaryExpr { left, op, right }) = &expr else {
            return Ok(Transformed::no(expr));
        };

        // Only comparison operators need literal/column type alignment.
        if !matches!(
            op,
            Operator::Eq
                | Operator::NotEq
                | Operator::Lt
                | Operator::LtEq
                | Operator::Gt
                | Operator::GtEq
        ) {
            return Ok(Transformed::no(expr));
        }

        // Accept either operand order and remember which side the column is on
        // so the rebuilt expression keeps the original orientation.
        let (col_expr, lit_expr, col_left) = match (left.as_ref(), right.as_ref()) {
            (col @ DfExpr::Column(_), lit @ DfExpr::Literal(_, _)) => (col, lit, true),
            (lit @ DfExpr::Literal(_, _), col @ DfExpr::Column(_)) => (col, lit, false),
            _ => return Ok(Transformed::no(expr)),
        };

        let col_type = col_expr.get_type(self.schema.as_ref())?;
        // `lit_expr` was matched as a literal above, so this cannot fail.
        let DfExpr::Literal(scalar, _) = lit_expr else {
            unreachable!()
        };

        // Nothing to do when the types already agree.
        if scalar.data_type() == col_type {
            return Ok(Transformed::no(expr));
        }

        // Cast the literal through a one-element Arrow array, then convert the
        // result back into a scalar.
        let lit_array = scalar.to_array()?;
        let casted = compute::cast(lit_array.as_ref(), &col_type).map_err(|e| {
            datafusion_common::DataFusionError::Internal(format!(
                "Failed to cast literal {:?} to {:?}: {}",
                scalar, col_type, e
            ))
        })?;
        let casted_scalar = ScalarValue::try_from_array(&casted, 0)?;

        // Rebuild the comparison with the casted literal on the original side.
        let new_lit = DfExpr::Literal(casted_scalar, None);
        let (new_left, new_right) = if col_left {
            (left.clone(), Box::new(new_lit))
        } else {
            (Box::new(new_lit), right.clone())
        };

        Ok(Transformed::yes(DfExpr::BinaryExpr(BinaryExpr {
            left: new_left,
            op: *op,
            right: new_right,
        })))
    }
}
288
289fn convert_literal_types(
290 exprs: Vec<DfExpr>,
291 schema: &DFSchemaRef,
292) -> datafusion_common::Result<Vec<DfExpr>> {
293 use datafusion_common::tree_node::TreeNode;
294
295 let mut caster = LiteralTypeCaster {
296 schema: schema.clone(),
297 };
298 exprs
299 .into_iter()
300 .map(|e| e.rewrite(&mut caster).map(|x| x.data))
301 .collect()
302}
303
/// Compiles the SQL filter strings from the scan config into DataFusion
/// logical expressions against the region's schema, then casts comparison
/// literals to the matching column types.
///
/// Returns an empty vector when no filters are configured; every parse or
/// conversion failure is reported as an illegal-config error naming the
/// offending filter and its index.
fn resolve_filters(
    scan_config: &ScanConfig,
    metadata: &RegionMetadata,
) -> error::Result<Vec<DfExpr>> {
    // No filters configured.
    let Some(filters) = &scan_config.filters else {
        return Ok(Vec::new());
    };

    // Convert the region's Arrow schema into a DataFusion schema for name
    // resolution and typing.
    let df_schema = metadata
        .schema
        .arrow_schema()
        .clone()
        .to_dfschema()
        .map_err(|e| {
            error::IllegalConfigSnafu {
                msg: format!("Failed to convert region schema to DataFusion schema: {e}"),
            }
            .build()
        })?;

    // A default session state is enough to plan standalone scalar expressions.
    let state = SessionStateBuilder::new()
        .with_config(Default::default())
        .with_runtime_env(Default::default())
        .with_default_features()
        .build();

    let exprs: Vec<DfExpr> = filters
        .iter()
        .enumerate()
        .map(|(idx, filter)| {
            // Parse the filter string as a SQL expression (generic dialect).
            let mut parser = SqlParser::new(&GenericDialect {})
                .try_with_sql(filter)
                .map_err(|e| {
                    error::IllegalConfigSnafu {
                        msg: format!("Invalid filter at index {idx} ('{filter}'): {e}"),
                    }
                    .build()
                })?;

            let sql_expr = parser.parse_expr().map_err(|e| {
                error::IllegalConfigSnafu {
                    msg: format!("Invalid filter at index {idx} ('{filter}'): {e}"),
                }
                .build()
            })?;

            // Lower the SQL AST to a DataFusion logical expression bound to
            // the region schema.
            state
                .create_logical_expr_from_sql_expr(
                    SqlExprWithAlias {
                        expr: sql_expr,
                        alias: None,
                    },
                    &df_schema,
                )
                .map_err(|e| {
                    error::IllegalConfigSnafu {
                        msg: format!(
                            "Failed to convert filter at index {idx} ('{filter}') to logical expr: {e}"
                        ),
                    }
                    .build()
                })
        })
        .collect::<error::Result<Vec<_>>>()?;

    // Align literal types with their compared columns (e.g. Int64 -> UInt32).
    let df_schema_ref = Arc::new(df_schema);
    convert_literal_types(exprs, &df_schema_ref).map_err(|e| {
        error::IllegalConfigSnafu {
            msg: format!("Failed to convert filter expression types: {e}"),
        }
        .build()
    })
}
377
/// Builds a partition expression fetcher whose `fetch_expr` always returns
/// `None`, i.e. no partition expression is ever supplied to the engine.
fn noop_partition_expr_fetcher() -> mito2::region::opener::PartitionExprFetcherRef {
    struct NoopPartitionExprFetcher;

    #[async_trait::async_trait]
    impl mito2::region::opener::PartitionExprFetcher for NoopPartitionExprFetcher {
        async fn fetch_expr(&self, _region_id: RegionId) -> Option<String> {
            None
        }
    }

    Arc::new(NoopPartitionExprFetcher)
}
390
/// Everything needed to construct a [`MitoEngine`] except the log store,
/// so the same components can be paired with any WAL implementation
/// (see `ScanbenchCommand::run`).
struct EngineComponents {
    // Data home directory passed to `MitoEngine::new`.
    data_home: String,
    mito_config: MitoConfig,
    object_store_manager: Arc<ObjectStoreManager>,
    schema_metadata_manager: Arc<SchemaMetadataManager>,
    file_ref_manager: Arc<FileReferenceManager>,
    partition_expr_fetcher: mito2::region::opener::PartitionExprFetcherRef,
}
399
impl EngineComponents {
    /// Consumes the components and builds a [`MitoEngine`] on top of the given
    /// log store, wrapping any construction error as a CLI build error.
    async fn build<S: store_api::logstore::LogStore>(
        self,
        log_store: Arc<S>,
    ) -> error::Result<MitoEngine> {
        MitoEngine::new(
            &self.data_home,
            self.mito_config,
            log_store,
            self.object_store_manager,
            self.schema_metadata_manager,
            self.file_ref_manager,
            self.partition_expr_fetcher,
            Plugins::default(),
        )
        .await
        .map_err(BoxedError::new)
        .context(error::BuildCliSnafu)
    }
}
420
/// Builds a [`SchemaMetadataManager`] whose caches are backed by a fresh
/// in-memory KV backend — a stand-in for real metadata storage, sufficient
/// for the benchmark.
fn mock_schema_metadata_manager() -> Arc<SchemaMetadataManager> {
    let kv_backend = Arc::new(MemoryKvBackend::new());
    let table_schema_cache = Arc::new(new_table_schema_cache(
        "table_schema_name_cache".to_string(),
        CacheBuilder::default().build(),
        kv_backend.clone(),
    ));
    let schema_cache = Arc::new(new_schema_cache(
        "schema_cache".to_string(),
        CacheBuilder::default().build(),
        kv_backend.clone(),
    ));
    Arc::new(SchemaMetadataManager::new(table_schema_cache, schema_cache))
}
435
impl ScanbenchCommand {
    /// Runs the scan benchmark end to end:
    /// 1. parse region id / path type and the config file;
    /// 2. build the object store and a `MitoEngine` (with the configured WAL
    ///    or a no-op log store);
    /// 3. open the region and resolve projection/filter scan options;
    /// 4. run `iterations` scans, optionally re-partitioned for parallelism,
    ///    draining every partition stream concurrently and tallying rows and
    ///    batch sizes;
    /// 5. optionally capture a pprof flamegraph (unix only) and print a
    ///    summary.
    pub async fn run(&self) -> error::Result<()> {
        if self.verbose {
            common_telemetry::init_default_ut_logging();
        }

        println!("{}", "Starting scanbench...".cyan().bold());

        let region_id = parse_region_id(&self.region_id)?;
        let path_type = parse_path_type(&self.path_type)?;
        println!(
            "{} Region ID: {} (u64: {})",
            "✓".green(),
            self.region_id,
            region_id.as_u64()
        );

        let (store_cfg, mito_config, wal_config) = parse_config(&self.config)?;
        println!("{} Config parsed", "✓".green());

        let object_store = build_object_store(&store_cfg).await?;
        println!("{} Object store initialized", "✓".green());

        let object_store_manager =
            Arc::new(ObjectStoreManager::new("default", object_store.clone()));

        // In-memory metadata plus no-op partition fetcher: the benchmark runs
        // without a metasrv.
        let schema_metadata_manager = mock_schema_metadata_manager();
        let file_ref_manager = Arc::new(FileReferenceManager::new(None));
        let partition_expr_fetcher = noop_partition_expr_fetcher();

        let components = EngineComponents {
            data_home: store_cfg.data_home.clone(),
            mito_config,
            object_store_manager,
            schema_metadata_manager,
            file_ref_manager,
            partition_expr_fetcher,
        };

        // Choose the log store: a real WAL only when --enable-wal is set,
        // otherwise a NoopLogStore (and WAL replay is skipped on open).
        let engine = match &wal_config {
            DatanodeWalConfig::RaftEngine(raft_engine_config) if self.enable_wal => {
                let data_home = normalize_dir(&store_cfg.data_home);
                // Default WAL dir lives under the data home when unset.
                let wal_dir = match &raft_engine_config.dir {
                    Some(dir) => dir.clone(),
                    None => format!("{}{WAL_DIR}", data_home),
                };
                fs::create_dir_all(&wal_dir).await.map_err(|e| {
                    error::IllegalConfigSnafu {
                        msg: format!("failed to create WAL directory {}: {e}", wal_dir),
                    }
                    .build()
                })?;
                let log_store = Arc::new(
                    RaftEngineLogStore::try_new(wal_dir, raft_engine_config)
                        .await
                        .map_err(BoxedError::new)
                        .context(error::BuildCliSnafu)?,
                );
                println!("{} Using RaftEngine WAL", "✓".green());
                components.build(log_store).await?
            }
            DatanodeWalConfig::Kafka(kafka_config) if self.enable_wal => {
                let log_store = Arc::new(
                    KafkaLogStore::try_new(kafka_config, None)
                        .await
                        .map_err(BoxedError::new)
                        .context(error::BuildCliSnafu)?,
                );
                println!("{} Using Kafka WAL", "✓".green());
                components.build(log_store).await?
            }
            _ => {
                let log_store = Arc::new(NoopLogStore);
                println!(
                    "{} Using NoopLogStore (enable_wal={})",
                    "✓".green(),
                    self.enable_wal
                );
                components.build(log_store).await?
            }
        };

        let open_request = RegionOpenRequest {
            engine: "mito".to_string(),
            table_dir: self.table_dir.clone(),
            path_type,
            options: HashMap::default(),
            // Replay only makes sense when a real WAL was built above.
            skip_wal_replay: !self.enable_wal,
            checkpoint: None,
        };

        engine
            .handle_request(region_id, RegionRequest::Open(open_request))
            .await
            .map_err(BoxedError::new)
            .context(error::BuildCliSnafu)?;
        println!("{} Region opened", "✓".green());

        // Load scan options (projection/filters/selector) from the optional
        // JSON file; default to an empty config.
        let scan_config = if let Some(path) = &self.scan_config {
            let content = tokio::fs::read_to_string(path)
                .await
                .context(error::FileIoSnafu)?;
            serde_json::from_str::<ScanConfig>(&content).context(error::SerdeJsonSnafu)?
        } else {
            ScanConfig::default()
        };
        let metadata = engine
            .get_metadata(region_id)
            .await
            .map_err(BoxedError::new)
            .context(error::BuildCliSnafu)?;
        let projection = resolve_projection(&scan_config, Some(&metadata))?;
        let filters = resolve_filters(&scan_config, &metadata)?;

        // Map the scanner flag to a time-series distribution ("seq" = none).
        let distribution = match self.scanner.as_str() {
            "seq" => None,
            "unordered" => Some(TimeSeriesDistribution::TimeWindowed),
            "series" => Some(TimeSeriesDistribution::PerSeries),
            other => {
                return Err(error::IllegalConfigSnafu {
                    msg: format!(
                        "Unknown scanner type '{}', expected: seq, unordered, series",
                        other
                    ),
                }
                .build());
            }
        };

        let series_row_selector = match scan_config.series_row_selector.as_deref() {
            Some("last_row") => Some(TimeSeriesRowSelector::LastRow),
            Some(other) => {
                return Err(error::IllegalConfigSnafu {
                    msg: format!("Unknown series_row_selector '{}'", other),
                }
                .build());
            }
            None => None,
        };

        println!(
            "{} Scanner: {}, Parallelism: {}, Iterations: {}, Force flat format: {}",
            "ℹ".blue(),
            self.scanner,
            self.parallelism,
            self.iterations,
            self.force_flat_format,
        );

        // Start profiling now unless it is deferred to after the warmup
        // iteration (unix only; pprof is unavailable elsewhere).
        #[cfg(unix)]
        let mut profiler_guard = if self.pprof_file.is_some() && !self.pprof_after_warmup {
            println!("{} Starting profiling...", "⚡".yellow());
            Some(
                pprof::ProfilerGuardBuilder::default()
                    .frequency(99)
                    .blocklist(&["libc", "libgcc", "pthread", "vdso"])
                    .build()
                    .map_err(|e| {
                        BoxedError::new(PlainError::new(
                            format!("Failed to start profiler: {e}"),
                            StatusCode::Unexpected,
                        ))
                    })
                    .context(error::BuildCliSnafu)?,
            )
        } else {
            None
        };

        #[cfg(not(unix))]
        if self.pprof_file.is_some() {
            eprintln!(
                "{}: Profiling is not supported on this platform",
                "Warning".yellow()
            );
        }

        let mut total_rows_all = 0u64;
        let mut total_elapsed_all = std::time::Duration::ZERO;

        for iteration in 0..self.iterations {
            let request = ScanRequest {
                projection: projection.clone(),
                filters: filters.clone(),
                series_row_selector,
                distribution,
                force_flat_format: self.force_flat_format,
                ..Default::default()
            };

            let start = Instant::now();

            let mut scanner = engine
                .handle_query(region_id, request)
                .await
                .map_err(BoxedError::new)
                .context(error::BuildCliSnafu)?;

            let original_partitions = scanner.properties().partitions.clone();
            let total_ranges: usize = original_partitions.iter().map(|p| p.len()).sum();

            if self.verbose {
                println!(
                    "  {} Original partitions: {}, total ranges: {}",
                    "ℹ".blue(),
                    original_partitions.len(),
                    total_ranges
                );
            }

            // Re-balance all ranges across `parallelism` partitions before
            // scanning, when more than one partition was requested.
            if self.parallelism > 1 {
                let all_ranges: Vec<_> = original_partitions.into_iter().flatten().collect();

                let mut partitions =
                    ParallelizeScan::assign_partition_range(all_ranges, self.parallelism);

                for partition in &mut partitions {
                    partition.sort_by_key(|a| a.start);
                }

                scanner
                    .prepare(
                        PrepareRequest::default()
                            .with_ranges(partitions)
                            .with_target_partitions(self.parallelism),
                    )
                    .map_err(BoxedError::new)
                    .context(error::BuildCliSnafu)?;
            }

            let num_partitions = scanner.properties().partitions.len();
            let ctx = QueryScanContext {
                explain_verbose: self.verbose,
            };
            let metrics_set = ExecutionPlanMetricsSet::new();

            // Drain every partition stream on its own tokio task, counting
            // rows and memory sizes per task.
            let mut scan_futures = FuturesUnordered::new();

            for partition_idx in 0..num_partitions {
                let mut stream = scanner
                    .scan_partition(&ctx, &metrics_set, partition_idx)
                    .map_err(BoxedError::new)
                    .context(error::BuildCliSnafu)?;

                scan_futures.push(tokio::spawn(async move {
                    let mut rows = 0u64;
                    let mut array_mem_size = 0u64;
                    let mut estimated_size = 0u64;
                    while let Some(batch_result) = stream.next().await {
                        match batch_result {
                            Ok(batch) => {
                                rows += batch.num_rows() as u64;
                                let df_batch = batch.df_record_batch();
                                array_mem_size += df_batch.get_array_memory_size() as u64;
                                estimated_size +=
                                    mito2::memtable::record_batch_estimated_size(df_batch) as u64;
                            }
                            Err(e) => {
                                return Err(BoxedError::new(e));
                            }
                        }
                    }
                    Ok::<(u64, u64, u64), BoxedError>((rows, array_mem_size, estimated_size))
                }));
            }

            // Aggregate the per-partition tallies as tasks complete.
            let mut total_rows = 0u64;
            let mut total_array_mem_size = 0u64;
            let mut total_estimated_size = 0u64;
            while let Some(task) = scan_futures.next().await {
                let result = task
                    .map_err(|e| {
                        BoxedError::new(PlainError::new(
                            format!("scan task failed: {e}"),
                            StatusCode::Unexpected,
                        ))
                    })
                    .context(error::BuildCliSnafu)?;
                let (rows, array_mem_size, estimated_size) =
                    result.context(error::BuildCliSnafu)?;
                total_rows += rows;
                total_array_mem_size += array_mem_size;
                total_estimated_size += estimated_size;
            }

            let elapsed = start.elapsed();
            total_rows_all += total_rows;
            total_elapsed_all += elapsed;

            println!(
                "  [iter {}] {} rows in {:?} ({} partitions), array_mem_size: {}, estimated_size: {}",
                iteration + 1,
                total_rows.to_string().cyan(),
                elapsed,
                num_partitions,
                format_bytes(total_array_mem_size),
                format_bytes(total_estimated_size),
            );

            // Deferred profiling: start the guard once, right after the first
            // (warmup) iteration finished.
            #[cfg(unix)]
            if iteration == 0
                && self.pprof_after_warmup
                && self.pprof_file.is_some()
                && profiler_guard.is_none()
            {
                println!(
                    "{} Starting profiling after warmup iteration...",
                    "⚡".yellow()
                );
                profiler_guard = Some(
                    pprof::ProfilerGuardBuilder::default()
                        .frequency(99)
                        .blocklist(&["libc", "libgcc", "pthread", "vdso"])
                        .build()
                        .map_err(|e| {
                            BoxedError::new(PlainError::new(
                                format!("Failed to start profiler: {e}"),
                                StatusCode::Unexpected,
                            ))
                        })
                        .context(error::BuildCliSnafu)?,
                );
            }
        }

        // Write the flamegraph if profiling was active; failures are reported
        // but do not abort the benchmark.
        #[cfg(unix)]
        if let (Some(guard), Some(pprof_file)) = (profiler_guard, &self.pprof_file) {
            println!("{} Generating flamegraph...", "🔥".yellow());
            match guard.report().build() {
                Ok(report) => {
                    let mut flamegraph_data = Vec::new();
                    if let Err(e) = report.flamegraph(&mut flamegraph_data) {
                        println!("{}: Failed to generate flamegraph: {}", "Error".red(), e);
                    } else if let Err(e) = std::fs::write(pprof_file, flamegraph_data) {
                        println!(
                            "{}: Failed to write flamegraph to {}: {}",
                            "Error".red(),
                            pprof_file.display(),
                            e
                        );
                    } else {
                        println!(
                            "{} Flamegraph saved to {}",
                            "✓".green(),
                            pprof_file.display().to_string().cyan()
                        );
                    }
                }
                Err(e) => {
                    println!("{}: Failed to generate pprof report: {}", "Error".red(), e);
                }
            }
        }

        if self.iterations > 1 {
            let avg_elapsed = total_elapsed_all / self.iterations as u32;
            let avg_rows = total_rows_all / self.iterations as u64;
            println!(
                "\n{} Average: {} rows in {:?} over {} iterations",
                "Summary".green().bold(),
                avg_rows.to_string().cyan(),
                avg_elapsed,
                self.iterations,
            );
        }

        println!("\n{}", "Benchmark completed!".green().bold());
        Ok(())
    }
}
822
#[cfg(test)]
mod tests {
    use datatypes::prelude::ConcreteDataType;
    use datatypes::schema::ColumnSchema;
    use sqlparser::ast::{BinaryOperator, Expr};
    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::Parser;
    use store_api::metadata::{ColumnMetadata, RegionMetadataBuilder};
    use store_api::storage::RegionId;

    use super::{ScanConfig, resolve_filters, resolve_projection};
    use crate::error;

    // JSON scan config with `projection_names` deserializes correctly and
    // leaves `projection` unset.
    #[test]
    fn test_parse_scan_config_projection_names() {
        let json = r#"{"projection_names":["host","ts"]}"#;
        let config: ScanConfig = serde_json::from_str(json).unwrap();

        assert_eq!(
            config.projection_names,
            Some(vec!["host".to_string(), "ts".to_string()])
        );
        assert_eq!(config.projection, None);
    }

    // Index-based projections are returned verbatim; no metadata required.
    #[test]
    fn test_resolve_projection_by_indexes() -> error::Result<()> {
        let config = ScanConfig {
            projection: Some(vec![0, 2]),
            projection_names: None,
            filters: None,
            series_row_selector: None,
        };

        let projection = resolve_projection(&config, None)?;
        assert_eq!(projection, Some(vec![0, 2]));
        Ok(())
    }

    // Name-based projections fail with a clear error when metadata is absent.
    #[test]
    fn test_resolve_projection_by_names_without_metadata() {
        let config = ScanConfig {
            projection: None,
            projection_names: Some(vec!["cpu".to_string(), "host".to_string()]),
            filters: None,
            series_row_selector: None,
        };

        let err = resolve_projection(&config, None).unwrap_err();
        assert!(
            err.to_string()
                .contains("Missing region metadata while resolving 'projection_names'")
        );
    }

    // Supplying both projection forms at once is rejected.
    #[test]
    fn test_resolve_projection_conflict_fields() {
        let config = ScanConfig {
            projection: Some(vec![0]),
            projection_names: Some(vec!["host".to_string()]),
            filters: None,
            series_row_selector: None,
        };

        let err = resolve_projection(&config, None).unwrap_err();
        let msg = err.to_string();
        assert!(msg.contains("projection"));
        assert!(msg.contains("projection_names"));
    }

    // Sanity check that sqlparser parses a compound filter string into a
    // top-level AND binary op, as `resolve_filters` relies on.
    #[test]
    fn test_sqlparser_parse_expr_string() {
        let dialect = GenericDialect {};
        let mut parser = Parser::new(&dialect)
            .try_with_sql("host = 'web-1' AND cpu > 80")
            .unwrap();

        let expr = parser.parse_expr().unwrap();

        match expr {
            Expr::BinaryOp { op, .. } => assert_eq!(op, BinaryOperator::And),
            other => panic!("expected BinaryOp, got: {other:?}"),
        }
    }

    // An integer literal compared against a UInt32 column is cast to UInt32
    // by the literal type conversion pass.
    #[test]
    fn test_resolve_filters_uint32_type_conversion() {
        use api::v1::SemanticType;

        let mut builder = RegionMetadataBuilder::new(RegionId::new(1, 0));
        builder
            .push_column_metadata(ColumnMetadata {
                column_schema: ColumnSchema::new(
                    "table_id",
                    ConcreteDataType::uint32_datatype(),
                    false,
                ),
                semantic_type: SemanticType::Tag,
                column_id: 1,
            })
            .push_column_metadata(ColumnMetadata {
                column_schema: ColumnSchema::new(
                    "ts",
                    ConcreteDataType::timestamp_millisecond_datatype(),
                    false,
                ),
                semantic_type: SemanticType::Timestamp,
                column_id: 2,
            })
            .primary_key(vec![1]);
        let metadata = builder.build().unwrap();

        let config = ScanConfig {
            projection: None,
            projection_names: None,
            filters: Some(vec!["table_id = 1117".to_string()]),
            series_row_selector: None,
        };

        let exprs = resolve_filters(&config, &metadata).unwrap();
        assert_eq!(exprs.len(), 1);
        let expr_str = format!("{}", exprs[0]);
        assert!(
            expr_str.contains("UInt32(1117)"),
            "Expected UInt32(1117) in expression, got: {expr_str}"
        );
    }

    // JSON scan config with `filters` deserializes into the raw strings.
    #[test]
    fn test_parse_scan_config_filters() {
        let json = r#"{"filters":["host = 'web-1'","cpu > 80"]}"#;
        let config: ScanConfig = serde_json::from_str(json).unwrap();

        assert_eq!(
            config.filters,
            Some(vec!["host = 'web-1'".to_string(), "cpu > 80".to_string()])
        );
    }
}