1use std::collections::HashSet;
18use std::io::{self, Write};
19use std::time::Duration;
20
21use async_trait::async_trait;
22use clap::{Parser, Subcommand};
23use common_error::ext::BoxedError;
24use common_telemetry::info;
25use serde_json::Value;
26use snafu::{OptionExt, ResultExt};
27
28use crate::Tool;
29use crate::common::ObjectStoreConfig;
30use crate::data::export_v2::coordinator::{ExportDataOptions, export_data};
31use crate::data::export_v2::error::{
32 ChunkTimeWindowRequiresBoundsSnafu, DatabaseSnafu, EmptyResultSnafu, IoSnafu,
33 ManifestVersionMismatchSnafu, Result, ResumeConfigMismatchSnafu, SchemaOnlyArgsNotAllowedSnafu,
34 SchemaOnlyModeMismatchSnafu, SnapshotVerifyFailedSnafu, UnexpectedValueTypeSnafu,
35};
36use crate::data::export_v2::extractor::SchemaExtractor;
37use crate::data::export_v2::manifest::{
38 ChunkMeta, ChunkStatus, DataFormat, MANIFEST_FILE, MANIFEST_VERSION, Manifest, TimeRange,
39};
40use crate::data::export_v2::schema::{DDL_DIR, SCHEMA_DIR, SCHEMAS_FILE};
41use crate::data::path::{data_dir_for_schema_chunk, ddl_path_for_schema};
42use crate::data::progress::{ProgressMode, build_progress_reporter};
43use crate::data::snapshot_storage::{
44 OpenDalStorage, SnapshotStorage, validate_snapshot_uri, validate_uri,
45};
46use crate::data::sql::{escape_sql_identifier, escape_sql_literal};
47use crate::database::{DatabaseClient, parse_proxy_opts};
48
// Subcommands of the export-v2 CLI. Each variant wraps the argument struct of one operation,
// and `ExportV2Command::build` dispatches to that struct's own `build`.
//
// NOTE(review): plain `//` comments are used here on purpose. clap derives help text from
// `///` doc comments, so adding those would change the CLI output.
#[derive(Debug, Subcommand)]
pub enum ExportV2Command {
    // Create a snapshot, or resume an existing one at the same location.
    Create(ExportCreateCommand),
    // List the snapshots found directly under a parent location.
    List(ExportListCommand),
    // Verify a snapshot's manifest, schema files and data files.
    Verify(ExportVerifyCommand),
    // Delete a snapshot.
    Delete(ExportDeleteCommand),
}
61
62impl ExportV2Command {
63 pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
64 match self {
65 ExportV2Command::Create(cmd) => cmd.build().await,
66 ExportV2Command::List(cmd) => cmd.build().await,
67 ExportV2Command::Verify(cmd) => cmd.build().await,
68 ExportV2Command::Delete(cmd) => cmd.build().await,
69 }
70 }
71}
72
// Arguments of the `list` subcommand.
//
// NOTE(review): `//` comments are used instead of `///` so clap's derived help text stays
// exactly as it is.
#[derive(Debug, Parser)]
pub struct ExportListCommand {
    // Parent location whose direct child directories are scanned for snapshots.
    #[clap(long)]
    location: String,

    // Object-store options, flattened into this command's flags.
    #[clap(flatten)]
    storage: ObjectStoreConfig,
}
84
85impl ExportListCommand {
86 pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
87 validate_uri(&self.location).map_err(BoxedError::new)?;
88 let storage = OpenDalStorage::from_parent_uri(&self.location, &self.storage)
89 .map_err(BoxedError::new)?;
90
91 Ok(Box::new(ExportList {
92 location: self.location.clone(),
93 storage,
94 }))
95 }
96}
97
/// Tool that lists the snapshots found under a parent location.
pub struct ExportList {
    /// The location as given on the command line; only used in the "Scanning: ..." message.
    location: String,
    /// Storage handle built from the parent URI; scanned for snapshot directories.
    storage: OpenDalStorage,
}
103
104#[async_trait]
105impl Tool for ExportList {
106 async fn do_work(&self) -> std::result::Result<(), BoxedError> {
107 self.run().await.map_err(BoxedError::new)
108 }
109}
110
111impl ExportList {
112 async fn run(&self) -> Result<()> {
113 let result = scan_snapshots(&self.storage).await?;
114
115 println!("Scanning: {}", self.location);
116 if result.snapshots.is_empty() {
117 println!("No snapshots found.");
118 } else {
119 print_snapshot_list(&result.snapshots, result.unreadable.len());
120 }
121 print_unreadable_warnings(&result.unreadable);
122
123 Ok(())
124 }
125}
126
// Arguments of the `verify` subcommand.
//
// NOTE(review): `//` comments are used instead of `///` so clap's derived help text stays
// exactly as it is.
#[derive(Debug, Parser)]
pub struct ExportVerifyCommand {
    // URI of the snapshot to verify.
    #[clap(long)]
    snapshot: String,

    // Object-store options, flattened into this command's flags.
    #[clap(flatten)]
    storage: ObjectStoreConfig,
}
138
139impl ExportVerifyCommand {
140 pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
141 validate_uri(&self.snapshot).map_err(BoxedError::new)?;
142 let storage =
143 OpenDalStorage::from_uri(&self.snapshot, &self.storage).map_err(BoxedError::new)?;
144
145 Ok(Box::new(ExportVerify {
146 snapshot: self.snapshot.clone(),
147 storage,
148 }))
149 }
150}
151
/// Tool that verifies one snapshot and prints a report.
pub struct ExportVerify {
    /// The snapshot URI as given on the command line; passed to the report printer.
    snapshot: String,
    /// Storage handle opened at the snapshot URI.
    storage: OpenDalStorage,
}
157
158#[async_trait]
159impl Tool for ExportVerify {
160 async fn do_work(&self) -> std::result::Result<(), BoxedError> {
161 self.run().await.map_err(BoxedError::new)
162 }
163}
164
165impl ExportVerify {
166 async fn run(&self) -> Result<()> {
167 let report = verify_snapshot(&self.storage).await?;
168 print_verify_report(&self.snapshot, &report);
169
170 if report.has_problems() {
171 return SnapshotVerifyFailedSnafu {
172 errors: report.error_count(),
173 warnings: report.warning_count(),
174 }
175 .fail();
176 }
177
178 Ok(())
179 }
180}
181
// Arguments of the `delete` subcommand.
//
// NOTE(review): `//` comments are used instead of `///` so clap's derived help text stays
// exactly as it is.
#[derive(Debug, Parser)]
pub struct ExportDeleteCommand {
    // URI of the snapshot to delete; checked with `validate_snapshot_uri` in `build`.
    #[clap(long)]
    snapshot: String,

    // When set (`--no-confirm`, alias `--yes`), the confirmation callback is not invoked.
    #[clap(long = "no-confirm", alias = "yes")]
    skip_confirmation: bool,

    // Object-store options, flattened into this command's flags.
    #[clap(flatten)]
    storage: ObjectStoreConfig,
}
197
198impl ExportDeleteCommand {
199 pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
200 validate_snapshot_uri(&self.snapshot).map_err(BoxedError::new)?;
201 let storage =
202 OpenDalStorage::from_uri(&self.snapshot, &self.storage).map_err(BoxedError::new)?;
203
204 Ok(Box::new(ExportDelete {
205 snapshot: self.snapshot.clone(),
206 skip_confirmation: self.skip_confirmation,
207 storage,
208 }))
209 }
210}
211
/// Tool that deletes one snapshot, optionally after asking for confirmation.
pub struct ExportDelete {
    /// The snapshot URI as given on the command line; shown in the summary and the prompt.
    snapshot: String,
    /// When `true`, the confirmation callback is never invoked.
    skip_confirmation: bool,
    /// Storage handle opened at the snapshot URI.
    storage: OpenDalStorage,
}
218
219#[async_trait]
220impl Tool for ExportDelete {
221 async fn do_work(&self) -> std::result::Result<(), BoxedError> {
222 self.run().await.map_err(BoxedError::new)
223 }
224}
225
226impl ExportDelete {
227 async fn run(&self) -> Result<()> {
228 self.run_with_confirmation(confirm_delete).await
229 }
230
231 async fn run_with_confirmation<F>(&self, confirm: F) -> Result<()>
232 where
233 F: FnOnce(&str) -> Result<bool>,
234 {
235 let manifest = self.storage.read_manifest().await?;
236 print_delete_summary(&self.snapshot, &manifest);
237
238 if !self.skip_confirmation && !confirm(&self.snapshot)? {
239 println!("Deletion cancelled.");
240 return Ok(());
241 }
242
243 println!("Deleting snapshot...");
244 self.storage.delete_snapshot().await?;
245 println!("Snapshot deleted successfully.");
246
247 Ok(())
248 }
249}
250
// Arguments of the `create` subcommand.
//
// NOTE(review): `//` comments are used instead of `///` so clap's derived help text stays
// exactly as it is.
#[derive(Debug, Parser)]
pub struct ExportCreateCommand {
    // Server address handed to `DatabaseClient::new`.
    #[clap(long)]
    addr: String,

    // Destination snapshot URI; validated with `validate_uri` and opened via `OpenDalStorage`.
    #[clap(long)]
    to: String,

    // Catalog to export.
    #[clap(long, default_value = "greptime")]
    catalog: String,

    // Comma-separated schema names. An empty list is turned into `None` in `build`, i.e. no
    // explicit selection is passed to the schema extractor.
    #[clap(long, value_delimiter = ',')]
    schemas: Vec<String>,

    // Schema-only mode. `build` rejects the time range, chunk window, non-default format and
    // non-default parallelism flags when this is set.
    #[clap(long)]
    schema_only: bool,

    // Optional start of the export time range; parsed by `TimeRange::parse`.
    #[clap(long)]
    start_time: Option<String>,

    // Optional end of the export time range; parsed by `TimeRange::parse`.
    #[clap(long)]
    end_time: Option<String>,

    // Optional chunk window (humantime syntax). `build` requires a bounded time range when set.
    #[clap(long, value_parser = humantime::parse_duration)]
    chunk_time_window: Option<Duration>,

    // Data file format.
    #[clap(long, value_enum, default_value = "parquet")]
    format: DataFormat,

    // When set, an existing snapshot at the destination is deleted instead of resumed.
    #[clap(long)]
    force: bool,

    // Forwarded to `ExportDataOptions::parallelism`.
    #[clap(long, default_value = "1")]
    parallelism: usize,

    // Forwarded to `ExportDataOptions::chunk_parallelism`; the value parser accepts 1..=64.
    #[clap(long, default_value = "1", value_parser = parse_chunk_parallelism)]
    chunk_parallelism: usize,

    // Optional credentials forwarded to `DatabaseClient::new`.
    #[clap(long)]
    auth_basic: Option<String>,

    // Optional request timeout (humantime syntax); `build` falls back to 60 seconds.
    #[clap(long, value_parser = humantime::parse_duration)]
    timeout: Option<Duration>,

    // Optional proxy setting; interpreted by `parse_proxy_opts` together with `no_proxy`.
    #[clap(long)]
    proxy: Option<String>,

    // Passed to both `parse_proxy_opts` and `DatabaseClient::new`.
    #[clap(long)]
    no_proxy: bool,

    // Progress reporting mode used when exporting data.
    #[clap(long, value_enum, default_value_t = ProgressMode::Auto)]
    progress: ProgressMode,

    // Object-store options, flattened into this command's flags.
    #[clap(flatten)]
    storage: ObjectStoreConfig,
}
333
334impl ExportCreateCommand {
335 pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
336 validate_uri(&self.to).map_err(BoxedError::new)?;
338
339 let time_range = TimeRange::parse(self.start_time.as_deref(), self.end_time.as_deref())
340 .map_err(BoxedError::new)?;
341 if self.chunk_time_window.is_some() && !time_range.is_bounded() {
342 return ChunkTimeWindowRequiresBoundsSnafu
343 .fail()
344 .map_err(BoxedError::new);
345 }
346 if self.schema_only {
347 let mut invalid_args = Vec::new();
348 if self.start_time.is_some() {
349 invalid_args.push("--start-time");
350 }
351 if self.end_time.is_some() {
352 invalid_args.push("--end-time");
353 }
354 if self.chunk_time_window.is_some() {
355 invalid_args.push("--chunk-time-window");
356 }
357 if self.format != DataFormat::Parquet {
358 invalid_args.push("--format");
359 }
360 if self.parallelism != 1 {
361 invalid_args.push("--parallelism");
362 }
363 if self.chunk_parallelism != 1 {
364 invalid_args.push("--chunk-parallelism");
365 }
366 if !invalid_args.is_empty() {
367 return SchemaOnlyArgsNotAllowedSnafu {
368 args: invalid_args.join(", "),
369 }
370 .fail()
371 .map_err(BoxedError::new);
372 }
373 }
374
375 let schemas = if self.schemas.is_empty() {
377 None
378 } else {
379 Some(self.schemas.clone())
380 };
381
382 let storage = OpenDalStorage::from_uri(&self.to, &self.storage).map_err(BoxedError::new)?;
384
385 let proxy = parse_proxy_opts(self.proxy.clone(), self.no_proxy)?;
387 let database_client = DatabaseClient::new(
388 self.addr.clone(),
389 self.catalog.clone(),
390 self.auth_basic.clone(),
391 self.timeout.unwrap_or(Duration::from_secs(60)),
392 proxy,
393 self.no_proxy,
394 );
395
396 Ok(Box::new(ExportCreate {
397 config: ExportConfig {
398 catalog: self.catalog.clone(),
399 schemas,
400 schema_only: self.schema_only,
401 format: self.format,
402 force: self.force,
403 time_range,
404 chunk_time_window: self.chunk_time_window,
405 parallelism: self.parallelism,
406 chunk_parallelism: self.chunk_parallelism,
407 progress: self.progress,
408 snapshot_uri: self.to.clone(),
409 storage_config: self.storage.clone(),
410 },
411 storage: Box::new(storage),
412 database_client,
413 }))
414 }
415}
416
/// Tool that creates a snapshot, or resumes an existing one at the same location.
pub struct ExportCreate {
    /// Validated export settings derived from the command-line arguments.
    config: ExportConfig,
    /// Destination storage, behind the [`SnapshotStorage`] trait object.
    storage: Box<dyn SnapshotStorage>,
    /// Client used for schema extraction, DDL queries and data export.
    database_client: DatabaseClient,
}
423
/// Export settings carried from `ExportCreateCommand::build` into [`ExportCreate`].
struct ExportConfig {
    /// Catalog to export.
    catalog: String,
    /// Explicit schema selection; `None` when `--schemas` was empty.
    schemas: Option<Vec<String>>,
    /// Whether only schema files are exported (no data).
    schema_only: bool,
    /// Data file format.
    format: DataFormat,
    /// Delete an existing snapshot instead of resuming it.
    force: bool,
    /// Parsed export time range.
    time_range: TimeRange,
    /// Optional chunk window passed to `Manifest::new_for_export`.
    chunk_time_window: Option<Duration>,
    /// Forwarded to `ExportDataOptions::parallelism`.
    parallelism: usize,
    /// Forwarded to `ExportDataOptions::chunk_parallelism`.
    chunk_parallelism: usize,
    /// Progress reporting mode.
    progress: ProgressMode,
    /// Destination snapshot URI as given on the command line.
    snapshot_uri: String,
    /// Object-store options forwarded to `export_data`.
    storage_config: ObjectStoreConfig,
}
438
/// clap value parser for `--chunk-parallelism`.
///
/// Accepts a base-10 unsigned integer in the inclusive range `1..=64`.
///
/// # Errors
///
/// Returns a human-readable message when `value` is not an unsigned integer, or when it is
/// outside `1..=64`.
fn parse_chunk_parallelism(value: &str) -> std::result::Result<usize, String> {
    let parallelism = value
        .parse::<usize>()
        .map_err(|_| "chunk parallelism must be an integer between 1 and 64".to_string())?;

    // `RangeInclusive::contains` takes its argument by reference. The original text had the
    // `&parallelism` argument mangled into a mis-encoded character and did not compile.
    if (1..=64).contains(&parallelism) {
        Ok(parallelism)
    } else {
        Err("chunk parallelism must be between 1 and 64".to_string())
    }
}
449
450#[async_trait]
451impl Tool for ExportCreate {
452 async fn do_work(&self) -> std::result::Result<(), BoxedError> {
453 self.run().await.map_err(BoxedError::new)
454 }
455}
456
457impl ExportCreate {
458 async fn run(&self) -> Result<()> {
459 let exists = self.storage.exists().await?;
461
462 if exists {
463 if self.config.force {
464 info!("Deleting existing snapshot (--force)");
465 self.storage.delete_snapshot().await?;
466 } else {
467 let mut manifest = self.storage.read_manifest().await?;
469
470 if manifest.version != MANIFEST_VERSION {
472 return ManifestVersionMismatchSnafu {
473 expected: MANIFEST_VERSION,
474 found: manifest.version,
475 }
476 .fail();
477 }
478
479 validate_resume_config(&manifest, &self.config)?;
480
481 info!(
482 "Resuming existing snapshot: {} (completed: {}/{} chunks)",
483 manifest.snapshot_id,
484 manifest.completed_count(),
485 manifest.chunks.len()
486 );
487
488 if manifest.is_complete() {
489 info!("Snapshot is already complete");
490 return Ok(());
491 }
492
493 if manifest.schema_only {
494 return Ok(());
495 }
496
497 let progress = build_progress_reporter(self.config.progress);
498 export_data(
499 self.storage.as_ref(),
500 &self.database_client,
501 &mut manifest,
502 ExportDataOptions {
503 snapshot_uri: &self.config.snapshot_uri,
504 storage_config: &self.config.storage_config,
505 parallelism: self.config.parallelism,
506 chunk_parallelism: self.config.chunk_parallelism,
507 },
508 progress.as_ref(),
509 )
510 .await?;
511 return Ok(());
512 }
513 }
514
515 let extractor = SchemaExtractor::new(&self.database_client, &self.config.catalog);
517 let schema_snapshot = extractor.extract(self.config.schemas.as_deref()).await?;
518
519 let schema_names: Vec<String> = schema_snapshot
520 .schemas
521 .iter()
522 .map(|s| s.name.clone())
523 .collect();
524 info!("Exporting schemas: {:?}", schema_names);
525
526 let mut manifest = Manifest::new_for_export(
528 self.config.catalog.clone(),
529 schema_names.clone(),
530 self.config.schema_only,
531 self.config.time_range.clone(),
532 self.config.format,
533 self.config.chunk_time_window,
534 )?;
535
536 self.storage.write_schema(&schema_snapshot).await?;
538 info!("Exported {} schemas", schema_snapshot.schemas.len());
539
540 let ddl_by_schema = self.build_ddl_by_schema(&schema_names).await?;
542 for (schema, ddl) in ddl_by_schema {
543 let ddl_path = ddl_path_for_schema(&schema);
544 self.storage.write_text(&ddl_path, &ddl).await?;
545 info!("Exported DDL for schema {} to {}", schema, ddl_path);
546 }
547
548 self.storage.write_manifest(&manifest).await?;
556 info!("Snapshot created: {}", manifest.snapshot_id);
557
558 if !self.config.schema_only {
559 let progress = build_progress_reporter(self.config.progress);
560 export_data(
561 self.storage.as_ref(),
562 &self.database_client,
563 &mut manifest,
564 ExportDataOptions {
565 snapshot_uri: &self.config.snapshot_uri,
566 storage_config: &self.config.storage_config,
567 parallelism: self.config.parallelism,
568 chunk_parallelism: self.config.chunk_parallelism,
569 },
570 progress.as_ref(),
571 )
572 .await?;
573 }
574
575 Ok(())
576 }
577
578 async fn build_ddl_by_schema(&self, schema_names: &[String]) -> Result<Vec<(String, String)>> {
579 let mut schemas = schema_names.to_vec();
580 schemas.sort();
581
582 let mut ddl_by_schema = Vec::with_capacity(schemas.len());
583 for schema in schemas {
584 let create_database = self.show_create("DATABASE", &schema, None).await?;
585
586 let (mut physical_tables, mut tables, mut views) =
587 self.get_schema_objects(&schema).await?;
588 physical_tables.sort();
589 let mut physical_ddls = Vec::with_capacity(physical_tables.len());
590 for table in physical_tables {
591 physical_ddls.push(self.show_create("TABLE", &schema, Some(&table)).await?);
592 }
593
594 tables.sort();
595 let mut table_ddls = Vec::with_capacity(tables.len());
596 for table in tables {
597 table_ddls.push(self.show_create("TABLE", &schema, Some(&table)).await?);
598 }
599
600 views.sort();
601 let mut view_ddls = Vec::with_capacity(views.len());
602 for view in views {
603 view_ddls.push(self.show_create("VIEW", &schema, Some(&view)).await?);
604 }
605
606 let ddl = build_schema_ddl(
607 &schema,
608 create_database,
609 physical_ddls,
610 table_ddls,
611 view_ddls,
612 );
613 ddl_by_schema.push((schema, ddl));
614 }
615
616 Ok(ddl_by_schema)
617 }
618
619 async fn get_schema_objects(
620 &self,
621 schema: &str,
622 ) -> Result<(Vec<String>, Vec<String>, Vec<String>)> {
623 let physical_tables = self.get_metric_physical_tables(schema).await?;
624 let physical_set: HashSet<&str> = physical_tables.iter().map(String::as_str).collect();
625 let sql = format!(
626 "SELECT table_name, table_type FROM information_schema.tables \
627 WHERE table_catalog = '{}' AND table_schema = '{}' \
628 AND (table_type = 'BASE TABLE' OR table_type = 'VIEW')",
629 escape_sql_literal(&self.config.catalog),
630 escape_sql_literal(schema)
631 );
632 let records: Option<Vec<Vec<Value>>> = self
633 .database_client
634 .sql_in_public(&sql)
635 .await
636 .context(DatabaseSnafu)?;
637
638 let mut tables = Vec::new();
639 let mut views = Vec::new();
640 if let Some(rows) = records {
641 for row in rows {
642 let name = match row.first() {
643 Some(Value::String(name)) => name.clone(),
644 _ => return UnexpectedValueTypeSnafu.fail(),
645 };
646 let table_type = match row.get(1) {
647 Some(Value::String(table_type)) => table_type.as_str(),
648 _ => return UnexpectedValueTypeSnafu.fail(),
649 };
650 if !physical_set.contains(name.as_str()) {
651 if table_type == "VIEW" {
652 views.push(name);
653 } else {
654 tables.push(name);
655 }
656 }
657 }
658 }
659
660 Ok((physical_tables, tables, views))
661 }
662
663 async fn get_metric_physical_tables(&self, schema: &str) -> Result<Vec<String>> {
664 let sql = format!(
665 "SELECT DISTINCT table_name FROM information_schema.columns \
666 WHERE table_catalog = '{}' AND table_schema = '{}' AND column_name = '__tsid'",
667 escape_sql_literal(&self.config.catalog),
668 escape_sql_literal(schema)
669 );
670 let records: Option<Vec<Vec<Value>>> = self
671 .database_client
672 .sql_in_public(&sql)
673 .await
674 .context(DatabaseSnafu)?;
675
676 let mut tables = HashSet::new();
677 if let Some(rows) = records {
678 for row in rows {
679 let name = match row.first() {
680 Some(Value::String(name)) => name.clone(),
681 _ => return UnexpectedValueTypeSnafu.fail(),
682 };
683 tables.insert(name);
684 }
685 }
686
687 Ok(tables.into_iter().collect())
688 }
689
690 async fn show_create(
691 &self,
692 show_type: &str,
693 schema: &str,
694 table: Option<&str>,
695 ) -> Result<String> {
696 let sql = match table {
697 Some(table) => format!(
698 r#"SHOW CREATE {} "{}"."{}"."{}""#,
699 show_type,
700 escape_sql_identifier(&self.config.catalog),
701 escape_sql_identifier(schema),
702 escape_sql_identifier(table)
703 ),
704 None => format!(
705 r#"SHOW CREATE {} "{}"."{}""#,
706 show_type,
707 escape_sql_identifier(&self.config.catalog),
708 escape_sql_identifier(schema)
709 ),
710 };
711
712 let records: Option<Vec<Vec<Value>>> = self
713 .database_client
714 .sql_in_public(&sql)
715 .await
716 .context(DatabaseSnafu)?;
717 let rows = records.context(EmptyResultSnafu)?;
718 let row = rows.first().context(EmptyResultSnafu)?;
719 let Some(Value::String(create)) = row.get(1) else {
720 return UnexpectedValueTypeSnafu.fail();
721 };
722
723 Ok(format!("{};\n", create))
724 }
725}
726
/// Concatenates the DDL statements of one schema into a single text.
///
/// Layout: a `-- Schema: <name>` header line, the `CREATE DATABASE` statement, the physical
/// table statements, the other table statements, the view statements, and a final newline.
/// The statements are appended verbatim, so each is expected to carry its own terminator.
fn build_schema_ddl(
    schema: &str,
    create_database: String,
    physical_tables: Vec<String>,
    tables: Vec<String>,
    views: Vec<String>,
) -> String {
    let header = format!("-- Schema: {}\n", schema);

    let mut ddl: String = [header, create_database]
        .into_iter()
        .chain(physical_tables)
        .chain(tables)
        .chain(views)
        .collect();
    ddl.push('\n');
    ddl
}
749
750fn validate_resume_config(manifest: &Manifest, config: &ExportConfig) -> Result<()> {
751 if manifest.schema_only != config.schema_only {
752 return SchemaOnlyModeMismatchSnafu {
753 existing_schema_only: manifest.schema_only,
754 requested_schema_only: config.schema_only,
755 }
756 .fail();
757 }
758
759 if manifest.catalog != config.catalog {
760 return ResumeConfigMismatchSnafu {
761 field: "catalog",
762 existing: manifest.catalog.clone(),
763 requested: config.catalog.clone(),
764 }
765 .fail();
766 }
767
768 if let Some(requested_schemas) = &config.schemas
771 && !schema_selection_matches(&manifest.schemas, requested_schemas)
772 {
773 return ResumeConfigMismatchSnafu {
774 field: "schemas",
775 existing: format_schema_selection(&manifest.schemas),
776 requested: format_schema_selection(requested_schemas),
777 }
778 .fail();
779 }
780
781 if manifest.time_range != config.time_range {
782 return ResumeConfigMismatchSnafu {
783 field: "time_range",
784 existing: format!("{:?}", manifest.time_range),
785 requested: format!("{:?}", config.time_range),
786 }
787 .fail();
788 }
789
790 if manifest.format != config.format {
791 return ResumeConfigMismatchSnafu {
792 field: "format",
793 existing: manifest.format.to_string(),
794 requested: config.format.to_string(),
795 }
796 .fail();
797 }
798
799 let expected_plan = Manifest::new_for_export(
800 manifest.catalog.clone(),
801 manifest.schemas.clone(),
802 config.schema_only,
803 config.time_range.clone(),
804 config.format,
805 config.chunk_time_window,
806 )?;
807 if !chunk_plan_matches(manifest, &expected_plan) {
808 return ResumeConfigMismatchSnafu {
809 field: "chunk plan",
810 existing: format_chunk_plan(&manifest.chunks),
811 requested: format_chunk_plan(&expected_plan.chunks),
812 }
813 .fail();
814 }
815
816 Ok(())
817}
818
819fn schema_selection_matches(existing: &[String], requested: &[String]) -> bool {
820 canonical_schema_selection(existing) == canonical_schema_selection(requested)
821}
822
/// Returns the schema names lower-cased (ASCII), de-duplicated and sorted.
fn canonical_schema_selection(schemas: &[String]) -> Vec<String> {
    let mut canonicalized: Vec<String> = schemas
        .iter()
        .map(|schema| schema.to_ascii_lowercase())
        .collect();
    // After sorting, equal names are adjacent, so `dedup` removes every duplicate.
    canonicalized.sort();
    canonicalized.dedup();
    canonicalized
}
837
/// Renders a schema list as `[a, b, c]` for use in error messages.
fn format_schema_selection(schemas: &[String]) -> String {
    let mut rendered = String::from("[");
    rendered.push_str(&schemas.join(", "));
    rendered.push(']');
    rendered
}
841
842fn chunk_plan_matches(existing: &Manifest, expected: &Manifest) -> bool {
843 existing.chunks.len() == expected.chunks.len()
844 && existing
845 .chunks
846 .iter()
847 .zip(&expected.chunks)
848 .all(|(left, right)| left.id == right.id && left.time_range == right.time_range)
849}
850
851fn format_chunk_plan(chunks: &[ChunkMeta]) -> String {
852 let items = chunks
853 .iter()
854 .map(|chunk| format!("#{}:{:?}", chunk.id, chunk.time_range))
855 .collect::<Vec<_>>();
856 format!("[{}]", items.join(", "))
857}
858
/// One snapshot discovered by `scan_snapshots`.
#[derive(Debug)]
struct SnapshotListEntry {
    /// Directory name with surrounding slashes trimmed and a single trailing `/` appended.
    path: String,
    /// The manifest parsed from that directory's manifest file.
    manifest: Manifest,
}
864
/// Result of scanning a parent location for snapshots.
#[derive(Debug, Default)]
struct SnapshotScanResult {
    /// Snapshots with a parseable manifest, sorted newest first by `created_at`.
    snapshots: Vec<SnapshotListEntry>,
    /// Paths of directories whose manifest file exists but failed to parse, sorted by name.
    unreadable: Vec<String>,
}
870
871async fn scan_snapshots(storage: &OpenDalStorage) -> Result<SnapshotScanResult> {
872 let mut result = SnapshotScanResult::default();
873 for dir in storage.list_direct_child_dirs().await? {
874 let manifest_path = format!("{}/{}", dir.trim_matches('/'), MANIFEST_FILE);
875 let Some(data) = storage.read_file_if_exists(&manifest_path).await? else {
876 continue;
877 };
878
879 match serde_json::from_slice::<Manifest>(&data) {
880 Ok(manifest) => result.snapshots.push(SnapshotListEntry {
881 path: format!("{}/", dir.trim_matches('/')),
882 manifest,
883 }),
884 Err(_) => result
885 .unreadable
886 .push(format!("{}/", dir.trim_matches('/'))),
887 }
888 }
889
890 result
891 .snapshots
892 .sort_by_key(|entry| std::cmp::Reverse(entry.manifest.created_at));
893 result.unreadable.sort();
894 Ok(result)
895}
896
897fn print_snapshot_list(snapshots: &[SnapshotListEntry], unreadable_count: usize) {
898 if unreadable_count == 0 {
899 println!("Found {} snapshots:", snapshots.len());
900 } else {
901 println!(
902 "Found {} snapshots ({} {} skipped: unreadable manifest):",
903 snapshots.len(),
904 unreadable_count,
905 directory_word(unreadable_count)
906 );
907 }
908 println!();
909 println!(
910 " {:<24} {:<36} {:<19} {:<9} {:<7} {:<6} Status",
911 "Path", "ID", "Created", "Catalog", "Schemas", "Chunks"
912 );
913 println!(
914 " {:<24} {:<36} {:<19} {:<9} {:<7} {:<6} {:<10}",
915 "-".repeat(24),
916 "-".repeat(36),
917 "-".repeat(19),
918 "-".repeat(9),
919 "-".repeat(7),
920 "-".repeat(6),
921 "-".repeat(10)
922 );
923 for entry in snapshots {
924 let manifest = &entry.manifest;
925 println!(
926 " {:<24} {:<36} {:<19} {:<9} {:<7} {:<6} {}",
927 entry.path,
928 manifest.snapshot_id,
929 manifest.created_at.format("%Y-%m-%d %H:%M:%S"),
930 manifest.catalog,
931 manifest.schemas.len(),
932 format_list_chunks(manifest),
933 snapshot_status(manifest)
934 );
935 }
936}
937
938fn print_unreadable_warnings(unreadable: &[String]) {
939 if unreadable.is_empty() {
940 return;
941 }
942
943 println!();
944 println!(
945 "Warning: {} {} had corrupt/unreadable manifest.json:",
946 unreadable.len(),
947 directory_word(unreadable.len())
948 );
949 for path in unreadable {
950 println!(" - {}", path);
951 }
952}
953
/// Returns "directory" for a count of exactly one and "directories" otherwise (including 0).
fn directory_word(count: usize) -> &'static str {
    match count {
        1 => "directory",
        _ => "directories",
    }
}
961
962fn snapshot_status(manifest: &Manifest) -> &'static str {
963 if manifest.schema_only {
964 "schema-only"
965 } else if manifest.is_complete() {
966 "complete"
967 } else {
968 "incomplete"
969 }
970}
971
972fn format_list_chunks(manifest: &Manifest) -> String {
973 let total = manifest.chunks.len();
974 if total == 0 {
975 return "0".to_string();
976 }
977
978 format!(
979 "{}/{}",
980 manifest.completed_count() + manifest.skipped_count(),
981 total
982 )
983}
984
/// Severity of a problem found while verifying a snapshot.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum VerifySeverity {
    /// Counted by `VerifyReport::error_count`.
    Error,
    /// Counted by `VerifyReport::warning_count`.
    Warn,
}

impl VerifySeverity {
    /// Returns the upper-case label of this severity: "ERROR" or "WARN".
    fn as_str(self) -> &'static str {
        match self {
            Self::Warn => "WARN",
            Self::Error => "ERROR",
        }
    }
}
999
/// A single finding recorded during snapshot verification.
#[derive(Debug)]
struct VerifyProblem {
    /// Whether the finding is an error or a warning.
    severity: VerifySeverity,
    /// Human-readable description of the finding.
    message: String,
}
1005
/// Chunk counts taken from a manifest, filled in by `summarize_chunks`.
#[derive(Debug, Default)]
struct VerifyChunkSummary {
    /// Number of chunks listed in the manifest.
    total: usize,
    /// Value of `Manifest::completed_count`.
    completed: usize,
    /// Value of `Manifest::skipped_count`.
    skipped: usize,
    /// Value of `Manifest::pending_count`.
    pending: usize,
    /// Value of `Manifest::in_progress_count`.
    in_progress: usize,
    /// Value of `Manifest::failed_count`.
    failed: usize,
}
1015
/// Everything `verify_snapshot` learned about a snapshot.
#[derive(Debug)]
struct VerifyReport {
    /// The manifest read from the snapshot.
    manifest: Manifest,
    /// Whether the schema index file exists in storage.
    schema_index_exists: bool,
    /// Number of files under the DDL directory whose path ends with `.sql`.
    ddl_file_count: usize,
    /// Per-status chunk counts taken from the manifest.
    chunk_summary: VerifyChunkSummary,
    /// Number of data files listed by completed chunks (0 unless chunks were verified).
    data_files_total: usize,
    /// Number of those files that were found in storage.
    data_files_verified: usize,
    /// All findings, in the order they were recorded.
    problems: Vec<VerifyProblem>,
}
1026
1027impl VerifyReport {
1028 fn error_count(&self) -> usize {
1029 self.problems
1030 .iter()
1031 .filter(|problem| problem.severity == VerifySeverity::Error)
1032 .count()
1033 }
1034
1035 fn warning_count(&self) -> usize {
1036 self.problems
1037 .iter()
1038 .filter(|problem| problem.severity == VerifySeverity::Warn)
1039 .count()
1040 }
1041
1042 fn has_problems(&self) -> bool {
1043 !self.problems.is_empty()
1044 }
1045
1046 fn push_error(&mut self, message: impl Into<String>) {
1047 self.problems.push(VerifyProblem {
1048 severity: VerifySeverity::Error,
1049 message: message.into(),
1050 });
1051 }
1052
1053 fn push_warn(&mut self, message: impl Into<String>) {
1054 self.problems.push(VerifyProblem {
1055 severity: VerifySeverity::Warn,
1056 message: message.into(),
1057 });
1058 }
1059}
1060
1061async fn verify_snapshot(storage: &OpenDalStorage) -> Result<VerifyReport> {
1062 let manifest = storage.read_manifest().await?;
1063 let schema_index_path = format!("{}/{}", SCHEMA_DIR, SCHEMAS_FILE);
1064 let ddl_prefix = format!("{}/{}/", SCHEMA_DIR, DDL_DIR);
1065 let schema_index_exists = storage.file_exists(&schema_index_path).await?;
1066 let ddl_files: HashSet<_> = storage
1067 .list_files_recursive(&ddl_prefix)
1068 .await?
1069 .into_iter()
1070 .collect();
1071 let ddl_file_count = ddl_files
1072 .iter()
1073 .filter(|path| path.ends_with(".sql"))
1074 .count();
1075
1076 let mut report = VerifyReport {
1077 manifest,
1078 schema_index_exists,
1079 ddl_file_count,
1080 chunk_summary: VerifyChunkSummary::default(),
1081 data_files_total: 0,
1082 data_files_verified: 0,
1083 problems: Vec::new(),
1084 };
1085
1086 if report.manifest.version != MANIFEST_VERSION {
1087 report.push_error(format!(
1088 "Manifest version mismatch: expected {}, found {}",
1089 MANIFEST_VERSION, report.manifest.version
1090 ));
1091 }
1092
1093 if !report.schema_index_exists {
1094 report.push_warn(format!("Missing schema index '{}'", schema_index_path));
1095 }
1096
1097 for schema in &report.manifest.schemas {
1098 let ddl_path = ddl_path_for_schema(schema);
1099 if !ddl_files.contains(ddl_path.as_str()) {
1100 report.problems.push(VerifyProblem {
1101 severity: VerifySeverity::Error,
1102 message: format!("Schema '{}': missing DDL file '{}'", schema, ddl_path),
1103 });
1104 }
1105 }
1106
1107 report.chunk_summary = summarize_chunks(&report.manifest);
1108 if report.manifest.schema_only {
1109 let chunk_count = report.manifest.chunks.len();
1110 if chunk_count > 0 {
1111 report.push_error(format!(
1112 "Schema-only snapshot should not contain data chunks (found {})",
1113 chunk_count
1114 ));
1115 }
1116 let mut first_data_file: Option<String> = None;
1117 storage
1118 .for_each_file_recursive("data/", |path| {
1119 let should_update = match &first_data_file {
1120 Some(current) => path.as_str() < current.as_str(),
1121 None => true,
1122 };
1123 if should_update {
1124 first_data_file = Some(path);
1125 }
1126 Ok(())
1127 })
1128 .await?;
1129 if let Some(path) = first_data_file {
1130 report.push_error(format!(
1131 "Schema-only snapshot should not contain data files (found '{}')",
1132 path
1133 ));
1134 }
1135 } else if report.manifest.chunks.is_empty() {
1136 report.push_error("Full snapshot should contain at least one data chunk");
1137 } else {
1138 verify_chunks_and_data_files(storage, &mut report).await?;
1139 }
1140
1141 Ok(report)
1142}
1143
1144fn summarize_chunks(manifest: &Manifest) -> VerifyChunkSummary {
1145 VerifyChunkSummary {
1146 total: manifest.chunks.len(),
1147 completed: manifest.completed_count(),
1148 skipped: manifest.skipped_count(),
1149 pending: manifest.pending_count(),
1150 in_progress: manifest.in_progress_count(),
1151 failed: manifest.failed_count(),
1152 }
1153}
1154
/// A data file listed by a completed chunk whose path passed validation.
#[derive(Debug)]
struct ChunkFile {
    /// Id of the chunk that lists the file.
    chunk_id: u32,
    /// Path of the file as accepted by `valid_manifest_data_file_path`.
    path: String,
}
1161
/// What the manifest claims about chunks and data files, built by `build_verify_plan`
/// without touching storage.
#[derive(Debug, Default)]
struct VerifyPlan {
    /// Files of completed chunks that must exist in storage.
    files_to_check: Vec<ChunkFile>,
    /// Every listed file path accepted by `safe_manifest_data_file_path`, for chunks of any
    /// status. Files found in storage but absent from this set are reported as unexpected.
    claimed_data_files: HashSet<String>,
    /// Number of files listed by completed chunks, including ones with invalid paths.
    data_files_total: usize,
    /// Findings detected from the manifest alone.
    problems: Vec<VerifyProblem>,
}
1179
/// Result of walking the `data/` prefix in storage, produced by `scan_data_files`.
#[derive(Debug)]
struct VerifyDataScan {
    /// Files found in storage that the plan's `claimed_data_files` contains.
    existing_claimed_data_files: HashSet<String>,
    /// Files found in storage that the plan does not claim.
    unexpected_data_files: Vec<String>,
}
1187
/// Combined result of comparing a [`VerifyPlan`] with a [`VerifyDataScan`].
#[derive(Debug, Default)]
struct VerifyOutcome {
    /// Copied from `VerifyPlan::data_files_total`.
    data_files_total: usize,
    /// Number of files in `files_to_check` that were found in storage.
    data_files_verified: usize,
    /// Plan findings followed by missing-file and unexpected-file findings.
    problems: Vec<VerifyProblem>,
}
1195
1196async fn verify_chunks_and_data_files(
1197 storage: &OpenDalStorage,
1198 report: &mut VerifyReport,
1199) -> Result<()> {
1200 let plan = build_verify_plan(&report.manifest);
1201 let scan = scan_data_files(storage, &plan).await?;
1202 let outcome = reconcile_plan_with_scan(plan, scan);
1203
1204 report.data_files_total = outcome.data_files_total;
1205 report.data_files_verified = outcome.data_files_verified;
1206 report.problems.extend(outcome.problems);
1207
1208 Ok(())
1209}
1210
1211fn build_verify_plan(manifest: &Manifest) -> VerifyPlan {
1213 let mut plan = VerifyPlan::default();
1214 let mut seen_chunk_ids = HashSet::new();
1215
1216 for chunk in &manifest.chunks {
1217 if !seen_chunk_ids.insert(chunk.id) {
1218 plan.problems.push(VerifyProblem {
1219 severity: VerifySeverity::Error,
1220 message: format!("Chunk {}: duplicate chunk id", chunk.id),
1221 });
1222 }
1223 for file in &chunk.files {
1224 if let Some(path) = safe_manifest_data_file_path(file) {
1225 plan.claimed_data_files.insert(path.to_string());
1226 }
1227 }
1228
1229 match chunk.status {
1230 ChunkStatus::Completed => {
1231 if chunk.files.is_empty() {
1232 plan.problems.push(VerifyProblem {
1233 severity: VerifySeverity::Error,
1234 message: format!("Chunk {}: completed chunk has no data files", chunk.id),
1235 });
1236 continue;
1237 }
1238 let allowed_prefixes = manifest
1239 .schemas
1240 .iter()
1241 .map(|schema| data_dir_for_schema_chunk(schema, chunk.id))
1242 .collect::<Vec<_>>();
1243 for file in &chunk.files {
1244 plan.data_files_total += 1;
1245 match valid_manifest_data_file_path(file, &allowed_prefixes) {
1246 Some(path) => plan.files_to_check.push(ChunkFile {
1247 chunk_id: chunk.id,
1248 path: path.to_string(),
1249 }),
1250 None => plan.problems.push(VerifyProblem {
1251 severity: VerifySeverity::Error,
1252 message: format!(
1253 "Chunk {}: invalid data file path '{}'",
1254 chunk.id, file
1255 ),
1256 }),
1257 }
1258 }
1259 }
1260 ChunkStatus::Skipped => {
1261 if !chunk.files.is_empty() {
1262 plan.problems.push(VerifyProblem {
1263 severity: VerifySeverity::Error,
1264 message: format!(
1265 "Chunk {}: skipped chunk should not list data files",
1266 chunk.id
1267 ),
1268 });
1269 }
1270 }
1271 ChunkStatus::Pending => {
1272 plan.problems.push(VerifyProblem {
1273 severity: VerifySeverity::Error,
1274 message: format!("Chunk {}: status is 'pending'", chunk.id),
1275 });
1276 }
1277 ChunkStatus::InProgress => {
1278 plan.problems.push(VerifyProblem {
1279 severity: VerifySeverity::Error,
1280 message: format!("Chunk {}: status is 'in_progress'", chunk.id),
1281 });
1282 }
1283 ChunkStatus::Failed => {
1284 let reason = chunk.error.as_deref().unwrap_or("unknown error");
1285 plan.problems.push(VerifyProblem {
1286 severity: VerifySeverity::Error,
1287 message: format!("Chunk {}: status is 'failed' (error: {})", chunk.id, reason),
1288 });
1289 }
1290 }
1291 }
1292
1293 plan
1294}
1295
1296async fn scan_data_files(storage: &OpenDalStorage, plan: &VerifyPlan) -> Result<VerifyDataScan> {
1298 let mut scan = VerifyDataScan {
1299 existing_claimed_data_files: HashSet::new(),
1300 unexpected_data_files: Vec::new(),
1301 };
1302
1303 storage
1304 .for_each_file_recursive("data/", |path| {
1305 if plan.claimed_data_files.contains(&path) {
1306 scan.existing_claimed_data_files.insert(path);
1307 } else {
1308 scan.unexpected_data_files.push(path);
1309 }
1310 Ok(())
1311 })
1312 .await?;
1313
1314 Ok(scan)
1315}
1316
1317fn reconcile_plan_with_scan(plan: VerifyPlan, mut scan: VerifyDataScan) -> VerifyOutcome {
1323 let mut problems = plan.problems;
1324 let mut data_files_verified = 0;
1325
1326 for file in &plan.files_to_check {
1327 if scan.existing_claimed_data_files.contains(&file.path) {
1328 data_files_verified += 1;
1329 } else {
1330 problems.push(VerifyProblem {
1331 severity: VerifySeverity::Error,
1332 message: format!("Chunk {}: missing file '{}'", file.chunk_id, file.path),
1333 });
1334 }
1335 }
1336
1337 scan.unexpected_data_files.sort();
1338 for path in scan.unexpected_data_files {
1339 problems.push(VerifyProblem {
1340 severity: VerifySeverity::Error,
1341 message: format!("Unexpected data file '{}' is not listed in manifest", path),
1342 });
1343 }
1344
1345 VerifyOutcome {
1346 data_files_total: plan.data_files_total,
1347 data_files_verified,
1348 problems,
1349 }
1350}
1351
1352fn valid_manifest_data_file_path<'a>(
1353 path: &'a str,
1354 allowed_prefixes: &[String],
1355) -> Option<&'a str> {
1356 let normalized = safe_manifest_data_file_path(path)?;
1357
1358 if !allowed_prefixes
1359 .iter()
1360 .any(|prefix| normalized.starts_with(prefix))
1361 {
1362 return None;
1363 }
1364
1365 Some(normalized)
1366}
1367
/// Normalizes a manifest data file path and rejects unsafe ones.
///
/// Leading `/` characters are stripped. The remainder is returned only when it
/// starts with `data/` and none of its `/`-separated segments is empty, `.` or
/// `..`; otherwise the result is `None`.
fn safe_manifest_data_file_path(path: &str) -> Option<&str> {
    let relative = path.trim_start_matches('/');

    // An empty remainder fails this check too, since it cannot start with "data/".
    let under_data_root = relative.starts_with("data/");
    let has_unsafe_segment = relative
        .split('/')
        .any(|segment| matches!(segment, "" | "." | ".."));

    (under_data_root && !has_unsafe_segment).then_some(relative)
}
1383
1384fn print_verify_report(snapshot: &str, report: &VerifyReport) {
1385 println!("Verifying snapshot: {}", report.manifest.snapshot_id);
1386 println!(" Location: {}", snapshot);
1387 if report.manifest.version == MANIFEST_VERSION {
1388 println!(" Manifest: OK (version {})", report.manifest.version);
1389 } else {
1390 println!(
1391 " Manifest: ERROR (version {}, expected {})",
1392 report.manifest.version, MANIFEST_VERSION
1393 );
1394 }
1395 println!(
1396 " Schema files: {}",
1397 if report.schema_index_exists {
1398 format!("OK ({})", SCHEMAS_FILE)
1399 } else {
1400 format!("WARN (missing {})", SCHEMAS_FILE)
1401 }
1402 );
1403 if report.ddl_file_count > 0 {
1404 println!(" DDL files: {} file(s) found", report.ddl_file_count);
1405 } else {
1406 println!(" DDL files: not present");
1407 }
1408
1409 let chunks = &report.chunk_summary;
1410 println!(
1411 " Chunks: {} total ({} completed, {} skipped, {} pending, {} in_progress, {} failed)",
1412 chunks.total,
1413 chunks.completed,
1414 chunks.skipped,
1415 chunks.pending,
1416 chunks.in_progress,
1417 chunks.failed
1418 );
1419
1420 if report.manifest.schema_only {
1421 println!(" Data files: skipped (schema-only)");
1422 } else {
1423 println!(
1424 " Data files: {}/{} files verified",
1425 report.data_files_verified, report.data_files_total
1426 );
1427 }
1428
1429 if report.problems.is_empty() {
1430 println!();
1431 println!("Snapshot is valid.");
1432 return;
1433 }
1434
1435 println!();
1436 println!("Problems found:");
1437 for problem in &report.problems {
1438 println!(" [{}] {}", problem.severity.as_str(), problem.message);
1439 }
1440 println!();
1441 println!(
1442 "Snapshot has {} error(s), {} warning(s).",
1443 report.error_count(),
1444 report.warning_count()
1445 );
1446}
1447
1448fn print_delete_summary(snapshot: &str, manifest: &Manifest) {
1449 println!("Snapshot: {}", manifest.snapshot_id);
1450 println!(" Location: {}", snapshot);
1451 println!(
1452 " Created: {} UTC",
1453 manifest.created_at.format("%Y-%m-%d %H:%M:%S")
1454 );
1455 println!(" Catalog: {}", manifest.catalog);
1456 println!(" Schemas: {}", manifest.schemas.join(", "));
1457 println!(" Chunks: {}", format_delete_chunks(manifest));
1458}
1459
1460fn format_delete_chunks(manifest: &Manifest) -> String {
1461 if manifest.schema_only {
1462 return "0 (schema-only)".to_string();
1463 }
1464
1465 let summary = summarize_chunks(manifest);
1466 if manifest.is_complete() {
1467 format!("{} (all processed)", summary.total)
1468 } else {
1469 format!(
1470 "{} ({} completed, {} skipped, {} pending, {} in_progress, {} failed)",
1471 summary.total,
1472 summary.completed,
1473 summary.skipped,
1474 summary.pending,
1475 summary.in_progress,
1476 summary.failed
1477 )
1478 }
1479}
1480
1481fn confirm_delete(snapshot: &str) -> Result<bool> {
1482 println!();
1483 println!(
1484 "Warning: this removes the entire snapshot directory/prefix, not only files listed in manifest."
1485 );
1486 println!("This will permanently delete all data under:");
1487 println!(" {}", display_snapshot_prefix(snapshot));
1488 print!("Type 'yes' to confirm deletion: ");
1489 io::stdout().flush().map_err(|error| {
1490 IoSnafu {
1491 operation: "flushing delete confirmation prompt",
1492 error,
1493 }
1494 .build()
1495 })?;
1496
1497 let mut input = String::new();
1498 io::stdin().read_line(&mut input).map_err(|error| {
1499 IoSnafu {
1500 operation: "reading delete confirmation",
1501 error,
1502 }
1503 .build()
1504 })?;
1505
1506 Ok(delete_confirmation_matches(&input))
1507}
1508
/// Returns `true` only when `input`, with surrounding whitespace removed, is
/// exactly the lowercase word `yes`.
fn delete_confirmation_matches(input: &str) -> bool {
    matches!(input.trim(), "yes")
}
1512
/// Returns `snapshot` as an owned string that always ends with `/`, appending
/// one only when it is missing.
fn display_snapshot_prefix(snapshot: &str) -> String {
    let mut prefix = snapshot.to_string();
    if !prefix.ends_with('/') {
        prefix.push('/');
    }
    prefix
}
1520
1521#[cfg(test)]
1522mod tests {
1523 use chrono::TimeZone;
1524 use clap::Parser;
1525 use tempfile::tempdir;
1526 use url::Url;
1527
1528 use super::*;
1529 use crate::data::path::ddl_path_for_schema;
1530
1531 #[test]
1532 fn test_ddl_path_for_schema() {
1533 assert_eq!(ddl_path_for_schema("public"), "schema/ddl/public.sql");
1534 assert_eq!(
1535 ddl_path_for_schema("../evil"),
1536 "schema/ddl/%2E%2E%2Fevil.sql"
1537 );
1538 }
1539
1540 #[test]
1541 fn test_build_schema_ddl_order() {
1542 let ddl = build_schema_ddl(
1543 "public",
1544 "CREATE DATABASE public;\n".to_string(),
1545 vec!["PHYSICAL;\n".to_string()],
1546 vec!["TABLE;\n".to_string()],
1547 vec!["VIEW;\n".to_string()],
1548 );
1549
1550 let db_pos = ddl.find("CREATE DATABASE").unwrap();
1551 let physical_pos = ddl.find("PHYSICAL;").unwrap();
1552 let table_pos = ddl.find("TABLE;").unwrap();
1553 let view_pos = ddl.find("VIEW;").unwrap();
1554 assert!(db_pos < physical_pos);
1555 assert!(physical_pos < table_pos);
1556 assert!(table_pos < view_pos);
1557 }
1558
1559 #[tokio::test]
1560 async fn test_build_rejects_chunk_window_without_bounds() {
1561 let cmd = ExportCreateCommand::parse_from([
1562 "export-v2-create",
1563 "--addr",
1564 "127.0.0.1:4000",
1565 "--to",
1566 "file:///tmp/export-v2-test",
1567 "--chunk-time-window",
1568 "1h",
1569 ]);
1570
1571 let result = cmd.build().await;
1572 assert!(result.is_err());
1573 let error = result.err().unwrap().to_string();
1574
1575 assert!(error.contains("chunk_time_window requires both --start-time and --end-time"));
1576 }
1577
1578 #[tokio::test]
1579 async fn test_build_rejects_data_export_args_in_schema_only_mode() {
1580 let cmd = ExportCreateCommand::parse_from([
1581 "export-v2-create",
1582 "--addr",
1583 "127.0.0.1:4000",
1584 "--to",
1585 "file:///tmp/export-v2-test",
1586 "--schema-only",
1587 "--start-time",
1588 "2024-01-01T00:00:00Z",
1589 "--end-time",
1590 "2024-01-02T00:00:00Z",
1591 "--chunk-time-window",
1592 "1h",
1593 "--format",
1594 "csv",
1595 "--parallelism",
1596 "2",
1597 "--chunk-parallelism",
1598 "2",
1599 ]);
1600
1601 let error = cmd.build().await.err().unwrap().to_string();
1602
1603 assert!(error.contains("--schema-only cannot be used with data export arguments"));
1604 assert!(error.contains("--start-time"));
1605 assert!(error.contains("--end-time"));
1606 assert!(error.contains("--chunk-time-window"));
1607 assert!(error.contains("--format"));
1608 assert!(error.contains("--parallelism"));
1609 assert!(error.contains("--chunk-parallelism"));
1610 }
1611
1612 #[test]
1613 fn test_chunk_parallelism_defaults_to_one() {
1614 let cmd = ExportCreateCommand::parse_from([
1615 "export-v2-create",
1616 "--addr",
1617 "127.0.0.1:4000",
1618 "--to",
1619 "file:///tmp/export-v2-test",
1620 ]);
1621
1622 assert_eq!(1, cmd.chunk_parallelism);
1623 }
1624
1625 #[test]
1626 fn test_progress_mode_defaults_to_auto() {
1627 let cmd = ExportCreateCommand::parse_from([
1628 "export-v2-create",
1629 "--addr",
1630 "127.0.0.1:4000",
1631 "--to",
1632 "file:///tmp/export-v2-test",
1633 ]);
1634
1635 assert_eq!(ProgressMode::Auto, cmd.progress);
1636 }
1637
1638 #[test]
1639 fn test_progress_mode_parses_explicit_values() {
1640 for (value, expected) in [
1641 ("auto", ProgressMode::Auto),
1642 ("always", ProgressMode::Always),
1643 ("never", ProgressMode::Never),
1644 ] {
1645 let cmd = ExportCreateCommand::parse_from([
1646 "export-v2-create",
1647 "--addr",
1648 "127.0.0.1:4000",
1649 "--to",
1650 "file:///tmp/export-v2-test",
1651 "--progress",
1652 value,
1653 ]);
1654
1655 assert_eq!(expected, cmd.progress);
1656 }
1657 }
1658
1659 #[test]
1660 fn test_progress_mode_rejects_unknown_value() {
1661 assert!(
1662 ExportCreateCommand::try_parse_from([
1663 "export-v2-create",
1664 "--addr",
1665 "127.0.0.1:4000",
1666 "--to",
1667 "file:///tmp/export-v2-test",
1668 "--progress",
1669 "bogus",
1670 ])
1671 .is_err()
1672 );
1673 }
1674
1675 #[test]
1676 fn test_chunk_parallelism_parses_valid_value() {
1677 let cmd = ExportCreateCommand::parse_from([
1678 "export-v2-create",
1679 "--addr",
1680 "127.0.0.1:4000",
1681 "--to",
1682 "file:///tmp/export-v2-test",
1683 "--chunk-parallelism",
1684 "64",
1685 ]);
1686
1687 assert_eq!(64, cmd.chunk_parallelism);
1688 }
1689
1690 #[test]
1691 fn test_chunk_parallelism_rejects_out_of_range_values() {
1692 assert!(
1693 ExportCreateCommand::try_parse_from([
1694 "export-v2-create",
1695 "--addr",
1696 "127.0.0.1:4000",
1697 "--to",
1698 "file:///tmp/export-v2-test",
1699 "--chunk-parallelism",
1700 "0",
1701 ])
1702 .is_err()
1703 );
1704 assert!(
1705 ExportCreateCommand::try_parse_from([
1706 "export-v2-create",
1707 "--addr",
1708 "127.0.0.1:4000",
1709 "--to",
1710 "file:///tmp/export-v2-test",
1711 "--chunk-parallelism",
1712 "65",
1713 ])
1714 .is_err()
1715 );
1716 }
1717
1718 #[test]
1719 fn test_schema_only_mode_mismatch_error_message() {
1720 let error = crate::data::export_v2::error::SchemaOnlyModeMismatchSnafu {
1721 existing_schema_only: false,
1722 requested_schema_only: true,
1723 }
1724 .build()
1725 .to_string();
1726
1727 assert!(error.contains("existing: false"));
1728 assert!(error.contains("requested: true"));
1729 }
1730
1731 #[test]
1732 fn test_validate_resume_config_rejects_catalog_mismatch() {
1733 let manifest = Manifest::new_for_export(
1734 "greptime".to_string(),
1735 vec!["public".to_string()],
1736 false,
1737 TimeRange::unbounded(),
1738 DataFormat::Parquet,
1739 None,
1740 )
1741 .unwrap();
1742 let config = ExportConfig {
1743 catalog: "other".to_string(),
1744 schemas: None,
1745 schema_only: false,
1746 format: DataFormat::Parquet,
1747 force: false,
1748 time_range: TimeRange::unbounded(),
1749 chunk_time_window: None,
1750 parallelism: 1,
1751 chunk_parallelism: 1,
1752 progress: ProgressMode::Auto,
1753 snapshot_uri: "file:///tmp/snapshot".to_string(),
1754 storage_config: ObjectStoreConfig::default(),
1755 };
1756
1757 let error = validate_resume_config(&manifest, &config)
1758 .err()
1759 .unwrap()
1760 .to_string();
1761 assert!(error.contains("catalog"));
1762 }
1763
1764 #[test]
1765 fn test_validate_resume_config_accepts_schema_selection_with_different_case_and_order() {
1766 let manifest = Manifest::new_for_export(
1767 "greptime".to_string(),
1768 vec!["public".to_string(), "analytics".to_string()],
1769 false,
1770 TimeRange::unbounded(),
1771 DataFormat::Parquet,
1772 None,
1773 )
1774 .unwrap();
1775 let config = ExportConfig {
1776 catalog: "greptime".to_string(),
1777 schemas: Some(vec![
1778 "ANALYTICS".to_string(),
1779 "PUBLIC".to_string(),
1780 "public".to_string(),
1781 ]),
1782 schema_only: false,
1783 format: DataFormat::Parquet,
1784 force: false,
1785 time_range: TimeRange::unbounded(),
1786 chunk_time_window: None,
1787 parallelism: 1,
1788 chunk_parallelism: 1,
1789 progress: ProgressMode::Auto,
1790 snapshot_uri: "file:///tmp/snapshot".to_string(),
1791 storage_config: ObjectStoreConfig::default(),
1792 };
1793
1794 assert!(validate_resume_config(&manifest, &config).is_ok());
1795 }
1796
1797 #[test]
1798 fn test_validate_resume_config_rejects_chunk_plan_mismatch() {
1799 let start = chrono::Utc.with_ymd_and_hms(2025, 1, 1, 0, 0, 0).unwrap();
1800 let end = chrono::Utc.with_ymd_and_hms(2025, 1, 1, 2, 0, 0).unwrap();
1801 let time_range = TimeRange::new(Some(start), Some(end));
1802 let manifest = Manifest::new_for_export(
1803 "greptime".to_string(),
1804 vec!["public".to_string()],
1805 false,
1806 time_range.clone(),
1807 DataFormat::Parquet,
1808 None,
1809 )
1810 .unwrap();
1811 let config = ExportConfig {
1812 catalog: "greptime".to_string(),
1813 schemas: None,
1814 schema_only: false,
1815 format: DataFormat::Parquet,
1816 force: false,
1817 time_range,
1818 chunk_time_window: Some(Duration::from_secs(3600)),
1819 parallelism: 1,
1820 chunk_parallelism: 1,
1821 progress: ProgressMode::Auto,
1822 snapshot_uri: "file:///tmp/snapshot".to_string(),
1823 storage_config: ObjectStoreConfig::default(),
1824 };
1825
1826 let error = validate_resume_config(&manifest, &config)
1827 .err()
1828 .unwrap()
1829 .to_string();
1830 assert!(error.contains("chunk plan"));
1831 }
1832
1833 #[test]
1834 fn test_validate_resume_config_rejects_format_mismatch() {
1835 let manifest = Manifest::new_for_export(
1836 "greptime".to_string(),
1837 vec!["public".to_string()],
1838 false,
1839 TimeRange::unbounded(),
1840 DataFormat::Parquet,
1841 None,
1842 )
1843 .unwrap();
1844 let config = ExportConfig {
1845 catalog: "greptime".to_string(),
1846 schemas: None,
1847 schema_only: false,
1848 format: DataFormat::Csv,
1849 force: false,
1850 time_range: TimeRange::unbounded(),
1851 chunk_time_window: None,
1852 parallelism: 1,
1853 chunk_parallelism: 1,
1854 progress: ProgressMode::Auto,
1855 snapshot_uri: "file:///tmp/snapshot".to_string(),
1856 storage_config: ObjectStoreConfig::default(),
1857 };
1858
1859 let error = validate_resume_config(&manifest, &config)
1860 .err()
1861 .unwrap()
1862 .to_string();
1863 assert!(error.contains("format"));
1864 }
1865
1866 #[test]
1867 fn test_validate_resume_config_rejects_time_range_mismatch() {
1868 let start = chrono::Utc.with_ymd_and_hms(2025, 1, 1, 0, 0, 0).unwrap();
1869 let end = chrono::Utc.with_ymd_and_hms(2025, 1, 1, 1, 0, 0).unwrap();
1870 let manifest = Manifest::new_for_export(
1871 "greptime".to_string(),
1872 vec!["public".to_string()],
1873 false,
1874 TimeRange::new(Some(start), Some(end)),
1875 DataFormat::Parquet,
1876 None,
1877 )
1878 .unwrap();
1879 let config = ExportConfig {
1880 catalog: "greptime".to_string(),
1881 schemas: None,
1882 schema_only: false,
1883 format: DataFormat::Parquet,
1884 force: false,
1885 time_range: TimeRange::new(Some(start), Some(start)),
1886 chunk_time_window: None,
1887 parallelism: 1,
1888 chunk_parallelism: 1,
1889 progress: ProgressMode::Auto,
1890 snapshot_uri: "file:///tmp/snapshot".to_string(),
1891 storage_config: ObjectStoreConfig::default(),
1892 };
1893
1894 let error = validate_resume_config(&manifest, &config)
1895 .err()
1896 .unwrap()
1897 .to_string();
1898 assert!(error.contains("time_range"));
1899 }
1900
1901 #[tokio::test]
1902 async fn test_scan_snapshots_sorts_and_tracks_unreadable_manifests() {
1903 let dir = tempdir().unwrap();
1904 write_test_manifest(
1905 dir.path(),
1906 "older",
1907 test_manifest(
1908 chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
1909 false,
1910 true,
1911 ),
1912 );
1913 write_test_manifest(
1914 dir.path(),
1915 "newer",
1916 test_manifest(
1917 chrono::Utc.with_ymd_and_hms(2026, 2, 1, 0, 0, 0).unwrap(),
1918 false,
1919 true,
1920 ),
1921 );
1922
1923 std::fs::create_dir_all(dir.path().join("empty-dir")).unwrap();
1924 std::fs::create_dir_all(dir.path().join("not-snapshot")).unwrap();
1925 std::fs::write(dir.path().join("not-snapshot").join("data.txt"), "x").unwrap();
1926 std::fs::create_dir_all(dir.path().join("broken")).unwrap();
1927 std::fs::write(dir.path().join("broken").join(MANIFEST_FILE), "{not-json").unwrap();
1928
1929 let uri = Url::from_directory_path(dir.path()).unwrap().to_string();
1930 let storage = OpenDalStorage::from_file_uri(&uri).unwrap();
1931 let result = scan_snapshots(&storage).await.unwrap();
1932
1933 assert_eq!(result.snapshots.len(), 2);
1934 assert_eq!(
1935 result.snapshots[0].manifest.created_at,
1936 chrono::Utc.with_ymd_and_hms(2026, 2, 1, 0, 0, 0).unwrap()
1937 );
1938 assert_eq!(
1939 result.snapshots[1].manifest.created_at,
1940 chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap()
1941 );
1942 assert_eq!(result.unreadable, vec!["broken/".to_string()]);
1943 assert_eq!(result.snapshots[0].path, "newer/");
1944 assert_eq!(result.snapshots[1].path, "older/");
1945 }
1946
1947 #[test]
1948 fn test_snapshot_list_status_and_chunk_summary() {
1949 let schema_only = test_manifest(
1950 chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
1951 true,
1952 true,
1953 );
1954 assert_eq!(snapshot_status(&schema_only), "schema-only");
1955 assert_eq!(format_list_chunks(&schema_only), "0");
1956
1957 let complete = test_manifest(
1958 chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
1959 false,
1960 true,
1961 );
1962 assert_eq!(snapshot_status(&complete), "complete");
1963 assert_eq!(format_list_chunks(&complete), "2/2");
1964 assert_eq!(format_delete_chunks(&complete), "2 (all processed)");
1965
1966 let incomplete = test_manifest(
1967 chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
1968 false,
1969 false,
1970 );
1971 assert_eq!(snapshot_status(&incomplete), "incomplete");
1972 assert_eq!(format_list_chunks(&incomplete), "1/2");
1973 assert_eq!(
1974 format_delete_chunks(&incomplete),
1975 "2 (1 completed, 0 skipped, 1 pending, 0 in_progress, 0 failed)"
1976 );
1977 }
1978
1979 #[tokio::test]
1980 async fn test_delete_build_rejects_bucket_root_uri() {
1981 let cmd = ExportDeleteCommand::parse_from([
1982 "export-v2-delete",
1983 "--snapshot",
1984 "s3://bucket",
1985 "--no-confirm",
1986 ]);
1987
1988 let error = cmd.build().await.err().unwrap().to_string();
1989 assert!(error.contains("non-empty path"));
1990 }
1991
1992 #[test]
1993 fn test_delete_skip_confirmation_aliases() {
1994 let no_confirm = ExportDeleteCommand::parse_from([
1995 "export-v2-delete",
1996 "--snapshot",
1997 "s3://bucket/snapshot",
1998 "--no-confirm",
1999 ]);
2000 assert!(no_confirm.skip_confirmation);
2001
2002 let yes = ExportDeleteCommand::parse_from([
2003 "export-v2-delete",
2004 "--snapshot",
2005 "s3://bucket/snapshot",
2006 "--yes",
2007 ]);
2008 assert!(yes.skip_confirmation);
2009 }
2010
2011 #[tokio::test]
2012 async fn test_delete_snapshot_with_no_confirm_removes_snapshot_contents() {
2013 let parent = tempdir().unwrap();
2014 let snapshot = parent.path().join("snapshot");
2015 let sibling = parent.path().join("sibling");
2016 std::fs::create_dir_all(&snapshot).unwrap();
2017 std::fs::create_dir_all(&sibling).unwrap();
2018 std::fs::write(sibling.join("keep.txt"), b"keep").unwrap();
2019 write_root_manifest(
2020 &snapshot,
2021 test_manifest(
2022 chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2023 true,
2024 true,
2025 ),
2026 );
2027 write_snapshot_file(&snapshot, "schema/schemas.json", b"[]");
2028
2029 let uri = Url::from_directory_path(&snapshot).unwrap().to_string();
2030 let delete = ExportDelete {
2031 snapshot: uri,
2032 skip_confirmation: true,
2033 storage: file_storage_for_dir(&snapshot),
2034 };
2035
2036 delete
2037 .run_with_confirmation(|_| unreachable!())
2038 .await
2039 .unwrap();
2040
2041 assert!(!snapshot.join(MANIFEST_FILE).exists());
2042 assert!(!snapshot.join("schema/schemas.json").exists());
2043 assert!(sibling.join("keep.txt").exists());
2044 }
2045
2046 #[tokio::test]
2047 async fn test_delete_snapshot_requires_manifest() {
2048 let dir = tempdir().unwrap();
2049 let uri = Url::from_directory_path(dir.path()).unwrap().to_string();
2050 let delete = ExportDelete {
2051 snapshot: uri,
2052 skip_confirmation: true,
2053 storage: file_storage_for_dir(dir.path()),
2054 };
2055
2056 let error = delete
2057 .run_with_confirmation(|_| unreachable!())
2058 .await
2059 .err()
2060 .unwrap()
2061 .to_string();
2062
2063 assert!(error.contains("Snapshot not found"));
2064 assert!(dir.path().exists());
2065 }
2066
2067 #[tokio::test]
2068 async fn test_delete_snapshot_cancels_without_exact_confirmation() {
2069 let dir = tempdir().unwrap();
2070 write_root_manifest(
2071 dir.path(),
2072 test_manifest(
2073 chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2074 true,
2075 true,
2076 ),
2077 );
2078 write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2079 let uri = Url::from_directory_path(dir.path()).unwrap().to_string();
2080 let delete = ExportDelete {
2081 snapshot: uri.clone(),
2082 skip_confirmation: false,
2083 storage: file_storage_for_dir(dir.path()),
2084 };
2085
2086 delete
2087 .run_with_confirmation(|snapshot| {
2088 assert_eq!(snapshot, uri);
2089 Ok(false)
2090 })
2091 .await
2092 .unwrap();
2093
2094 assert!(dir.path().join(MANIFEST_FILE).exists());
2095 assert!(dir.path().join("schema/schemas.json").exists());
2096 }
2097
2098 #[test]
2099 fn test_delete_confirmation_requires_exact_yes() {
2100 assert!(delete_confirmation_matches("yes"));
2101 assert!(delete_confirmation_matches(" yes\n"));
2102 assert!(!delete_confirmation_matches("YES"));
2103 assert!(!delete_confirmation_matches("y"));
2104 assert!(!delete_confirmation_matches("yes please"));
2105 }
2106
2107 #[test]
2108 fn test_display_snapshot_prefix_adds_trailing_slash() {
2109 assert_eq!(
2110 display_snapshot_prefix("s3://bucket/snapshot"),
2111 "s3://bucket/snapshot/"
2112 );
2113 assert_eq!(
2114 display_snapshot_prefix("s3://bucket/snapshot/"),
2115 "s3://bucket/snapshot/"
2116 );
2117 }
2118
2119 #[tokio::test]
2120 async fn test_verify_snapshot_accepts_valid_full_snapshot() {
2121 let dir = tempdir().unwrap();
2122 let manifest = test_manifest(
2123 chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2124 false,
2125 true,
2126 );
2127 write_root_manifest(dir.path(), manifest);
2128 write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2129 write_default_ddl_files(dir.path());
2130 write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
2131
2132 let storage = file_storage_for_dir(dir.path());
2133 let report = verify_snapshot(&storage).await.unwrap();
2134
2135 assert_eq!(report.error_count(), 0);
2136 assert_eq!(report.warning_count(), 0);
2137 assert_eq!(report.data_files_total, 1);
2138 assert_eq!(report.data_files_verified, 1);
2139 }
2140
2141 #[tokio::test]
2142 async fn test_verify_snapshot_reports_missing_data_file_and_failed_chunk() {
2143 let dir = tempdir().unwrap();
2144 let mut manifest = test_manifest(
2145 chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2146 false,
2147 true,
2148 );
2149 manifest.chunks[1].mark_failed("copy failed".to_string());
2150 write_root_manifest(dir.path(), manifest);
2151 write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2152 write_default_ddl_files(dir.path());
2153
2154 let storage = file_storage_for_dir(dir.path());
2155 let report = verify_snapshot(&storage).await.unwrap();
2156
2157 assert_eq!(report.error_count(), 2);
2158 assert!(
2159 report
2160 .problems
2161 .iter()
2162 .any(|problem| problem.message.contains("missing file"))
2163 );
2164 assert!(
2165 report
2166 .problems
2167 .iter()
2168 .any(|problem| problem.message.contains("status is 'failed'"))
2169 );
2170 }
2171
2172 #[tokio::test]
2173 async fn test_verify_snapshot_reports_missing_schema_index_as_warning() {
2174 let dir = tempdir().unwrap();
2175 let manifest = test_manifest(
2176 chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2177 false,
2178 true,
2179 );
2180 write_root_manifest(dir.path(), manifest);
2181 write_default_ddl_files(dir.path());
2182 write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
2183
2184 let storage = file_storage_for_dir(dir.path());
2185 let report = verify_snapshot(&storage).await.unwrap();
2186
2187 assert_eq!(report.error_count(), 0);
2188 assert_eq!(report.warning_count(), 1);
2189 assert!(
2190 report
2191 .problems
2192 .iter()
2193 .any(|problem| problem.message.contains("Missing schema index"))
2194 );
2195 }
2196
2197 #[tokio::test]
2198 async fn test_verify_snapshot_rejects_schema_only_snapshot_with_chunks() {
2199 let dir = tempdir().unwrap();
2200 let mut manifest = test_manifest(
2201 chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2202 true,
2203 true,
2204 );
2205 let mut chunk = ChunkMeta::new(1, TimeRange::unbounded());
2206 chunk.mark_completed(vec!["data/public/1/file.parquet".to_string()], None);
2207 manifest.chunks.push(chunk);
2208 write_root_manifest(dir.path(), manifest);
2209 write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2210 write_default_ddl_files(dir.path());
2211
2212 let storage = file_storage_for_dir(dir.path());
2213 let report = verify_snapshot(&storage).await.unwrap();
2214
2215 assert_eq!(report.error_count(), 1);
2216 assert_eq!(report.data_files_total, 0);
2217 assert!(
2218 report
2219 .problems
2220 .iter()
2221 .any(|problem| problem.message.contains("should not contain data chunks"))
2222 );
2223 }
2224
2225 #[tokio::test]
2226 async fn test_verify_snapshot_rejects_schema_only_snapshot_with_data_files() {
2227 let dir = tempdir().unwrap();
2228 let manifest = test_manifest(
2229 chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2230 true,
2231 true,
2232 );
2233 write_root_manifest(dir.path(), manifest);
2234 write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2235 write_default_ddl_files(dir.path());
2236 write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
2237
2238 let storage = file_storage_for_dir(dir.path());
2239 let report = verify_snapshot(&storage).await.unwrap();
2240
2241 assert_eq!(report.error_count(), 1);
2242 assert_eq!(report.data_files_total, 0);
2243 assert!(
2244 report
2245 .problems
2246 .iter()
2247 .any(|problem| problem.message.contains("should not contain data files"))
2248 );
2249 }
2250
2251 #[tokio::test]
2252 async fn test_verify_snapshot_rejects_full_snapshot_without_chunks() {
2253 let dir = tempdir().unwrap();
2254 let mut manifest = test_manifest(
2255 chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2256 false,
2257 true,
2258 );
2259 manifest.chunks.clear();
2260 write_root_manifest(dir.path(), manifest);
2261 write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2262 write_default_ddl_files(dir.path());
2263
2264 let storage = file_storage_for_dir(dir.path());
2265 let report = verify_snapshot(&storage).await.unwrap();
2266
2267 assert_eq!(report.error_count(), 1);
2268 assert_eq!(report.data_files_total, 0);
2269 assert!(
2270 report
2271 .problems
2272 .iter()
2273 .any(|problem| problem.message.contains("at least one data chunk"))
2274 );
2275 }
2276
2277 #[tokio::test]
2278 async fn test_verify_snapshot_rejects_skipped_chunk_data_files() {
2279 let dir = tempdir().unwrap();
2280 let manifest = test_manifest(
2281 chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2282 false,
2283 true,
2284 );
2285 write_root_manifest(dir.path(), manifest);
2286 write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2287 write_default_ddl_files(dir.path());
2288 write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
2289 write_snapshot_file(dir.path(), "data/public/2/file.parquet", b"data");
2290
2291 let storage = file_storage_for_dir(dir.path());
2292 let report = verify_snapshot(&storage).await.unwrap();
2293
2294 assert_eq!(report.error_count(), 1);
2295 assert!(
2296 report
2297 .problems
2298 .iter()
2299 .any(|problem| { problem.message.contains("Unexpected data file") })
2300 );
2301 }
2302
2303 #[tokio::test]
2304 async fn test_verify_snapshot_rejects_duplicate_chunk_ids() {
2305 let dir = tempdir().unwrap();
2306 let mut manifest = test_manifest(
2307 chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2308 false,
2309 true,
2310 );
2311 let mut duplicate = ChunkMeta::new(1, TimeRange::unbounded());
2312 duplicate.mark_completed(vec!["data/public/1/file.parquet".to_string()], None);
2313 manifest.chunks.push(duplicate);
2314 write_root_manifest(dir.path(), manifest);
2315 write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2316 write_default_ddl_files(dir.path());
2317 write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
2318
2319 let storage = file_storage_for_dir(dir.path());
2320 let report = verify_snapshot(&storage).await.unwrap();
2321
2322 assert_eq!(report.error_count(), 1);
2323 assert!(
2324 report
2325 .problems
2326 .iter()
2327 .any(|problem| problem.message.contains("duplicate chunk id"))
2328 );
2329 }
2330
2331 #[tokio::test]
2332 async fn test_verify_snapshot_requires_all_schema_ddl() {
2333 let dir = tempdir().unwrap();
2334 let manifest = test_manifest(
2335 chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2336 true,
2337 true,
2338 );
2339 write_root_manifest(dir.path(), manifest);
2340 write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2341 write_snapshot_file(
2342 dir.path(),
2343 "schema/ddl/public.sql",
2344 b"CREATE DATABASE public;",
2345 );
2346
2347 let storage = file_storage_for_dir(dir.path());
2348 let report = verify_snapshot(&storage).await.unwrap();
2349
2350 assert_eq!(report.error_count(), 1);
2351 assert!(
2352 report
2353 .problems
2354 .iter()
2355 .any(|problem| problem.message.contains("analytics"))
2356 );
2357 }
2358
2359 #[tokio::test]
2360 async fn test_verify_snapshot_reports_missing_ddl_dir() {
2361 let dir = tempdir().unwrap();
2362 let manifest = test_manifest(
2363 chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2364 false,
2365 true,
2366 );
2367 write_root_manifest(dir.path(), manifest);
2368 write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2369 write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
2370
2371 let storage = file_storage_for_dir(dir.path());
2372 let report = verify_snapshot(&storage).await.unwrap();
2373
2374 assert_eq!(report.error_count(), 2);
2375 assert!(
2376 report
2377 .problems
2378 .iter()
2379 .any(|problem| problem.message.contains("schema/ddl/public.sql"))
2380 );
2381 assert!(
2382 report
2383 .problems
2384 .iter()
2385 .any(|problem| problem.message.contains("schema/ddl/analytics.sql"))
2386 );
2387 }
2388
2389 #[tokio::test]
2390 async fn test_verify_snapshot_reports_manifest_version_mismatch() {
2391 let dir = tempdir().unwrap();
2392 let mut manifest = test_manifest(
2393 chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2394 false,
2395 true,
2396 );
2397 manifest.version = MANIFEST_VERSION + 1;
2398 write_root_manifest(dir.path(), manifest);
2399 write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2400 write_default_ddl_files(dir.path());
2401 write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
2402
2403 let storage = file_storage_for_dir(dir.path());
2404 let report = verify_snapshot(&storage).await.unwrap();
2405
2406 assert_eq!(report.error_count(), 1);
2407 assert!(
2408 report
2409 .problems
2410 .iter()
2411 .any(|problem| problem.message.contains("Manifest version mismatch"))
2412 );
2413 }
2414
2415 #[tokio::test]
2416 async fn test_verify_snapshot_rejects_invalid_data_file_paths() {
2417 let dir = tempdir().unwrap();
2418 let mut manifest = test_manifest(
2419 chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2420 false,
2421 true,
2422 );
2423 manifest.chunks[0].files = vec!["data/public/1/../file.parquet".to_string()];
2424 write_root_manifest(dir.path(), manifest);
2425 write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2426 write_default_ddl_files(dir.path());
2427
2428 let storage = file_storage_for_dir(dir.path());
2429 let report = verify_snapshot(&storage).await.unwrap();
2430
2431 assert_eq!(report.error_count(), 1);
2432 assert!(
2433 report
2434 .problems
2435 .iter()
2436 .any(|problem| problem.message.contains("invalid data file path"))
2437 );
2438 assert_eq!(report.data_files_verified, 0);
2439 }
2440
2441 #[tokio::test]
2442 async fn test_verify_snapshot_accepts_leading_slash_manifest_data_paths() {
2443 let dir = tempdir().unwrap();
2444 let mut manifest = test_manifest(
2445 chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2446 false,
2447 true,
2448 );
2449 manifest.chunks[0].files = vec!["/data/public/1/file.parquet".to_string()];
2450 write_root_manifest(dir.path(), manifest);
2451 write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2452 write_default_ddl_files(dir.path());
2453 write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
2454
2455 let storage = file_storage_for_dir(dir.path());
2456 let report = verify_snapshot(&storage).await.unwrap();
2457
2458 assert_eq!(report.error_count(), 0);
2459 assert_eq!(report.data_files_verified, 1);
2460 }
2461
2462 #[tokio::test]
2463 async fn test_verify_snapshot_rejects_unlisted_files_under_completed_chunk_prefix() {
2464 let dir = tempdir().unwrap();
2465 let manifest = test_manifest(
2466 chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2467 false,
2468 true,
2469 );
2470 write_root_manifest(dir.path(), manifest);
2471 write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2472 write_default_ddl_files(dir.path());
2473 write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
2474 write_snapshot_file(dir.path(), "data/public/1/extra.parquet", b"data");
2475
2476 let storage = file_storage_for_dir(dir.path());
2477 let report = verify_snapshot(&storage).await.unwrap();
2478
2479 assert_eq!(report.error_count(), 1);
2480 assert!(
2481 report
2482 .problems
2483 .iter()
2484 .any(|problem| problem.message.contains("Unexpected data file"))
2485 );
2486 assert_eq!(report.data_files_verified, 1);
2487 }
2488
2489 #[tokio::test]
2490 async fn test_verify_snapshot_rejects_orphan_data_files_outside_known_chunk_prefixes() {
2491 let dir = tempdir().unwrap();
2492 let manifest = test_manifest(
2493 chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2494 false,
2495 true,
2496 );
2497 write_root_manifest(dir.path(), manifest);
2498 write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2499 write_default_ddl_files(dir.path());
2500 write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
2501 write_snapshot_file(dir.path(), "data/public/99/file.parquet", b"data");
2502
2503 let storage = file_storage_for_dir(dir.path());
2504 let report = verify_snapshot(&storage).await.unwrap();
2505
2506 assert_eq!(report.error_count(), 1);
2507 assert!(
2508 report
2509 .problems
2510 .iter()
2511 .any(|problem| problem.message.contains("Unexpected data file"))
2512 );
2513 assert_eq!(report.data_files_verified, 1);
2514 }
2515
2516 #[tokio::test]
2517 async fn test_verify_snapshot_rejects_data_files_under_wrong_chunk_or_schema() {
2518 let dir = tempdir().unwrap();
2519 let mut manifest = test_manifest(
2520 chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2521 false,
2522 true,
2523 );
2524 manifest.chunks[0].files = vec![
2525 "data/public/99/file.parquet".to_string(),
2526 "data/metrics/1/file.parquet".to_string(),
2527 ];
2528 write_root_manifest(dir.path(), manifest);
2529 write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2530 write_default_ddl_files(dir.path());
2531 write_snapshot_file(dir.path(), "data/public/99/file.parquet", b"data");
2532 write_snapshot_file(dir.path(), "data/metrics/1/file.parquet", b"data");
2533
2534 let storage = file_storage_for_dir(dir.path());
2535 let report = verify_snapshot(&storage).await.unwrap();
2536
2537 assert_eq!(report.error_count(), 2);
2538 assert_eq!(report.data_files_verified, 0);
2539 assert!(
2540 report
2541 .problems
2542 .iter()
2543 .all(|problem| problem.message.contains("invalid data file path"))
2544 );
2545 }
2546
/// Exercises `build_verify_plan` directly on an in-memory manifest.
///
/// On top of the chunks from `test_manifest`, a chunk marked failed (id 3)
/// and a freshly created chunk (id 4) are appended. The plan must queue only
/// the completed chunk's file and report one problem each for the 'failed'
/// and 'pending' statuses.
#[test]
fn test_build_verify_plan_classifies_chunks_without_io() {
    let created_at = chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap();
    let mut manifest = test_manifest(created_at, false, true);

    let mut failed_chunk = ChunkMeta::new(3, TimeRange::unbounded());
    failed_chunk.mark_failed("boom".to_string());
    let untouched_chunk = ChunkMeta::new(4, TimeRange::unbounded());
    manifest.chunks.extend([failed_chunk, untouched_chunk]);

    let plan = build_verify_plan(&manifest);

    // True when any planned problem message contains `needle`.
    let has_problem = |needle: &str| {
        plan.problems
            .iter()
            .any(|entry| entry.message.contains(needle))
    };

    assert_eq!(plan.files_to_check.len(), 1);
    let queued = &plan.files_to_check[0];
    assert_eq!(queued.chunk_id, 1);
    assert_eq!(queued.path, "data/public/1/file.parquet");
    assert_eq!(plan.data_files_total, 1);
    assert!(
        plan.claimed_data_files
            .contains("data/public/1/file.parquet")
    );
    assert_eq!(plan.problems.len(), 2);
    assert!(has_problem("status is 'failed'"));
    assert!(has_problem("status is 'pending'"));
}
2584
2585 #[tokio::test]
2586 async fn test_verify_snapshot_produces_deterministic_problem_output() {
2587 let dir = tempdir().unwrap();
2588 let manifest = test_manifest(
2589 chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2590 false,
2591 true,
2592 );
2593 write_root_manifest(dir.path(), manifest);
2594 write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2595 write_default_ddl_files(dir.path());
2596 write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
2597 for i in 0..50 {
2599 write_snapshot_file(
2600 dir.path(),
2601 &format!("data/public/1/orphan_{:02}.parquet", i),
2602 b"x",
2603 );
2604 }
2605
2606 let storage = file_storage_for_dir(dir.path());
2607 let messages = |report: &VerifyReport| {
2608 report
2609 .problems
2610 .iter()
2611 .map(|problem| problem.message.clone())
2612 .collect::<Vec<_>>()
2613 };
2614 let first = messages(&verify_snapshot(&storage).await.unwrap());
2615 let second = messages(&verify_snapshot(&storage).await.unwrap());
2616
2617 assert_eq!(first, second);
2619
2620 let orphans = first
2621 .iter()
2622 .filter(|message| message.contains("Unexpected data file"))
2623 .cloned()
2624 .collect::<Vec<_>>();
2625 assert_eq!(orphans.len(), 50);
2626 let mut sorted = orphans.clone();
2627 sorted.sort();
2628 assert_eq!(orphans, sorted);
2629 }
2630
2631 fn write_test_manifest(root: &std::path::Path, dir: &str, manifest: Manifest) {
2632 let snapshot_dir = root.join(dir);
2633 std::fs::create_dir_all(&snapshot_dir).unwrap();
2634 std::fs::write(
2635 snapshot_dir.join(MANIFEST_FILE),
2636 serde_json::to_vec_pretty(&manifest).unwrap(),
2637 )
2638 .unwrap();
2639 }
2640
2641 fn write_root_manifest(root: &std::path::Path, manifest: Manifest) {
2642 std::fs::write(
2643 root.join(MANIFEST_FILE),
2644 serde_json::to_vec_pretty(&manifest).unwrap(),
2645 )
2646 .unwrap();
2647 }
2648
/// Writes `content` to `relative_path` (a `/`-separated path) underneath
/// `root`, creating any missing parent directories.
///
/// Each `/`-separated segment is pushed onto the path individually rather
/// than joining the raw string. Panics on any filesystem error; intended for
/// tests only.
fn write_snapshot_file(root: &std::path::Path, relative_path: &str, content: &[u8]) {
    let mut target = root.to_path_buf();
    target.extend(relative_path.split('/'));

    let parent_dir = target.parent().unwrap();
    std::fs::create_dir_all(parent_dir).unwrap();
    std::fs::write(&target, content).unwrap();
}
2657
2658 fn write_default_ddl_files(root: &std::path::Path) {
2659 write_snapshot_file(root, "schema/ddl/public.sql", b"CREATE DATABASE public;");
2660 write_snapshot_file(
2661 root,
2662 "schema/ddl/analytics.sql",
2663 b"CREATE DATABASE analytics;",
2664 );
2665 }
2666
2667 fn file_storage_for_dir(root: &std::path::Path) -> OpenDalStorage {
2668 let uri = Url::from_directory_path(root).unwrap().to_string();
2669 OpenDalStorage::from_file_uri(&uri).unwrap()
2670 }
2671
2672 fn test_manifest(
2673 created_at: chrono::DateTime<chrono::Utc>,
2674 schema_only: bool,
2675 complete: bool,
2676 ) -> Manifest {
2677 let mut manifest = Manifest::new_for_export(
2678 "greptime".to_string(),
2679 vec!["public".to_string(), "analytics".to_string()],
2680 schema_only,
2681 TimeRange::unbounded(),
2682 DataFormat::Parquet,
2683 None,
2684 )
2685 .unwrap();
2686 manifest.created_at = created_at;
2687 manifest.updated_at = created_at;
2688
2689 if !schema_only {
2690 manifest.chunks.clear();
2691 let mut first = ChunkMeta::new(1, TimeRange::unbounded());
2692 first.mark_completed(vec!["data/public/1/file.parquet".to_string()], None);
2693 manifest.chunks.push(first);
2694
2695 if complete {
2696 manifest
2697 .chunks
2698 .push(ChunkMeta::skipped(2, TimeRange::unbounded()));
2699 } else {
2700 manifest
2701 .chunks
2702 .push(ChunkMeta::new(2, TimeRange::unbounded()));
2703 }
2704 }
2705
2706 manifest
2707 }
2708}