1use std::collections::HashSet;
18use std::io::{self, Write};
19use std::time::Duration;
20
21use async_trait::async_trait;
22use clap::{Parser, Subcommand};
23use common_error::ext::BoxedError;
24use common_telemetry::info;
25use serde_json::Value;
26use snafu::{OptionExt, ResultExt};
27
28use crate::Tool;
29use crate::common::ObjectStoreConfig;
30use crate::data::export_v2::coordinator::export_data;
31use crate::data::export_v2::error::{
32 ChunkTimeWindowRequiresBoundsSnafu, DatabaseSnafu, EmptyResultSnafu, IoSnafu,
33 ManifestVersionMismatchSnafu, Result, ResumeConfigMismatchSnafu, SchemaOnlyArgsNotAllowedSnafu,
34 SchemaOnlyModeMismatchSnafu, SnapshotVerifyFailedSnafu, UnexpectedValueTypeSnafu,
35};
36use crate::data::export_v2::extractor::SchemaExtractor;
37use crate::data::export_v2::manifest::{
38 ChunkMeta, ChunkStatus, DataFormat, MANIFEST_FILE, MANIFEST_VERSION, Manifest, TimeRange,
39};
40use crate::data::export_v2::schema::{DDL_DIR, SCHEMA_DIR, SCHEMAS_FILE};
41use crate::data::path::{data_dir_for_schema_chunk, ddl_path_for_schema};
42use crate::data::snapshot_storage::{
43 OpenDalStorage, SnapshotStorage, validate_snapshot_uri, validate_uri,
44};
45use crate::data::sql::{escape_sql_identifier, escape_sql_literal};
46use crate::database::{DatabaseClient, parse_proxy_opts};
47
// Subcommands of the snapshot export tool. Each variant wraps the parsed
// arguments of one subcommand; `ExportV2Command::build` turns them into a
// runnable `Tool`.
//
// NOTE(review): plain `//` comments are used on purpose. clap's derive turns
// `///` doc comments into `--help` text, so promoting these would change the
// CLI output; do that only once the wording is agreed.
#[derive(Debug, Subcommand)]
pub enum ExportV2Command {
    // Create a snapshot, or resume an existing one.
    Create(ExportCreateCommand),
    // List the snapshots found under a parent location.
    List(ExportListCommand),
    // Verify a snapshot and report any problems.
    Verify(ExportVerifyCommand),
    // Delete a snapshot.
    Delete(ExportDeleteCommand),
}
60
61impl ExportV2Command {
62 pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
63 match self {
64 ExportV2Command::Create(cmd) => cmd.build().await,
65 ExportV2Command::List(cmd) => cmd.build().await,
66 ExportV2Command::Verify(cmd) => cmd.build().await,
67 ExportV2Command::Delete(cmd) => cmd.build().await,
68 }
69 }
70}
71
// Arguments of the `List` subcommand.
//
// NOTE(review): plain `//` comments are used on purpose. clap's derive turns
// `///` doc comments into `--help` text, which would change the CLI output.
#[derive(Debug, Parser)]
pub struct ExportListCommand {
    // Parent location URI. It is validated with `validate_uri` and opened
    // with `OpenDalStorage::from_parent_uri`; its direct child directories
    // are then scanned for snapshot manifests.
    #[clap(long)]
    location: String,

    // Object-store settings, flattened into this command's own flags.
    #[clap(flatten)]
    storage: ObjectStoreConfig,
}
83
84impl ExportListCommand {
85 pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
86 validate_uri(&self.location).map_err(BoxedError::new)?;
87 let storage = OpenDalStorage::from_parent_uri(&self.location, &self.storage)
88 .map_err(BoxedError::new)?;
89
90 Ok(Box::new(ExportList {
91 location: self.location.clone(),
92 storage,
93 }))
94 }
95}
96
/// Tool that lists the snapshots found under a parent location.
pub struct ExportList {
    /// Location string as given on the command line; only used for display.
    location: String,
    /// Storage handle opened on the parent location.
    storage: OpenDalStorage,
}
102
103#[async_trait]
104impl Tool for ExportList {
105 async fn do_work(&self) -> std::result::Result<(), BoxedError> {
106 self.run().await.map_err(BoxedError::new)
107 }
108}
109
110impl ExportList {
111 async fn run(&self) -> Result<()> {
112 let result = scan_snapshots(&self.storage).await?;
113
114 println!("Scanning: {}", self.location);
115 if result.snapshots.is_empty() {
116 println!("No snapshots found.");
117 } else {
118 print_snapshot_list(&result.snapshots, result.unreadable.len());
119 }
120 print_unreadable_warnings(&result.unreadable);
121
122 Ok(())
123 }
124}
125
// Arguments of the `Verify` subcommand.
//
// NOTE(review): plain `//` comments are used on purpose. clap's derive turns
// `///` doc comments into `--help` text, which would change the CLI output.
#[derive(Debug, Parser)]
pub struct ExportVerifyCommand {
    // URI of the snapshot to verify. It is validated with `validate_uri` and
    // opened with `OpenDalStorage::from_uri`.
    #[clap(long)]
    snapshot: String,

    // Object-store settings, flattened into this command's own flags.
    #[clap(flatten)]
    storage: ObjectStoreConfig,
}
137
138impl ExportVerifyCommand {
139 pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
140 validate_uri(&self.snapshot).map_err(BoxedError::new)?;
141 let storage =
142 OpenDalStorage::from_uri(&self.snapshot, &self.storage).map_err(BoxedError::new)?;
143
144 Ok(Box::new(ExportVerify {
145 snapshot: self.snapshot.clone(),
146 storage,
147 }))
148 }
149}
150
/// Tool that verifies a single snapshot and prints a report.
pub struct ExportVerify {
    /// Snapshot URI as given on the command line; only used for display.
    snapshot: String,
    /// Storage handle opened on the snapshot.
    storage: OpenDalStorage,
}
156
157#[async_trait]
158impl Tool for ExportVerify {
159 async fn do_work(&self) -> std::result::Result<(), BoxedError> {
160 self.run().await.map_err(BoxedError::new)
161 }
162}
163
164impl ExportVerify {
165 async fn run(&self) -> Result<()> {
166 let report = verify_snapshot(&self.storage).await?;
167 print_verify_report(&self.snapshot, &report);
168
169 if report.has_problems() {
170 return SnapshotVerifyFailedSnafu {
171 errors: report.error_count(),
172 warnings: report.warning_count(),
173 }
174 .fail();
175 }
176
177 Ok(())
178 }
179}
180
// Arguments of the `Delete` subcommand.
//
// NOTE(review): plain `//` comments are used on purpose. clap's derive turns
// `///` doc comments into `--help` text, which would change the CLI output.
#[derive(Debug, Parser)]
pub struct ExportDeleteCommand {
    // URI of the snapshot to delete. It is validated with
    // `validate_snapshot_uri` and opened with `OpenDalStorage::from_uri`.
    #[clap(long)]
    snapshot: String,

    // Skips the confirmation step before deleting. Exposed as `--no-confirm`
    // with `yes` as an alias.
    #[clap(long = "no-confirm", alias = "yes")]
    skip_confirmation: bool,

    // Object-store settings, flattened into this command's own flags.
    #[clap(flatten)]
    storage: ObjectStoreConfig,
}
196
197impl ExportDeleteCommand {
198 pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
199 validate_snapshot_uri(&self.snapshot).map_err(BoxedError::new)?;
200 let storage =
201 OpenDalStorage::from_uri(&self.snapshot, &self.storage).map_err(BoxedError::new)?;
202
203 Ok(Box::new(ExportDelete {
204 snapshot: self.snapshot.clone(),
205 skip_confirmation: self.skip_confirmation,
206 storage,
207 }))
208 }
209}
210
/// Tool that deletes a single snapshot, optionally after a confirmation step.
pub struct ExportDelete {
    /// Snapshot URI as given on the command line; used for display and passed
    /// to the confirmation callback.
    snapshot: String,
    /// When `true`, the confirmation callback is not invoked.
    skip_confirmation: bool,
    /// Storage handle opened on the snapshot.
    storage: OpenDalStorage,
}
217
218#[async_trait]
219impl Tool for ExportDelete {
220 async fn do_work(&self) -> std::result::Result<(), BoxedError> {
221 self.run().await.map_err(BoxedError::new)
222 }
223}
224
225impl ExportDelete {
226 async fn run(&self) -> Result<()> {
227 self.run_with_confirmation(confirm_delete).await
228 }
229
230 async fn run_with_confirmation<F>(&self, confirm: F) -> Result<()>
231 where
232 F: FnOnce(&str) -> Result<bool>,
233 {
234 let manifest = self.storage.read_manifest().await?;
235 print_delete_summary(&self.snapshot, &manifest);
236
237 if !self.skip_confirmation && !confirm(&self.snapshot)? {
238 println!("Deletion cancelled.");
239 return Ok(());
240 }
241
242 println!("Deleting snapshot...");
243 self.storage.delete_snapshot().await?;
244 println!("Snapshot deleted successfully.");
245
246 Ok(())
247 }
248}
249
// Arguments of the `Create` subcommand.
//
// NOTE(review): plain `//` comments are used on purpose. clap's derive turns
// `///` doc comments into `--help` text, which would change the CLI output.
#[derive(Debug, Parser)]
pub struct ExportCreateCommand {
    // Database server address, handed to `DatabaseClient::new`.
    #[clap(long)]
    addr: String,

    // Snapshot destination URI. It is validated with `validate_uri` and
    // opened with `OpenDalStorage::from_uri`.
    #[clap(long)]
    to: String,

    // Catalog to export; defaults to "greptime".
    #[clap(long, default_value = "greptime")]
    catalog: String,

    // Comma-separated schema names. When empty, no schema filter is passed to
    // the schema extractor.
    #[clap(long, value_delimiter = ',')]
    schemas: Vec<String>,

    // Export schema metadata and DDL only. `build` rejects this flag together
    // with --start-time, --end-time, --chunk-time-window, a non-parquet
    // --format, or a --parallelism other than 1.
    #[clap(long)]
    schema_only: bool,

    // Start of the export time range, parsed by `TimeRange::parse`.
    #[clap(long)]
    start_time: Option<String>,

    // End of the export time range, parsed by `TimeRange::parse`.
    #[clap(long)]
    end_time: Option<String>,

    // Chunk time window in humantime syntax. `build` rejects it unless the
    // parsed time range is bounded.
    #[clap(long, value_parser = humantime::parse_duration)]
    chunk_time_window: Option<Duration>,

    // Data file format; defaults to "parquet".
    #[clap(long, value_enum, default_value = "parquet")]
    format: DataFormat,

    // Delete an existing snapshot at the destination and start over, instead
    // of resuming it.
    #[clap(long)]
    force: bool,

    // Parallelism value forwarded to `export_data`; defaults to 1.
    #[clap(long, default_value = "1")]
    parallelism: usize,

    // Forwarded to `DatabaseClient::new`.
    // NOTE(review): presumably `user:password` credentials — confirm.
    #[clap(long)]
    auth_basic: Option<String>,

    // Database client timeout in humantime syntax; `build` falls back to
    // 60 seconds when omitted.
    #[clap(long, value_parser = humantime::parse_duration)]
    timeout: Option<Duration>,

    // Proxy setting, forwarded to `parse_proxy_opts`.
    #[clap(long)]
    proxy: Option<String>,

    // Forwarded to `parse_proxy_opts` and `DatabaseClient::new`.
    // NOTE(review): presumably disables proxy usage — confirm.
    #[clap(long)]
    no_proxy: bool,

    // Object-store settings, flattened into this command's own flags.
    #[clap(flatten)]
    storage: ObjectStoreConfig,
}
324
325impl ExportCreateCommand {
326 pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
327 validate_uri(&self.to).map_err(BoxedError::new)?;
329
330 let time_range = TimeRange::parse(self.start_time.as_deref(), self.end_time.as_deref())
331 .map_err(BoxedError::new)?;
332 if self.chunk_time_window.is_some() && !time_range.is_bounded() {
333 return ChunkTimeWindowRequiresBoundsSnafu
334 .fail()
335 .map_err(BoxedError::new);
336 }
337 if self.schema_only {
338 let mut invalid_args = Vec::new();
339 if self.start_time.is_some() {
340 invalid_args.push("--start-time");
341 }
342 if self.end_time.is_some() {
343 invalid_args.push("--end-time");
344 }
345 if self.chunk_time_window.is_some() {
346 invalid_args.push("--chunk-time-window");
347 }
348 if self.format != DataFormat::Parquet {
349 invalid_args.push("--format");
350 }
351 if self.parallelism != 1 {
352 invalid_args.push("--parallelism");
353 }
354 if !invalid_args.is_empty() {
355 return SchemaOnlyArgsNotAllowedSnafu {
356 args: invalid_args.join(", "),
357 }
358 .fail()
359 .map_err(BoxedError::new);
360 }
361 }
362
363 let schemas = if self.schemas.is_empty() {
365 None
366 } else {
367 Some(self.schemas.clone())
368 };
369
370 let storage = OpenDalStorage::from_uri(&self.to, &self.storage).map_err(BoxedError::new)?;
372
373 let proxy = parse_proxy_opts(self.proxy.clone(), self.no_proxy)?;
375 let database_client = DatabaseClient::new(
376 self.addr.clone(),
377 self.catalog.clone(),
378 self.auth_basic.clone(),
379 self.timeout.unwrap_or(Duration::from_secs(60)),
380 proxy,
381 self.no_proxy,
382 );
383
384 Ok(Box::new(ExportCreate {
385 config: ExportConfig {
386 catalog: self.catalog.clone(),
387 schemas,
388 schema_only: self.schema_only,
389 format: self.format,
390 force: self.force,
391 time_range,
392 chunk_time_window: self.chunk_time_window,
393 parallelism: self.parallelism,
394 snapshot_uri: self.to.clone(),
395 storage_config: self.storage.clone(),
396 },
397 storage: Box::new(storage),
398 database_client,
399 }))
400 }
401}
402
/// Tool that creates a snapshot or resumes an existing one.
pub struct ExportCreate {
    /// Validated export settings derived from the command line.
    config: ExportConfig,
    /// Snapshot storage backend for the destination.
    storage: Box<dyn SnapshotStorage>,
    /// Client used to query the source database.
    database_client: DatabaseClient,
}
409
/// Export settings assembled by `ExportCreateCommand::build`.
struct ExportConfig {
    /// Catalog to export.
    catalog: String,
    /// Schema filter; `None` when no schemas were given on the command line.
    schemas: Option<Vec<String>>,
    /// Whether only schema metadata and DDL are exported (no data).
    schema_only: bool,
    /// Data file format.
    format: DataFormat,
    /// Whether an existing snapshot is deleted instead of resumed.
    force: bool,
    /// Time range parsed from the start/end time arguments.
    time_range: TimeRange,
    /// Optional chunk time window.
    chunk_time_window: Option<Duration>,
    /// Parallelism value forwarded to `export_data`.
    parallelism: usize,
    /// Destination URI, forwarded to `export_data`.
    snapshot_uri: String,
    /// Object-store settings, forwarded to `export_data`.
    storage_config: ObjectStoreConfig,
}
422
423#[async_trait]
424impl Tool for ExportCreate {
425 async fn do_work(&self) -> std::result::Result<(), BoxedError> {
426 self.run().await.map_err(BoxedError::new)
427 }
428}
429
impl ExportCreate {
    /// Creates a new snapshot, or replaces/resumes one that already exists at
    /// the destination.
    ///
    /// - Snapshot exists and `force` is set: it is deleted and the export
    ///   starts from scratch.
    /// - Snapshot exists and `force` is not set: the stored manifest is
    ///   checked against `MANIFEST_VERSION` and the current configuration
    ///   (`validate_resume_config`). The data export is then resumed unless
    ///   the manifest is already complete or schema-only.
    /// - Otherwise: the schema snapshot, the per-schema DDL files and the
    ///   manifest are written, in that order, followed by the data export
    ///   unless `schema_only` is set.
    async fn run(&self) -> Result<()> {
        let exists = self.storage.exists().await?;

        if exists {
            if self.config.force {
                info!("Deleting existing snapshot (--force)");
                self.storage.delete_snapshot().await?;
                // Falls through to the fresh-export path below.
            } else {
                // Resume path: reuse the stored manifest rather than
                // extracting the schema again.
                let mut manifest = self.storage.read_manifest().await?;

                if manifest.version != MANIFEST_VERSION {
                    return ManifestVersionMismatchSnafu {
                        expected: MANIFEST_VERSION,
                        found: manifest.version,
                    }
                    .fail();
                }

                // Refuse to resume with settings that differ from the ones
                // the snapshot was created with.
                validate_resume_config(&manifest, &self.config)?;

                info!(
                    "Resuming existing snapshot: {} (completed: {}/{} chunks)",
                    manifest.snapshot_id,
                    manifest.completed_count(),
                    manifest.chunks.len()
                );

                if manifest.is_complete() {
                    info!("Snapshot is already complete");
                    return Ok(());
                }

                // A schema-only snapshot has no data to export.
                if manifest.schema_only {
                    return Ok(());
                }

                export_data(
                    self.storage.as_ref(),
                    &self.database_client,
                    &self.config.snapshot_uri,
                    &self.config.storage_config,
                    &mut manifest,
                    self.config.parallelism,
                )
                .await?;
                return Ok(());
            }
        }

        // Fresh export: extract the schema snapshot from the database.
        let extractor = SchemaExtractor::new(&self.database_client, &self.config.catalog);
        let schema_snapshot = extractor.extract(self.config.schemas.as_deref()).await?;

        let schema_names: Vec<String> = schema_snapshot
            .schemas
            .iter()
            .map(|s| s.name.clone())
            .collect();
        info!("Exporting schemas: {:?}", schema_names);

        let mut manifest = Manifest::new_for_export(
            self.config.catalog.clone(),
            schema_names.clone(),
            self.config.schema_only,
            self.config.time_range.clone(),
            self.config.format,
            self.config.chunk_time_window,
        )?;

        self.storage.write_schema(&schema_snapshot).await?;
        info!("Exported {} schemas", schema_snapshot.schemas.len());

        let ddl_by_schema = self.build_ddl_by_schema(&schema_names).await?;
        for (schema, ddl) in ddl_by_schema {
            let ddl_path = ddl_path_for_schema(&schema);
            self.storage.write_text(&ddl_path, &ddl).await?;
            info!("Exported DDL for schema {} to {}", schema, ddl_path);
        }

        // NOTE(review): the manifest is written only after the schema and DDL
        // files, presumably so that a present manifest implies those files
        // exist — confirm before reordering.
        self.storage.write_manifest(&manifest).await?;
        info!("Snapshot created: {}", manifest.snapshot_id);

        if !self.config.schema_only {
            export_data(
                self.storage.as_ref(),
                &self.database_client,
                &self.config.snapshot_uri,
                &self.config.storage_config,
                &mut manifest,
                self.config.parallelism,
            )
            .await?;
        }

        Ok(())
    }

    /// Builds the DDL script text for every schema in `schema_names`.
    ///
    /// Returns `(schema, ddl)` pairs sorted by schema name. Within a script
    /// the order is: `SHOW CREATE DATABASE` output, then the `__tsid` tables
    /// from `get_metric_physical_tables`, then the remaining tables, then
    /// views; each group is sorted by name.
    async fn build_ddl_by_schema(&self, schema_names: &[String]) -> Result<Vec<(String, String)>> {
        let mut schemas = schema_names.to_vec();
        schemas.sort();

        let mut ddl_by_schema = Vec::with_capacity(schemas.len());
        for schema in schemas {
            let create_database = self.show_create("DATABASE", &schema, None).await?;

            let (mut physical_tables, mut tables, mut views) =
                self.get_schema_objects(&schema).await?;
            // Sorting keeps the generated script independent of query order.
            physical_tables.sort();
            let mut physical_ddls = Vec::with_capacity(physical_tables.len());
            for table in physical_tables {
                physical_ddls.push(self.show_create("TABLE", &schema, Some(&table)).await?);
            }

            tables.sort();
            let mut table_ddls = Vec::with_capacity(tables.len());
            for table in tables {
                table_ddls.push(self.show_create("TABLE", &schema, Some(&table)).await?);
            }

            views.sort();
            let mut view_ddls = Vec::with_capacity(views.len());
            for view in views {
                view_ddls.push(self.show_create("VIEW", &schema, Some(&view)).await?);
            }

            let ddl = build_schema_ddl(
                &schema,
                create_database,
                physical_ddls,
                table_ddls,
                view_ddls,
            );
            ddl_by_schema.push((schema, ddl));
        }

        Ok(ddl_by_schema)
    }

    /// Lists the objects of `schema` as `(physical_tables, tables, views)`.
    ///
    /// `physical_tables` comes from `get_metric_physical_tables`. `tables`
    /// and `views` are the `BASE TABLE` / `VIEW` rows of
    /// `information_schema.tables`, excluding names already in
    /// `physical_tables`. The returned vectors are not sorted.
    ///
    /// Fails with `UnexpectedValueType` if a row's first two columns are not
    /// strings.
    async fn get_schema_objects(
        &self,
        schema: &str,
    ) -> Result<(Vec<String>, Vec<String>, Vec<String>)> {
        let physical_tables = self.get_metric_physical_tables(schema).await?;
        let physical_set: HashSet<&str> = physical_tables.iter().map(String::as_str).collect();
        let sql = format!(
            "SELECT table_name, table_type FROM information_schema.tables \
             WHERE table_catalog = '{}' AND table_schema = '{}' \
             AND (table_type = 'BASE TABLE' OR table_type = 'VIEW')",
            escape_sql_literal(&self.config.catalog),
            escape_sql_literal(schema)
        );
        let records: Option<Vec<Vec<Value>>> = self
            .database_client
            .sql_in_public(&sql)
            .await
            .context(DatabaseSnafu)?;

        let mut tables = Vec::new();
        let mut views = Vec::new();
        if let Some(rows) = records {
            for row in rows {
                let name = match row.first() {
                    Some(Value::String(name)) => name.clone(),
                    _ => return UnexpectedValueTypeSnafu.fail(),
                };
                let table_type = match row.get(1) {
                    Some(Value::String(table_type)) => table_type.as_str(),
                    _ => return UnexpectedValueTypeSnafu.fail(),
                };
                // Physical tables are reported separately; skip them here.
                if !physical_set.contains(name.as_str()) {
                    if table_type == "VIEW" {
                        views.push(name);
                    } else {
                        tables.push(name);
                    }
                }
            }
        }

        Ok((physical_tables, tables, views))
    }

    /// Returns the names of tables in `schema` that have a `__tsid` column.
    ///
    /// NOTE(review): presumably the `__tsid` column identifies metric-engine
    /// physical tables, as the method name suggests — confirm.
    ///
    /// Names are de-duplicated through a `HashSet`, so the returned order is
    /// unspecified; callers sort the result.
    async fn get_metric_physical_tables(&self, schema: &str) -> Result<Vec<String>> {
        let sql = format!(
            "SELECT DISTINCT table_name FROM information_schema.columns \
             WHERE table_catalog = '{}' AND table_schema = '{}' AND column_name = '__tsid'",
            escape_sql_literal(&self.config.catalog),
            escape_sql_literal(schema)
        );
        let records: Option<Vec<Vec<Value>>> = self
            .database_client
            .sql_in_public(&sql)
            .await
            .context(DatabaseSnafu)?;

        let mut tables = HashSet::new();
        if let Some(rows) = records {
            for row in rows {
                let name = match row.first() {
                    Some(Value::String(name)) => name.clone(),
                    _ => return UnexpectedValueTypeSnafu.fail(),
                };
                tables.insert(name);
            }
        }

        Ok(tables.into_iter().collect())
    }

    /// Runs `SHOW CREATE <show_type>` for a schema (`table` is `None`) or for
    /// an object inside it, and returns the statement followed by `";\n"`.
    ///
    /// The statement is read from column index 1 of the first result row.
    /// Fails with `EmptyResult` when there are no rows and with
    /// `UnexpectedValueType` when that column is not a string.
    async fn show_create(
        &self,
        show_type: &str,
        schema: &str,
        table: Option<&str>,
    ) -> Result<String> {
        // Identifiers are escaped and double-quoted before interpolation.
        let sql = match table {
            Some(table) => format!(
                r#"SHOW CREATE {} "{}"."{}"."{}""#,
                show_type,
                escape_sql_identifier(&self.config.catalog),
                escape_sql_identifier(schema),
                escape_sql_identifier(table)
            ),
            None => format!(
                r#"SHOW CREATE {} "{}"."{}""#,
                show_type,
                escape_sql_identifier(&self.config.catalog),
                escape_sql_identifier(schema)
            ),
        };

        let records: Option<Vec<Vec<Value>>> = self
            .database_client
            .sql_in_public(&sql)
            .await
            .context(DatabaseSnafu)?;
        let rows = records.context(EmptyResultSnafu)?;
        let row = rows.first().context(EmptyResultSnafu)?;
        let Some(Value::String(create)) = row.get(1) else {
            return UnexpectedValueTypeSnafu.fail();
        };

        Ok(format!("{};\n", create))
    }
}
689
/// Assembles the DDL script for one schema.
///
/// The script is a `-- Schema: <name>` header line, the `create_database`
/// statement, then the physical-table, table and view statements in that
/// order, and finally one extra newline. Statements are appended verbatim, so
/// each must carry its own terminator and trailing newline.
fn build_schema_ddl(
    schema: &str,
    create_database: String,
    physical_tables: Vec<String>,
    tables: Vec<String>,
    views: Vec<String>,
) -> String {
    let mut script = format!("-- Schema: {}\n", schema);
    script.push_str(&create_database);
    script.extend(physical_tables.into_iter().chain(tables).chain(views));
    script.push('\n');
    script
}
712
713fn validate_resume_config(manifest: &Manifest, config: &ExportConfig) -> Result<()> {
714 if manifest.schema_only != config.schema_only {
715 return SchemaOnlyModeMismatchSnafu {
716 existing_schema_only: manifest.schema_only,
717 requested_schema_only: config.schema_only,
718 }
719 .fail();
720 }
721
722 if manifest.catalog != config.catalog {
723 return ResumeConfigMismatchSnafu {
724 field: "catalog",
725 existing: manifest.catalog.clone(),
726 requested: config.catalog.clone(),
727 }
728 .fail();
729 }
730
731 if let Some(requested_schemas) = &config.schemas
734 && !schema_selection_matches(&manifest.schemas, requested_schemas)
735 {
736 return ResumeConfigMismatchSnafu {
737 field: "schemas",
738 existing: format_schema_selection(&manifest.schemas),
739 requested: format_schema_selection(requested_schemas),
740 }
741 .fail();
742 }
743
744 if manifest.time_range != config.time_range {
745 return ResumeConfigMismatchSnafu {
746 field: "time_range",
747 existing: format!("{:?}", manifest.time_range),
748 requested: format!("{:?}", config.time_range),
749 }
750 .fail();
751 }
752
753 if manifest.format != config.format {
754 return ResumeConfigMismatchSnafu {
755 field: "format",
756 existing: manifest.format.to_string(),
757 requested: config.format.to_string(),
758 }
759 .fail();
760 }
761
762 let expected_plan = Manifest::new_for_export(
763 manifest.catalog.clone(),
764 manifest.schemas.clone(),
765 config.schema_only,
766 config.time_range.clone(),
767 config.format,
768 config.chunk_time_window,
769 )?;
770 if !chunk_plan_matches(manifest, &expected_plan) {
771 return ResumeConfigMismatchSnafu {
772 field: "chunk plan",
773 existing: format_chunk_plan(&manifest.chunks),
774 requested: format_chunk_plan(&expected_plan.chunks),
775 }
776 .fail();
777 }
778
779 Ok(())
780}
781
782fn schema_selection_matches(existing: &[String], requested: &[String]) -> bool {
783 canonical_schema_selection(existing) == canonical_schema_selection(requested)
784}
785
/// Normalizes a schema list for comparison: names are ASCII-lowercased,
/// sorted, and de-duplicated.
fn canonical_schema_selection(schemas: &[String]) -> Vec<String> {
    let mut names: Vec<String> = schemas
        .iter()
        .map(|name| name.to_ascii_lowercase())
        .collect();
    // Sorting first makes equal names adjacent, so `dedup` removes them all.
    names.sort();
    names.dedup();
    names
}
800
/// Renders a schema list as `[a, b, c]` for error messages.
fn format_schema_selection(schemas: &[String]) -> String {
    let joined = schemas.join(", ");
    let mut rendered = String::with_capacity(joined.len() + 2);
    rendered.push('[');
    rendered.push_str(&joined);
    rendered.push(']');
    rendered
}
804
805fn chunk_plan_matches(existing: &Manifest, expected: &Manifest) -> bool {
806 existing.chunks.len() == expected.chunks.len()
807 && existing
808 .chunks
809 .iter()
810 .zip(&expected.chunks)
811 .all(|(left, right)| left.id == right.id && left.time_range == right.time_range)
812}
813
814fn format_chunk_plan(chunks: &[ChunkMeta]) -> String {
815 let items = chunks
816 .iter()
817 .map(|chunk| format!("#{}:{:?}", chunk.id, chunk.time_range))
818 .collect::<Vec<_>>();
819 format!("[{}]", items.join(", "))
820}
821
/// One readable snapshot found by `scan_snapshots`.
#[derive(Debug)]
struct SnapshotListEntry {
    /// Directory of the snapshot, with surrounding slashes trimmed and one
    /// trailing `/` appended.
    path: String,
    /// Manifest parsed from the snapshot's manifest file.
    manifest: Manifest,
}
827
/// Outcome of scanning a parent location for snapshots.
#[derive(Debug, Default)]
struct SnapshotScanResult {
    /// Snapshots whose manifest parsed successfully; `scan_snapshots` returns
    /// them sorted newest first by `created_at`.
    snapshots: Vec<SnapshotListEntry>,
    /// Directories that contain a manifest file which failed to parse;
    /// `scan_snapshots` returns them sorted by path.
    unreadable: Vec<String>,
}
833
834async fn scan_snapshots(storage: &OpenDalStorage) -> Result<SnapshotScanResult> {
835 let mut result = SnapshotScanResult::default();
836 for dir in storage.list_direct_child_dirs().await? {
837 let manifest_path = format!("{}/{}", dir.trim_matches('/'), MANIFEST_FILE);
838 let Some(data) = storage.read_file_if_exists(&manifest_path).await? else {
839 continue;
840 };
841
842 match serde_json::from_slice::<Manifest>(&data) {
843 Ok(manifest) => result.snapshots.push(SnapshotListEntry {
844 path: format!("{}/", dir.trim_matches('/')),
845 manifest,
846 }),
847 Err(_) => result
848 .unreadable
849 .push(format!("{}/", dir.trim_matches('/'))),
850 }
851 }
852
853 result
854 .snapshots
855 .sort_by_key(|entry| std::cmp::Reverse(entry.manifest.created_at));
856 result.unreadable.sort();
857 Ok(result)
858}
859
860fn print_snapshot_list(snapshots: &[SnapshotListEntry], unreadable_count: usize) {
861 if unreadable_count == 0 {
862 println!("Found {} snapshots:", snapshots.len());
863 } else {
864 println!(
865 "Found {} snapshots ({} {} skipped: unreadable manifest):",
866 snapshots.len(),
867 unreadable_count,
868 directory_word(unreadable_count)
869 );
870 }
871 println!();
872 println!(
873 " {:<24} {:<36} {:<19} {:<9} {:<7} {:<6} Status",
874 "Path", "ID", "Created", "Catalog", "Schemas", "Chunks"
875 );
876 println!(
877 " {:<24} {:<36} {:<19} {:<9} {:<7} {:<6} {:<10}",
878 "-".repeat(24),
879 "-".repeat(36),
880 "-".repeat(19),
881 "-".repeat(9),
882 "-".repeat(7),
883 "-".repeat(6),
884 "-".repeat(10)
885 );
886 for entry in snapshots {
887 let manifest = &entry.manifest;
888 println!(
889 " {:<24} {:<36} {:<19} {:<9} {:<7} {:<6} {}",
890 entry.path,
891 manifest.snapshot_id,
892 manifest.created_at.format("%Y-%m-%d %H:%M:%S"),
893 manifest.catalog,
894 manifest.schemas.len(),
895 format_list_chunks(manifest),
896 snapshot_status(manifest)
897 );
898 }
899}
900
901fn print_unreadable_warnings(unreadable: &[String]) {
902 if unreadable.is_empty() {
903 return;
904 }
905
906 println!();
907 println!(
908 "Warning: {} {} had corrupt/unreadable manifest.json:",
909 unreadable.len(),
910 directory_word(unreadable.len())
911 );
912 for path in unreadable {
913 println!(" - {}", path);
914 }
915}
916
/// Returns `"directory"` for a count of exactly one and `"directories"` for
/// every other count, including zero.
fn directory_word(count: usize) -> &'static str {
    match count {
        1 => "directory",
        _ => "directories",
    }
}
924
925fn snapshot_status(manifest: &Manifest) -> &'static str {
926 if manifest.schema_only {
927 "schema-only"
928 } else if manifest.is_complete() {
929 "complete"
930 } else {
931 "incomplete"
932 }
933}
934
935fn format_list_chunks(manifest: &Manifest) -> String {
936 let total = manifest.chunks.len();
937 if total == 0 {
938 return "0".to_string();
939 }
940
941 format!(
942 "{}/{}",
943 manifest.completed_count() + manifest.skipped_count(),
944 total
945 )
946}
947
/// Severity of a problem found while verifying a snapshot.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum VerifySeverity {
    /// Reported with the `ERROR` label.
    Error,
    /// Reported with the `WARN` label.
    Warn,
}

impl VerifySeverity {
    /// Returns the upper-case label used in the printed report.
    fn as_str(self) -> &'static str {
        match self {
            Self::Error => "ERROR",
            Self::Warn => "WARN",
        }
    }
}
962
/// A single finding recorded during snapshot verification.
#[derive(Debug)]
struct VerifyProblem {
    /// Whether the finding is an error or a warning.
    severity: VerifySeverity,
    /// Human-readable description printed in the report.
    message: String,
}
968
/// Chunk counts of a manifest, as filled in by `summarize_chunks`.
#[derive(Debug, Default)]
struct VerifyChunkSummary {
    /// Number of chunks in the manifest.
    total: usize,
    /// Value of `Manifest::completed_count`.
    completed: usize,
    /// Value of `Manifest::skipped_count`.
    skipped: usize,
    /// Value of `Manifest::pending_count`.
    pending: usize,
    /// Value of `Manifest::in_progress_count`.
    in_progress: usize,
    /// Value of `Manifest::failed_count`.
    failed: usize,
}
978
/// Result of verifying one snapshot; consumed by `print_verify_report`.
#[derive(Debug)]
struct VerifyReport {
    /// Manifest read from the snapshot.
    manifest: Manifest,
    /// Whether the schema index file exists in storage.
    schema_index_exists: bool,
    /// Number of files under the DDL directory whose path ends in `.sql`.
    ddl_file_count: usize,
    /// Chunk counts of the manifest.
    chunk_summary: VerifyChunkSummary,
    /// Number of data files listed by completed chunks.
    data_files_total: usize,
    /// Number of those listed files that were found in storage.
    data_files_verified: usize,
    /// All findings, in the order they were detected.
    problems: Vec<VerifyProblem>,
}
989
990impl VerifyReport {
991 fn error_count(&self) -> usize {
992 self.problems
993 .iter()
994 .filter(|problem| problem.severity == VerifySeverity::Error)
995 .count()
996 }
997
998 fn warning_count(&self) -> usize {
999 self.problems
1000 .iter()
1001 .filter(|problem| problem.severity == VerifySeverity::Warn)
1002 .count()
1003 }
1004
1005 fn has_problems(&self) -> bool {
1006 !self.problems.is_empty()
1007 }
1008
1009 fn push_error(&mut self, message: impl Into<String>) {
1010 self.problems.push(VerifyProblem {
1011 severity: VerifySeverity::Error,
1012 message: message.into(),
1013 });
1014 }
1015
1016 fn push_warn(&mut self, message: impl Into<String>) {
1017 self.problems.push(VerifyProblem {
1018 severity: VerifySeverity::Warn,
1019 message: message.into(),
1020 });
1021 }
1022}
1023
1024async fn verify_snapshot(storage: &OpenDalStorage) -> Result<VerifyReport> {
1025 let manifest = storage.read_manifest().await?;
1026 let schema_index_path = format!("{}/{}", SCHEMA_DIR, SCHEMAS_FILE);
1027 let ddl_prefix = format!("{}/{}/", SCHEMA_DIR, DDL_DIR);
1028 let schema_index_exists = storage.file_exists(&schema_index_path).await?;
1029 let ddl_files: HashSet<_> = storage
1030 .list_files_recursive(&ddl_prefix)
1031 .await?
1032 .into_iter()
1033 .collect();
1034 let ddl_file_count = ddl_files
1035 .iter()
1036 .filter(|path| path.ends_with(".sql"))
1037 .count();
1038
1039 let mut report = VerifyReport {
1040 manifest,
1041 schema_index_exists,
1042 ddl_file_count,
1043 chunk_summary: VerifyChunkSummary::default(),
1044 data_files_total: 0,
1045 data_files_verified: 0,
1046 problems: Vec::new(),
1047 };
1048
1049 if report.manifest.version != MANIFEST_VERSION {
1050 report.push_error(format!(
1051 "Manifest version mismatch: expected {}, found {}",
1052 MANIFEST_VERSION, report.manifest.version
1053 ));
1054 }
1055
1056 if !report.schema_index_exists {
1057 report.push_warn(format!("Missing schema index '{}'", schema_index_path));
1058 }
1059
1060 for schema in &report.manifest.schemas {
1061 let ddl_path = ddl_path_for_schema(schema);
1062 if !ddl_files.contains(ddl_path.as_str()) {
1063 report.problems.push(VerifyProblem {
1064 severity: VerifySeverity::Error,
1065 message: format!("Schema '{}': missing DDL file '{}'", schema, ddl_path),
1066 });
1067 }
1068 }
1069
1070 report.chunk_summary = summarize_chunks(&report.manifest);
1071 if report.manifest.schema_only {
1072 let chunk_count = report.manifest.chunks.len();
1073 if chunk_count > 0 {
1074 report.push_error(format!(
1075 "Schema-only snapshot should not contain data chunks (found {})",
1076 chunk_count
1077 ));
1078 }
1079 let data_files = storage.list_files_recursive("data/").await?;
1080 if let Some(path) = data_files.first() {
1081 report.push_error(format!(
1082 "Schema-only snapshot should not contain data files (found '{}')",
1083 path
1084 ));
1085 }
1086 } else if report.manifest.chunks.is_empty() {
1087 report.push_error("Full snapshot should contain at least one data chunk");
1088 } else {
1089 verify_chunks_and_data_files(storage, &mut report).await?;
1090 }
1091
1092 Ok(report)
1093}
1094
1095fn summarize_chunks(manifest: &Manifest) -> VerifyChunkSummary {
1096 VerifyChunkSummary {
1097 total: manifest.chunks.len(),
1098 completed: manifest.completed_count(),
1099 skipped: manifest.skipped_count(),
1100 pending: manifest.pending_count(),
1101 in_progress: manifest.in_progress_count(),
1102 failed: manifest.failed_count(),
1103 }
1104}
1105
1106async fn verify_chunks_and_data_files(
1107 storage: &OpenDalStorage,
1108 report: &mut VerifyReport,
1109) -> Result<()> {
1110 let existing_files: HashSet<_> = storage
1111 .list_files_recursive("data/")
1112 .await?
1113 .into_iter()
1114 .collect();
1115 let mut data_files_total = 0;
1116 let mut data_files_verified = 0;
1117 let mut problems = Vec::new();
1118 let mut seen_chunk_ids = HashSet::new();
1119 let mut claimed_data_files = HashSet::new();
1120
1121 for chunk in &report.manifest.chunks {
1122 if !seen_chunk_ids.insert(chunk.id) {
1123 problems.push(VerifyProblem {
1124 severity: VerifySeverity::Error,
1125 message: format!("Chunk {}: duplicate chunk id", chunk.id),
1126 });
1127 }
1128 for file in &chunk.files {
1129 if let Some(path) = safe_manifest_data_file_path(file) {
1130 claimed_data_files.insert(path.to_string());
1131 }
1132 }
1133
1134 match chunk.status {
1135 ChunkStatus::Completed => {
1136 if chunk.files.is_empty() {
1137 problems.push(VerifyProblem {
1138 severity: VerifySeverity::Error,
1139 message: format!("Chunk {}: completed chunk has no data files", chunk.id),
1140 });
1141 continue;
1142 }
1143 let allowed_prefixes = report
1144 .manifest
1145 .schemas
1146 .iter()
1147 .map(|schema| data_dir_for_schema_chunk(schema, chunk.id))
1148 .collect::<Vec<_>>();
1149 for file in &chunk.files {
1150 data_files_total += 1;
1151 let Some(path) = valid_manifest_data_file_path(file, &allowed_prefixes) else {
1152 problems.push(VerifyProblem {
1153 severity: VerifySeverity::Error,
1154 message: format!(
1155 "Chunk {}: invalid data file path '{}'",
1156 chunk.id, file
1157 ),
1158 });
1159 continue;
1160 };
1161
1162 if existing_files.contains(path) {
1163 data_files_verified += 1;
1164 } else {
1165 problems.push(VerifyProblem {
1166 severity: VerifySeverity::Error,
1167 message: format!("Chunk {}: missing file '{}'", chunk.id, path),
1168 });
1169 }
1170 }
1171 }
1172 ChunkStatus::Skipped => {
1173 if !chunk.files.is_empty() {
1174 problems.push(VerifyProblem {
1175 severity: VerifySeverity::Error,
1176 message: format!(
1177 "Chunk {}: skipped chunk should not list data files",
1178 chunk.id
1179 ),
1180 });
1181 }
1182 }
1183 ChunkStatus::Pending => {
1184 problems.push(VerifyProblem {
1185 severity: VerifySeverity::Error,
1186 message: format!("Chunk {}: status is 'pending'", chunk.id),
1187 });
1188 }
1189 ChunkStatus::InProgress => {
1190 problems.push(VerifyProblem {
1191 severity: VerifySeverity::Error,
1192 message: format!("Chunk {}: status is 'in_progress'", chunk.id),
1193 });
1194 }
1195 ChunkStatus::Failed => {
1196 let reason = chunk.error.as_deref().unwrap_or("unknown error");
1197 problems.push(VerifyProblem {
1198 severity: VerifySeverity::Error,
1199 message: format!("Chunk {}: status is 'failed' (error: {})", chunk.id, reason),
1200 });
1201 }
1202 }
1203 }
1204
1205 for path in &existing_files {
1206 if !claimed_data_files.contains(path) {
1207 problems.push(VerifyProblem {
1208 severity: VerifySeverity::Error,
1209 message: format!("Unexpected data file '{}' is not listed in manifest", path),
1210 });
1211 }
1212 }
1213
1214 report.data_files_total = data_files_total;
1215 report.data_files_verified = data_files_verified;
1216 report.problems.extend(problems);
1217
1218 Ok(())
1219}
1220
1221fn valid_manifest_data_file_path<'a>(
1222 path: &'a str,
1223 allowed_prefixes: &[String],
1224) -> Option<&'a str> {
1225 let normalized = safe_manifest_data_file_path(path)?;
1226
1227 if !allowed_prefixes
1228 .iter()
1229 .any(|prefix| normalized.starts_with(prefix))
1230 {
1231 return None;
1232 }
1233
1234 Some(normalized)
1235}
1236
/// Normalizes a manifest data-file path and rejects unsafe ones.
///
/// Leading `/` characters are stripped. The result must start with `data/`
/// and must not contain an empty, `.` or `..` segment, which rules out
/// doubled slashes, a trailing slash and parent-directory traversal. Returns
/// the stripped path on success and `None` otherwise.
fn safe_manifest_data_file_path(path: &str) -> Option<&str> {
    let relative = path.trim_start_matches('/');

    // An empty string fails this prefix test too.
    if !relative.starts_with("data/") {
        return None;
    }

    let segments_ok = relative
        .split('/')
        .all(|segment| !matches!(segment, "" | "." | ".."));

    segments_ok.then_some(relative)
}
1252
1253fn print_verify_report(snapshot: &str, report: &VerifyReport) {
1254 println!("Verifying snapshot: {}", report.manifest.snapshot_id);
1255 println!(" Location: {}", snapshot);
1256 if report.manifest.version == MANIFEST_VERSION {
1257 println!(" Manifest: OK (version {})", report.manifest.version);
1258 } else {
1259 println!(
1260 " Manifest: ERROR (version {}, expected {})",
1261 report.manifest.version, MANIFEST_VERSION
1262 );
1263 }
1264 println!(
1265 " Schema files: {}",
1266 if report.schema_index_exists {
1267 format!("OK ({})", SCHEMAS_FILE)
1268 } else {
1269 format!("WARN (missing {})", SCHEMAS_FILE)
1270 }
1271 );
1272 if report.ddl_file_count > 0 {
1273 println!(" DDL files: {} file(s) found", report.ddl_file_count);
1274 } else {
1275 println!(" DDL files: not present");
1276 }
1277
1278 let chunks = &report.chunk_summary;
1279 println!(
1280 " Chunks: {} total ({} completed, {} skipped, {} pending, {} in_progress, {} failed)",
1281 chunks.total,
1282 chunks.completed,
1283 chunks.skipped,
1284 chunks.pending,
1285 chunks.in_progress,
1286 chunks.failed
1287 );
1288
1289 if report.manifest.schema_only {
1290 println!(" Data files: skipped (schema-only)");
1291 } else {
1292 println!(
1293 " Data files: {}/{} files verified",
1294 report.data_files_verified, report.data_files_total
1295 );
1296 }
1297
1298 if report.problems.is_empty() {
1299 println!();
1300 println!("Snapshot is valid.");
1301 return;
1302 }
1303
1304 println!();
1305 println!("Problems found:");
1306 for problem in &report.problems {
1307 println!(" [{}] {}", problem.severity.as_str(), problem.message);
1308 }
1309 println!();
1310 println!(
1311 "Snapshot has {} error(s), {} warning(s).",
1312 report.error_count(),
1313 report.warning_count()
1314 );
1315}
1316
1317fn print_delete_summary(snapshot: &str, manifest: &Manifest) {
1318 println!("Snapshot: {}", manifest.snapshot_id);
1319 println!(" Location: {}", snapshot);
1320 println!(
1321 " Created: {} UTC",
1322 manifest.created_at.format("%Y-%m-%d %H:%M:%S")
1323 );
1324 println!(" Catalog: {}", manifest.catalog);
1325 println!(" Schemas: {}", manifest.schemas.join(", "));
1326 println!(" Chunks: {}", format_delete_chunks(manifest));
1327}
1328
1329fn format_delete_chunks(manifest: &Manifest) -> String {
1330 if manifest.schema_only {
1331 return "0 (schema-only)".to_string();
1332 }
1333
1334 let summary = summarize_chunks(manifest);
1335 if manifest.is_complete() {
1336 format!("{} (all processed)", summary.total)
1337 } else {
1338 format!(
1339 "{} ({} completed, {} skipped, {} pending, {} in_progress, {} failed)",
1340 summary.total,
1341 summary.completed,
1342 summary.skipped,
1343 summary.pending,
1344 summary.in_progress,
1345 summary.failed
1346 )
1347 }
1348}
1349
1350fn confirm_delete(snapshot: &str) -> Result<bool> {
1351 println!();
1352 println!(
1353 "Warning: this removes the entire snapshot directory/prefix, not only files listed in manifest."
1354 );
1355 println!("This will permanently delete all data under:");
1356 println!(" {}", display_snapshot_prefix(snapshot));
1357 print!("Type 'yes' to confirm deletion: ");
1358 io::stdout().flush().map_err(|error| {
1359 IoSnafu {
1360 operation: "flushing delete confirmation prompt",
1361 error,
1362 }
1363 .build()
1364 })?;
1365
1366 let mut input = String::new();
1367 io::stdin().read_line(&mut input).map_err(|error| {
1368 IoSnafu {
1369 operation: "reading delete confirmation",
1370 error,
1371 }
1372 .build()
1373 })?;
1374
1375 Ok(delete_confirmation_matches(&input))
1376}
1377
/// Returns `true` only when `input`, ignoring surrounding whitespace, is
/// exactly the lowercase word `yes`.
fn delete_confirmation_matches(input: &str) -> bool {
    matches!(input.trim(), "yes")
}
1381
/// Returns `snapshot` as an owned string that always ends with `/`, appending
/// one when it is missing.
fn display_snapshot_prefix(snapshot: &str) -> String {
    let mut prefix = snapshot.to_string();
    if !prefix.ends_with('/') {
        prefix.push('/');
    }
    prefix
}
1389
1390#[cfg(test)]
1391mod tests {
1392 use chrono::TimeZone;
1393 use clap::Parser;
1394 use tempfile::tempdir;
1395 use url::Url;
1396
1397 use super::*;
1398 use crate::data::path::ddl_path_for_schema;
1399
1400 #[test]
1401 fn test_ddl_path_for_schema() {
1402 assert_eq!(ddl_path_for_schema("public"), "schema/ddl/public.sql");
1403 assert_eq!(
1404 ddl_path_for_schema("../evil"),
1405 "schema/ddl/%2E%2E%2Fevil.sql"
1406 );
1407 }
1408
1409 #[test]
1410 fn test_build_schema_ddl_order() {
1411 let ddl = build_schema_ddl(
1412 "public",
1413 "CREATE DATABASE public;\n".to_string(),
1414 vec!["PHYSICAL;\n".to_string()],
1415 vec!["TABLE;\n".to_string()],
1416 vec!["VIEW;\n".to_string()],
1417 );
1418
1419 let db_pos = ddl.find("CREATE DATABASE").unwrap();
1420 let physical_pos = ddl.find("PHYSICAL;").unwrap();
1421 let table_pos = ddl.find("TABLE;").unwrap();
1422 let view_pos = ddl.find("VIEW;").unwrap();
1423 assert!(db_pos < physical_pos);
1424 assert!(physical_pos < table_pos);
1425 assert!(table_pos < view_pos);
1426 }
1427
    /// `--chunk-time-window` without `--start-time`/`--end-time` must make
    /// `build()` fail with the "requires both" error message.
    #[tokio::test]
    async fn test_build_rejects_chunk_window_without_bounds() {
        // Only the chunk window is given; no time bounds are passed.
        let cmd = ExportCreateCommand::parse_from([
            "export-v2-create",
            "--addr",
            "127.0.0.1:4000",
            "--to",
            "file:///tmp/export-v2-test",
            "--chunk-time-window",
            "1h",
        ]);

        let result = cmd.build().await;
        assert!(result.is_err());
        let error = result.err().unwrap().to_string();

        assert!(error.contains("chunk_time_window requires both --start-time and --end-time"));
    }
1446
    /// Combining `--schema-only` with data export flags must fail, and the
    /// error message must name every offending flag.
    #[tokio::test]
    async fn test_build_rejects_data_export_args_in_schema_only_mode() {
        let cmd = ExportCreateCommand::parse_from([
            "export-v2-create",
            "--addr",
            "127.0.0.1:4000",
            "--to",
            "file:///tmp/export-v2-test",
            "--schema-only",
            "--start-time",
            "2024-01-01T00:00:00Z",
            "--end-time",
            "2024-01-02T00:00:00Z",
            "--chunk-time-window",
            "1h",
            "--format",
            "csv",
            "--parallelism",
            "2",
        ]);

        let error = cmd.build().await.err().unwrap().to_string();

        // One assertion per data-export flag passed above.
        assert!(error.contains("--schema-only cannot be used with data export arguments"));
        assert!(error.contains("--start-time"));
        assert!(error.contains("--end-time"));
        assert!(error.contains("--chunk-time-window"));
        assert!(error.contains("--format"));
        assert!(error.contains("--parallelism"));
    }
1477
    /// The schema-only mismatch error must render both the existing and the
    /// requested mode in its message.
    #[test]
    fn test_schema_only_mode_mismatch_error_message() {
        let error = crate::data::export_v2::error::SchemaOnlyModeMismatchSnafu {
            existing_schema_only: false,
            requested_schema_only: true,
        }
        .build()
        .to_string();

        assert!(error.contains("existing: false"));
        assert!(error.contains("requested: true"));
    }
1490
    /// Resuming with a catalog different from the manifest's must be rejected
    /// with an error that mentions `catalog`.
    #[test]
    fn test_validate_resume_config_rejects_catalog_mismatch() {
        let manifest = Manifest::new_for_export(
            "greptime".to_string(),
            vec!["public".to_string()],
            false,
            TimeRange::unbounded(),
            DataFormat::Parquet,
            None,
        )
        .unwrap();
        // Identical to the manifest except for the catalog name.
        let config = ExportConfig {
            catalog: "other".to_string(),
            schemas: None,
            schema_only: false,
            format: DataFormat::Parquet,
            force: false,
            time_range: TimeRange::unbounded(),
            chunk_time_window: None,
            parallelism: 1,
            snapshot_uri: "file:///tmp/snapshot".to_string(),
            storage_config: ObjectStoreConfig::default(),
        };

        let error = validate_resume_config(&manifest, &config)
            .err()
            .unwrap()
            .to_string();
        assert!(error.contains("catalog"));
    }
1521
    /// A requested schema list that differs from the manifest only in case,
    /// order and duplicates must still be accepted on resume.
    #[test]
    fn test_validate_resume_config_accepts_schema_selection_with_different_case_and_order() {
        let manifest = Manifest::new_for_export(
            "greptime".to_string(),
            vec!["public".to_string(), "analytics".to_string()],
            false,
            TimeRange::unbounded(),
            DataFormat::Parquet,
            None,
        )
        .unwrap();
        let config = ExportConfig {
            catalog: "greptime".to_string(),
            // Upper-cased, reordered, and with "public" repeated in a different case.
            schemas: Some(vec![
                "ANALYTICS".to_string(),
                "PUBLIC".to_string(),
                "public".to_string(),
            ]),
            schema_only: false,
            format: DataFormat::Parquet,
            force: false,
            time_range: TimeRange::unbounded(),
            chunk_time_window: None,
            parallelism: 1,
            snapshot_uri: "file:///tmp/snapshot".to_string(),
            storage_config: ObjectStoreConfig::default(),
        };

        assert!(validate_resume_config(&manifest, &config).is_ok());
    }
1552
    /// Resuming with a chunk time window the manifest was not created with
    /// must be rejected with an error that mentions `chunk plan`.
    #[test]
    fn test_validate_resume_config_rejects_chunk_plan_mismatch() {
        let start = chrono::Utc.with_ymd_and_hms(2025, 1, 1, 0, 0, 0).unwrap();
        let end = chrono::Utc.with_ymd_and_hms(2025, 1, 1, 2, 0, 0).unwrap();
        let time_range = TimeRange::new(Some(start), Some(end));
        // NOTE(review): the trailing `None` is presumably the manifest's chunk
        // time window — confirm against `Manifest::new_for_export`.
        let manifest = Manifest::new_for_export(
            "greptime".to_string(),
            vec!["public".to_string()],
            false,
            time_range.clone(),
            DataFormat::Parquet,
            None,
        )
        .unwrap();
        // Same two-hour range, but the resume request asks for one-hour chunks.
        let config = ExportConfig {
            catalog: "greptime".to_string(),
            schemas: None,
            schema_only: false,
            format: DataFormat::Parquet,
            force: false,
            time_range,
            chunk_time_window: Some(Duration::from_secs(3600)),
            parallelism: 1,
            snapshot_uri: "file:///tmp/snapshot".to_string(),
            storage_config: ObjectStoreConfig::default(),
        };

        let error = validate_resume_config(&manifest, &config)
            .err()
            .unwrap()
            .to_string();
        assert!(error.contains("chunk plan"));
    }
1586
    /// Resuming a Parquet snapshot with CSV requested must be rejected with
    /// an error that mentions `format`.
    #[test]
    fn test_validate_resume_config_rejects_format_mismatch() {
        let manifest = Manifest::new_for_export(
            "greptime".to_string(),
            vec!["public".to_string()],
            false,
            TimeRange::unbounded(),
            DataFormat::Parquet,
            None,
        )
        .unwrap();
        // Identical to the manifest except for the data format.
        let config = ExportConfig {
            catalog: "greptime".to_string(),
            schemas: None,
            schema_only: false,
            format: DataFormat::Csv,
            force: false,
            time_range: TimeRange::unbounded(),
            chunk_time_window: None,
            parallelism: 1,
            snapshot_uri: "file:///tmp/snapshot".to_string(),
            storage_config: ObjectStoreConfig::default(),
        };

        let error = validate_resume_config(&manifest, &config)
            .err()
            .unwrap()
            .to_string();
        assert!(error.contains("format"));
    }
1617
    /// Resuming with a time range that differs from the manifest's must be
    /// rejected with an error that mentions `time_range`.
    #[test]
    fn test_validate_resume_config_rejects_time_range_mismatch() {
        let start = chrono::Utc.with_ymd_and_hms(2025, 1, 1, 0, 0, 0).unwrap();
        let end = chrono::Utc.with_ymd_and_hms(2025, 1, 1, 1, 0, 0).unwrap();
        let manifest = Manifest::new_for_export(
            "greptime".to_string(),
            vec!["public".to_string()],
            false,
            TimeRange::new(Some(start), Some(end)),
            DataFormat::Parquet,
            None,
        )
        .unwrap();
        let config = ExportConfig {
            catalog: "greptime".to_string(),
            schemas: None,
            schema_only: false,
            format: DataFormat::Parquet,
            force: false,
            // Uses `start` for both bounds, so it differs from the manifest's range.
            time_range: TimeRange::new(Some(start), Some(start)),
            chunk_time_window: None,
            parallelism: 1,
            snapshot_uri: "file:///tmp/snapshot".to_string(),
            storage_config: ObjectStoreConfig::default(),
        };

        let error = validate_resume_config(&manifest, &config)
            .err()
            .unwrap()
            .to_string();
        assert!(error.contains("time_range"));
    }
1650
    /// Scanning a directory must return readable snapshots newest-first, ignore
    /// directories without a manifest, and report directories whose manifest
    /// cannot be parsed as unreadable.
    #[tokio::test]
    async fn test_scan_snapshots_sorts_and_tracks_unreadable_manifests() {
        let dir = tempdir().unwrap();
        // Two valid snapshots, written oldest first.
        write_test_manifest(
            dir.path(),
            "older",
            test_manifest(
                chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
                false,
                true,
            ),
        );
        write_test_manifest(
            dir.path(),
            "newer",
            test_manifest(
                chrono::Utc.with_ymd_and_hms(2026, 2, 1, 0, 0, 0).unwrap(),
                false,
                true,
            ),
        );

        // Noise: an empty directory, a directory without a manifest, and a
        // directory whose manifest is not valid JSON.
        std::fs::create_dir_all(dir.path().join("empty-dir")).unwrap();
        std::fs::create_dir_all(dir.path().join("not-snapshot")).unwrap();
        std::fs::write(dir.path().join("not-snapshot").join("data.txt"), "x").unwrap();
        std::fs::create_dir_all(dir.path().join("broken")).unwrap();
        std::fs::write(dir.path().join("broken").join(MANIFEST_FILE), "{not-json").unwrap();

        let uri = Url::from_directory_path(dir.path()).unwrap().to_string();
        let storage = OpenDalStorage::from_file_uri(&uri).unwrap();
        let result = scan_snapshots(&storage).await.unwrap();

        // Only the two valid snapshots are listed, most recent `created_at` first.
        assert_eq!(result.snapshots.len(), 2);
        assert_eq!(
            result.snapshots[0].manifest.created_at,
            chrono::Utc.with_ymd_and_hms(2026, 2, 1, 0, 0, 0).unwrap()
        );
        assert_eq!(
            result.snapshots[1].manifest.created_at,
            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap()
        );
        // Reported paths carry a trailing slash.
        assert_eq!(result.unreadable, vec!["broken/".to_string()]);
        assert_eq!(result.snapshots[0].path, "newer/");
        assert_eq!(result.snapshots[1].path, "older/");
    }
1696
    /// Checks the status label and chunk formatting for schema-only, complete
    /// and incomplete manifests built by `test_manifest`.
    #[test]
    fn test_snapshot_list_status_and_chunk_summary() {
        // Schema-only manifest: no chunks.
        let schema_only = test_manifest(
            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
            true,
            true,
        );
        assert_eq!(snapshot_status(&schema_only), "schema-only");
        assert_eq!(format_list_chunks(&schema_only), "0");

        // Complete manifest: one completed chunk plus one skipped chunk.
        let complete = test_manifest(
            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
            false,
            true,
        );
        assert_eq!(snapshot_status(&complete), "complete");
        assert_eq!(format_list_chunks(&complete), "2/2");
        assert_eq!(format_delete_chunks(&complete), "2 (all processed)");

        // Incomplete manifest: one completed chunk plus one freshly created chunk.
        let incomplete = test_manifest(
            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
            false,
            false,
        );
        assert_eq!(snapshot_status(&incomplete), "incomplete");
        assert_eq!(format_list_chunks(&incomplete), "1/2");
        assert_eq!(
            format_delete_chunks(&incomplete),
            "2 (1 completed, 0 skipped, 1 pending, 0 in_progress, 0 failed)"
        );
    }
1728
    /// A snapshot URI with no path component (a bare bucket) must be rejected
    /// when the delete command is built.
    #[tokio::test]
    async fn test_delete_build_rejects_bucket_root_uri() {
        let cmd = ExportDeleteCommand::parse_from([
            "export-v2-delete",
            "--snapshot",
            "s3://bucket",
            "--no-confirm",
        ]);

        let error = cmd.build().await.err().unwrap().to_string();
        assert!(error.contains("non-empty path"));
    }
1741
    /// Both `--no-confirm` and `--yes` must set `skip_confirmation`.
    #[test]
    fn test_delete_skip_confirmation_aliases() {
        let no_confirm = ExportDeleteCommand::parse_from([
            "export-v2-delete",
            "--snapshot",
            "s3://bucket/snapshot",
            "--no-confirm",
        ]);
        assert!(no_confirm.skip_confirmation);

        let yes = ExportDeleteCommand::parse_from([
            "export-v2-delete",
            "--snapshot",
            "s3://bucket/snapshot",
            "--yes",
        ]);
        assert!(yes.skip_confirmation);
    }
1760
    /// With confirmation skipped, deleting a snapshot removes its files
    /// without prompting and leaves a sibling directory untouched.
    #[tokio::test]
    async fn test_delete_snapshot_with_no_confirm_removes_snapshot_contents() {
        let parent = tempdir().unwrap();
        let snapshot = parent.path().join("snapshot");
        let sibling = parent.path().join("sibling");
        std::fs::create_dir_all(&snapshot).unwrap();
        std::fs::create_dir_all(&sibling).unwrap();
        // Canary file outside the snapshot that must survive the delete.
        std::fs::write(sibling.join("keep.txt"), b"keep").unwrap();
        write_root_manifest(
            &snapshot,
            test_manifest(
                chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
                true,
                true,
            ),
        );
        write_snapshot_file(&snapshot, "schema/schemas.json", b"[]");

        let uri = Url::from_directory_path(&snapshot).unwrap().to_string();
        let delete = ExportDelete {
            snapshot: uri,
            skip_confirmation: true,
            storage: file_storage_for_dir(&snapshot),
        };

        // The callback panics if invoked, proving the prompt is bypassed.
        delete
            .run_with_confirmation(|_| unreachable!())
            .await
            .unwrap();

        assert!(!snapshot.join(MANIFEST_FILE).exists());
        assert!(!snapshot.join("schema/schemas.json").exists());
        assert!(sibling.join("keep.txt").exists());
    }
1795
    /// Deleting a location that has no manifest must fail with
    /// "Snapshot not found" and leave the directory in place.
    #[tokio::test]
    async fn test_delete_snapshot_requires_manifest() {
        // Empty temp directory: no manifest file is written.
        let dir = tempdir().unwrap();
        let uri = Url::from_directory_path(dir.path()).unwrap().to_string();
        let delete = ExportDelete {
            snapshot: uri,
            skip_confirmation: true,
            storage: file_storage_for_dir(dir.path()),
        };

        let error = delete
            .run_with_confirmation(|_| unreachable!())
            .await
            .err()
            .unwrap()
            .to_string();

        assert!(error.contains("Snapshot not found"));
        assert!(dir.path().exists());
    }
1816
    /// When the confirmation callback declines, the delete run succeeds
    /// without removing any snapshot files.
    #[tokio::test]
    async fn test_delete_snapshot_cancels_without_exact_confirmation() {
        let dir = tempdir().unwrap();
        write_root_manifest(
            dir.path(),
            test_manifest(
                chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
                true,
                true,
            ),
        );
        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
        let uri = Url::from_directory_path(dir.path()).unwrap().to_string();
        let delete = ExportDelete {
            snapshot: uri.clone(),
            skip_confirmation: false,
            storage: file_storage_for_dir(dir.path()),
        };

        delete
            .run_with_confirmation(|snapshot| {
                // The callback must receive the snapshot URI; it then declines.
                assert_eq!(snapshot, uri);
                Ok(false)
            })
            .await
            .unwrap();

        assert!(dir.path().join(MANIFEST_FILE).exists());
        assert!(dir.path().join("schema/schemas.json").exists());
    }
1847
1848 #[test]
1849 fn test_delete_confirmation_requires_exact_yes() {
1850 assert!(delete_confirmation_matches("yes"));
1851 assert!(delete_confirmation_matches(" yes\n"));
1852 assert!(!delete_confirmation_matches("YES"));
1853 assert!(!delete_confirmation_matches("y"));
1854 assert!(!delete_confirmation_matches("yes please"));
1855 }
1856
1857 #[test]
1858 fn test_display_snapshot_prefix_adds_trailing_slash() {
1859 assert_eq!(
1860 display_snapshot_prefix("s3://bucket/snapshot"),
1861 "s3://bucket/snapshot/"
1862 );
1863 assert_eq!(
1864 display_snapshot_prefix("s3://bucket/snapshot/"),
1865 "s3://bucket/snapshot/"
1866 );
1867 }
1868
    /// A full snapshot with its manifest, schema index, DDL files and the one
    /// listed data file present must verify with no errors or warnings.
    #[tokio::test]
    async fn test_verify_snapshot_accepts_valid_full_snapshot() {
        let dir = tempdir().unwrap();
        let manifest = test_manifest(
            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
            false,
            true,
        );
        write_root_manifest(dir.path(), manifest);
        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
        write_default_ddl_files(dir.path());
        // The only file the completed chunk lists in the manifest.
        write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");

        let storage = file_storage_for_dir(dir.path());
        let report = verify_snapshot(&storage).await.unwrap();

        assert_eq!(report.error_count(), 0);
        assert_eq!(report.warning_count(), 0);
        assert_eq!(report.data_files_total, 1);
        assert_eq!(report.data_files_verified, 1);
    }
1890
    /// A listed data file that is absent and a chunk marked failed must each
    /// be reported as an error.
    #[tokio::test]
    async fn test_verify_snapshot_reports_missing_data_file_and_failed_chunk() {
        let dir = tempdir().unwrap();
        let mut manifest = test_manifest(
            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
            false,
            true,
        );
        // Turn the second chunk into a failed one.
        manifest.chunks[1].mark_failed("copy failed".to_string());
        write_root_manifest(dir.path(), manifest);
        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
        write_default_ddl_files(dir.path());
        // No data file is written, so the first chunk's listed file is missing.

        let storage = file_storage_for_dir(dir.path());
        let report = verify_snapshot(&storage).await.unwrap();

        assert_eq!(report.error_count(), 2);
        assert!(
            report
                .problems
                .iter()
                .any(|problem| problem.message.contains("missing file"))
        );
        assert!(
            report
                .problems
                .iter()
                .any(|problem| problem.message.contains("status is 'failed'"))
        );
    }
1921
    /// A missing schema index file is reported as a warning, not an error.
    #[tokio::test]
    async fn test_verify_snapshot_reports_missing_schema_index_as_warning() {
        let dir = tempdir().unwrap();
        let manifest = test_manifest(
            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
            false,
            true,
        );
        write_root_manifest(dir.path(), manifest);
        // `schema/schemas.json` is deliberately not written.
        write_default_ddl_files(dir.path());
        write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");

        let storage = file_storage_for_dir(dir.path());
        let report = verify_snapshot(&storage).await.unwrap();

        assert_eq!(report.error_count(), 0);
        assert_eq!(report.warning_count(), 1);
        assert!(
            report
                .problems
                .iter()
                .any(|problem| problem.message.contains("Missing schema index"))
        );
    }
1946
    /// A schema-only manifest that nevertheless lists a data chunk must be
    /// reported as an error, and no data files are counted.
    #[tokio::test]
    async fn test_verify_snapshot_rejects_schema_only_snapshot_with_chunks() {
        let dir = tempdir().unwrap();
        let mut manifest = test_manifest(
            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
            true,
            true,
        );
        // Add a completed chunk to the schema-only manifest.
        let mut chunk = ChunkMeta::new(1, TimeRange::unbounded());
        chunk.mark_completed(vec!["data/public/1/file.parquet".to_string()], None);
        manifest.chunks.push(chunk);
        write_root_manifest(dir.path(), manifest);
        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
        write_default_ddl_files(dir.path());

        let storage = file_storage_for_dir(dir.path());
        let report = verify_snapshot(&storage).await.unwrap();

        assert_eq!(report.error_count(), 1);
        assert_eq!(report.data_files_total, 0);
        assert!(
            report
                .problems
                .iter()
                .any(|problem| problem.message.contains("should not contain data chunks"))
        );
    }
1974
    /// A schema-only snapshot with a file under `data/` must be reported as
    /// an error, and no data files are counted.
    #[tokio::test]
    async fn test_verify_snapshot_rejects_schema_only_snapshot_with_data_files() {
        let dir = tempdir().unwrap();
        let manifest = test_manifest(
            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
            true,
            true,
        );
        write_root_manifest(dir.path(), manifest);
        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
        write_default_ddl_files(dir.path());
        // Stray data file that a schema-only snapshot should not have.
        write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");

        let storage = file_storage_for_dir(dir.path());
        let report = verify_snapshot(&storage).await.unwrap();

        assert_eq!(report.error_count(), 1);
        assert_eq!(report.data_files_total, 0);
        assert!(
            report
                .problems
                .iter()
                .any(|problem| problem.message.contains("should not contain data files"))
        );
    }
2000
    /// A full (non-schema-only) manifest with no chunks must be reported as
    /// an error.
    #[tokio::test]
    async fn test_verify_snapshot_rejects_full_snapshot_without_chunks() {
        let dir = tempdir().unwrap();
        let mut manifest = test_manifest(
            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
            false,
            true,
        );
        // Drop every chunk while keeping the manifest in full-export mode.
        manifest.chunks.clear();
        write_root_manifest(dir.path(), manifest);
        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
        write_default_ddl_files(dir.path());

        let storage = file_storage_for_dir(dir.path());
        let report = verify_snapshot(&storage).await.unwrap();

        assert_eq!(report.error_count(), 1);
        assert_eq!(report.data_files_total, 0);
        assert!(
            report
                .problems
                .iter()
                .any(|problem| problem.message.contains("at least one data chunk"))
        );
    }
2026
    /// A data file under the directory of a skipped chunk is not listed in
    /// the manifest and must be reported as unexpected.
    #[tokio::test]
    async fn test_verify_snapshot_rejects_skipped_chunk_data_files() {
        let dir = tempdir().unwrap();
        let manifest = test_manifest(
            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
            false,
            true,
        );
        write_root_manifest(dir.path(), manifest);
        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
        write_default_ddl_files(dir.path());
        write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
        // Chunk 2 is skipped in this manifest, so this file is not claimed by any chunk.
        write_snapshot_file(dir.path(), "data/public/2/file.parquet", b"data");

        let storage = file_storage_for_dir(dir.path());
        let report = verify_snapshot(&storage).await.unwrap();

        assert_eq!(report.error_count(), 1);
        assert!(
            report
                .problems
                .iter()
                .any(|problem| { problem.message.contains("Unexpected data file") })
        );
    }
2052
    /// Two chunks sharing the same id must be reported as an error.
    #[tokio::test]
    async fn test_verify_snapshot_rejects_duplicate_chunk_ids() {
        let dir = tempdir().unwrap();
        let mut manifest = test_manifest(
            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
            false,
            true,
        );
        // Second chunk with id 1, listing the same file as the first.
        let mut duplicate = ChunkMeta::new(1, TimeRange::unbounded());
        duplicate.mark_completed(vec!["data/public/1/file.parquet".to_string()], None);
        manifest.chunks.push(duplicate);
        write_root_manifest(dir.path(), manifest);
        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
        write_default_ddl_files(dir.path());
        write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");

        let storage = file_storage_for_dir(dir.path());
        let report = verify_snapshot(&storage).await.unwrap();

        assert_eq!(report.error_count(), 1);
        assert!(
            report
                .problems
                .iter()
                .any(|problem| problem.message.contains("duplicate chunk id"))
        );
    }
2080
    /// Every schema in the manifest needs a DDL file; a missing one must be
    /// reported as an error naming the schema.
    #[tokio::test]
    async fn test_verify_snapshot_requires_all_schema_ddl() {
        let dir = tempdir().unwrap();
        let manifest = test_manifest(
            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
            true,
            true,
        );
        write_root_manifest(dir.path(), manifest);
        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
        // Only `public` gets a DDL file; the manifest also lists `analytics`.
        write_snapshot_file(
            dir.path(),
            "schema/ddl/public.sql",
            b"CREATE DATABASE public;",
        );

        let storage = file_storage_for_dir(dir.path());
        let report = verify_snapshot(&storage).await.unwrap();

        assert_eq!(report.error_count(), 1);
        assert!(
            report
                .problems
                .iter()
                .any(|problem| problem.message.contains("analytics"))
        );
    }
2108
    /// With no DDL files at all, one error per manifest schema must be
    /// reported, each naming the expected DDL path.
    #[tokio::test]
    async fn test_verify_snapshot_reports_missing_ddl_dir() {
        let dir = tempdir().unwrap();
        let manifest = test_manifest(
            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
            false,
            true,
        );
        write_root_manifest(dir.path(), manifest);
        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
        // No DDL files are written for this snapshot.
        write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");

        let storage = file_storage_for_dir(dir.path());
        let report = verify_snapshot(&storage).await.unwrap();

        assert_eq!(report.error_count(), 2);
        assert!(
            report
                .problems
                .iter()
                .any(|problem| problem.message.contains("schema/ddl/public.sql"))
        );
        assert!(
            report
                .problems
                .iter()
                .any(|problem| problem.message.contains("schema/ddl/analytics.sql"))
        );
    }
2138
    /// A manifest whose version differs from `MANIFEST_VERSION` must be
    /// reported as an error even when the snapshot is otherwise intact.
    #[tokio::test]
    async fn test_verify_snapshot_reports_manifest_version_mismatch() {
        let dir = tempdir().unwrap();
        let mut manifest = test_manifest(
            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
            false,
            true,
        );
        // Set a version one above the supported one.
        manifest.version = MANIFEST_VERSION + 1;
        write_root_manifest(dir.path(), manifest);
        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
        write_default_ddl_files(dir.path());
        write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");

        let storage = file_storage_for_dir(dir.path());
        let report = verify_snapshot(&storage).await.unwrap();

        assert_eq!(report.error_count(), 1);
        assert!(
            report
                .problems
                .iter()
                .any(|problem| problem.message.contains("Manifest version mismatch"))
        );
    }
2164
    /// A manifest data path containing a `..` segment must be reported as
    /// invalid and must not count as verified.
    #[tokio::test]
    async fn test_verify_snapshot_rejects_invalid_data_file_paths() {
        let dir = tempdir().unwrap();
        let mut manifest = test_manifest(
            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
            false,
            true,
        );
        // Replace the first chunk's file list with a path-traversal entry.
        manifest.chunks[0].files = vec!["data/public/1/../file.parquet".to_string()];
        write_root_manifest(dir.path(), manifest);
        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
        write_default_ddl_files(dir.path());

        let storage = file_storage_for_dir(dir.path());
        let report = verify_snapshot(&storage).await.unwrap();

        assert_eq!(report.error_count(), 1);
        assert!(
            report
                .problems
                .iter()
                .any(|problem| problem.message.contains("invalid data file path"))
        );
        assert_eq!(report.data_files_verified, 0);
    }
2190
    /// A manifest data path written with a leading `/` must still match the
    /// file stored under `data/` and verify cleanly.
    #[tokio::test]
    async fn test_verify_snapshot_accepts_leading_slash_manifest_data_paths() {
        let dir = tempdir().unwrap();
        let mut manifest = test_manifest(
            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
            false,
            true,
        );
        // Same file as usual, but listed with a leading slash.
        manifest.chunks[0].files = vec!["/data/public/1/file.parquet".to_string()];
        write_root_manifest(dir.path(), manifest);
        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
        write_default_ddl_files(dir.path());
        write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");

        let storage = file_storage_for_dir(dir.path());
        let report = verify_snapshot(&storage).await.unwrap();

        assert_eq!(report.error_count(), 0);
        assert_eq!(report.data_files_verified, 1);
    }
2211
    /// An extra file in a completed chunk's directory that the manifest does
    /// not list must be reported as unexpected; the listed file still verifies.
    #[tokio::test]
    async fn test_verify_snapshot_rejects_unlisted_files_under_completed_chunk_prefix() {
        let dir = tempdir().unwrap();
        let manifest = test_manifest(
            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
            false,
            true,
        );
        write_root_manifest(dir.path(), manifest);
        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
        write_default_ddl_files(dir.path());
        write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
        // Same chunk directory, but not listed in the manifest.
        write_snapshot_file(dir.path(), "data/public/1/extra.parquet", b"data");

        let storage = file_storage_for_dir(dir.path());
        let report = verify_snapshot(&storage).await.unwrap();

        assert_eq!(report.error_count(), 1);
        assert!(
            report
                .problems
                .iter()
                .any(|problem| problem.message.contains("Unexpected data file"))
        );
        assert_eq!(report.data_files_verified, 1);
    }
2238
    /// A data file under a chunk id that the manifest does not contain must
    /// be reported as unexpected; the listed file still verifies.
    #[tokio::test]
    async fn test_verify_snapshot_rejects_orphan_data_files_outside_known_chunk_prefixes() {
        let dir = tempdir().unwrap();
        let manifest = test_manifest(
            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
            false,
            true,
        );
        write_root_manifest(dir.path(), manifest);
        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
        write_default_ddl_files(dir.path());
        write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
        // The manifest has chunks 1 and 2 only; chunk 99 does not exist in it.
        write_snapshot_file(dir.path(), "data/public/99/file.parquet", b"data");

        let storage = file_storage_for_dir(dir.path());
        let report = verify_snapshot(&storage).await.unwrap();

        assert_eq!(report.error_count(), 1);
        assert!(
            report
                .problems
                .iter()
                .any(|problem| problem.message.contains("Unexpected data file"))
        );
        assert_eq!(report.data_files_verified, 1);
    }
2265
    /// Files a chunk lists under another chunk id or under a schema missing
    /// from the manifest must be reported as invalid paths even if they exist.
    #[tokio::test]
    async fn test_verify_snapshot_rejects_data_files_under_wrong_chunk_or_schema() {
        let dir = tempdir().unwrap();
        let mut manifest = test_manifest(
            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
            false,
            true,
        );
        // Chunk 1 claims a file under chunk 99 and one under schema `metrics`,
        // which is not among the manifest's schemas.
        manifest.chunks[0].files = vec![
            "data/public/99/file.parquet".to_string(),
            "data/metrics/1/file.parquet".to_string(),
        ];
        write_root_manifest(dir.path(), manifest);
        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
        write_default_ddl_files(dir.path());
        // Both files exist on disk, so the only possible problem is their location.
        write_snapshot_file(dir.path(), "data/public/99/file.parquet", b"data");
        write_snapshot_file(dir.path(), "data/metrics/1/file.parquet", b"data");

        let storage = file_storage_for_dir(dir.path());
        let report = verify_snapshot(&storage).await.unwrap();

        assert_eq!(report.error_count(), 2);
        assert_eq!(report.data_files_verified, 0);
        // Every reported problem must be an invalid-path error.
        assert!(
            report
                .problems
                .iter()
                .all(|problem| problem.message.contains("invalid data file path"))
        );
    }
2296
2297 fn write_test_manifest(root: &std::path::Path, dir: &str, manifest: Manifest) {
2298 let snapshot_dir = root.join(dir);
2299 std::fs::create_dir_all(&snapshot_dir).unwrap();
2300 std::fs::write(
2301 snapshot_dir.join(MANIFEST_FILE),
2302 serde_json::to_vec_pretty(&manifest).unwrap(),
2303 )
2304 .unwrap();
2305 }
2306
2307 fn write_root_manifest(root: &std::path::Path, manifest: Manifest) {
2308 std::fs::write(
2309 root.join(MANIFEST_FILE),
2310 serde_json::to_vec_pretty(&manifest).unwrap(),
2311 )
2312 .unwrap();
2313 }
2314
2315 fn write_snapshot_file(root: &std::path::Path, relative_path: &str, content: &[u8]) {
2316 let mut path = root.to_path_buf();
2317 for segment in relative_path.split('/') {
2318 path.push(segment);
2319 }
2320 std::fs::create_dir_all(path.parent().unwrap()).unwrap();
2321 std::fs::write(path, content).unwrap();
2322 }
2323
2324 fn write_default_ddl_files(root: &std::path::Path) {
2325 write_snapshot_file(root, "schema/ddl/public.sql", b"CREATE DATABASE public;");
2326 write_snapshot_file(
2327 root,
2328 "schema/ddl/analytics.sql",
2329 b"CREATE DATABASE analytics;",
2330 );
2331 }
2332
2333 fn file_storage_for_dir(root: &std::path::Path) -> OpenDalStorage {
2334 let uri = Url::from_directory_path(root).unwrap().to_string();
2335 OpenDalStorage::from_file_uri(&uri).unwrap()
2336 }
2337
2338 fn test_manifest(
2339 created_at: chrono::DateTime<chrono::Utc>,
2340 schema_only: bool,
2341 complete: bool,
2342 ) -> Manifest {
2343 let mut manifest = Manifest::new_for_export(
2344 "greptime".to_string(),
2345 vec!["public".to_string(), "analytics".to_string()],
2346 schema_only,
2347 TimeRange::unbounded(),
2348 DataFormat::Parquet,
2349 None,
2350 )
2351 .unwrap();
2352 manifest.created_at = created_at;
2353 manifest.updated_at = created_at;
2354
2355 if !schema_only {
2356 manifest.chunks.clear();
2357 let mut first = ChunkMeta::new(1, TimeRange::unbounded());
2358 first.mark_completed(vec!["data/public/1/file.parquet".to_string()], None);
2359 manifest.chunks.push(first);
2360
2361 if complete {
2362 manifest
2363 .chunks
2364 .push(ChunkMeta::skipped(2, TimeRange::unbounded()));
2365 } else {
2366 manifest
2367 .chunks
2368 .push(ChunkMeta::new(2, TimeRange::unbounded()));
2369 }
2370 }
2371
2372 manifest
2373 }
2374}