1use std::collections::HashSet;
18use std::time::Duration;
19
20use async_trait::async_trait;
21use clap::{Parser, Subcommand};
22use common_error::ext::BoxedError;
23use common_telemetry::info;
24use serde_json::Value;
25use snafu::{OptionExt, ResultExt};
26
27use crate::Tool;
28use crate::common::ObjectStoreConfig;
29use crate::data::export_v2::coordinator::export_data;
30use crate::data::export_v2::error::{
31 ChunkTimeWindowRequiresBoundsSnafu, DatabaseSnafu, EmptyResultSnafu,
32 ManifestVersionMismatchSnafu, Result, ResumeConfigMismatchSnafu, SchemaOnlyArgsNotAllowedSnafu,
33 SchemaOnlyModeMismatchSnafu, UnexpectedValueTypeSnafu,
34};
35use crate::data::export_v2::extractor::SchemaExtractor;
36use crate::data::export_v2::manifest::{
37 ChunkMeta, DataFormat, MANIFEST_FILE, MANIFEST_VERSION, Manifest, TimeRange,
38};
39use crate::data::path::ddl_path_for_schema;
40use crate::data::snapshot_storage::{OpenDalStorage, SnapshotStorage, validate_uri};
41use crate::data::sql::{escape_sql_identifier, escape_sql_literal};
42use crate::database::{DatabaseClient, parse_proxy_opts};
43
/// Subcommands of the export-v2 tool.
#[derive(Debug, Subcommand)]
pub enum ExportV2Command {
    /// Create a snapshot at the target location, or resume the one already there.
    Create(ExportCreateCommand),
    /// List the snapshots found directly under a parent location.
    List(ExportListCommand),
}
52
53impl ExportV2Command {
54 pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
55 match self {
56 ExportV2Command::Create(cmd) => cmd.build().await,
57 ExportV2Command::List(cmd) => cmd.build().await,
58 }
59 }
60}
61
/// Arguments of the `list` subcommand.
#[derive(Debug, Parser)]
pub struct ExportListCommand {
    /// Parent location (URI) whose direct child directories are scanned for snapshots.
    #[clap(long)]
    location: String,

    /// Object store settings used to open `location`.
    #[clap(flatten)]
    storage: ObjectStoreConfig,
}
73
74impl ExportListCommand {
75 pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
76 validate_uri(&self.location).map_err(BoxedError::new)?;
77 let storage = OpenDalStorage::from_parent_uri(&self.location, &self.storage)
78 .map_err(BoxedError::new)?;
79
80 Ok(Box::new(ExportList {
81 location: self.location.clone(),
82 storage,
83 }))
84 }
85}
86
/// Tool that lists the snapshots stored under a parent location.
pub struct ExportList {
    /// Location string as given on the command line; only used for display.
    location: String,
    /// Storage opened on the parent location that is scanned for snapshots.
    storage: OpenDalStorage,
}
92
93#[async_trait]
94impl Tool for ExportList {
95 async fn do_work(&self) -> std::result::Result<(), BoxedError> {
96 self.run().await.map_err(BoxedError::new)
97 }
98}
99
100impl ExportList {
101 async fn run(&self) -> Result<()> {
102 let result = scan_snapshots(&self.storage).await?;
103
104 println!("Scanning: {}", self.location);
105 if result.snapshots.is_empty() {
106 println!("No snapshots found.");
107 } else {
108 print_snapshot_list(&result.snapshots, result.unreadable.len());
109 }
110 print_unreadable_warnings(&result.unreadable);
111
112 Ok(())
113 }
114}
115
/// Arguments of the `create` subcommand.
#[derive(Debug, Parser)]
pub struct ExportCreateCommand {
    /// Address of the database server to export from.
    #[clap(long)]
    addr: String,

    /// Destination URI of the snapshot.
    #[clap(long)]
    to: String,

    /// Catalog to export.
    #[clap(long, default_value = "greptime")]
    catalog: String,

    /// Comma-separated list of schemas to export. When omitted, no schema
    /// selection is passed to the schema extractor.
    #[clap(long, value_delimiter = ',')]
    schemas: Vec<String>,

    /// Export schema definitions only, without table data.
    #[clap(long)]
    schema_only: bool,

    /// Start of the time range to export (for example `2024-01-01T00:00:00Z`).
    #[clap(long)]
    start_time: Option<String>,

    /// End of the time range to export (for example `2024-01-02T00:00:00Z`).
    #[clap(long)]
    end_time: Option<String>,

    /// Time window used to plan the export chunks (for example `1h`).
    /// Requires both `--start-time` and `--end-time`.
    #[clap(long, value_parser = humantime::parse_duration)]
    chunk_time_window: Option<Duration>,

    /// Format of the exported data files.
    #[clap(long, value_enum, default_value = "parquet")]
    format: DataFormat,

    /// Delete an existing snapshot at the destination instead of resuming it.
    #[clap(long)]
    force: bool,

    /// Parallelism used for the data export.
    #[clap(long, default_value = "1")]
    parallelism: usize,

    /// Basic authentication credentials for the database connection.
    #[clap(long)]
    auth_basic: Option<String>,

    /// Timeout for the database client (for example `30s`). Defaults to 60 seconds.
    #[clap(long, value_parser = humantime::parse_duration)]
    timeout: Option<Duration>,

    /// Proxy to use for the database connection.
    #[clap(long)]
    proxy: Option<String>,

    /// Disable proxy usage for the database connection.
    #[clap(long)]
    no_proxy: bool,

    /// Object store settings for the snapshot destination.
    #[clap(flatten)]
    storage: ObjectStoreConfig,
}
190
191impl ExportCreateCommand {
192 pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
193 validate_uri(&self.to).map_err(BoxedError::new)?;
195
196 let time_range = TimeRange::parse(self.start_time.as_deref(), self.end_time.as_deref())
197 .map_err(BoxedError::new)?;
198 if self.chunk_time_window.is_some() && !time_range.is_bounded() {
199 return ChunkTimeWindowRequiresBoundsSnafu
200 .fail()
201 .map_err(BoxedError::new);
202 }
203 if self.schema_only {
204 let mut invalid_args = Vec::new();
205 if self.start_time.is_some() {
206 invalid_args.push("--start-time");
207 }
208 if self.end_time.is_some() {
209 invalid_args.push("--end-time");
210 }
211 if self.chunk_time_window.is_some() {
212 invalid_args.push("--chunk-time-window");
213 }
214 if self.format != DataFormat::Parquet {
215 invalid_args.push("--format");
216 }
217 if self.parallelism != 1 {
218 invalid_args.push("--parallelism");
219 }
220 if !invalid_args.is_empty() {
221 return SchemaOnlyArgsNotAllowedSnafu {
222 args: invalid_args.join(", "),
223 }
224 .fail()
225 .map_err(BoxedError::new);
226 }
227 }
228
229 let schemas = if self.schemas.is_empty() {
231 None
232 } else {
233 Some(self.schemas.clone())
234 };
235
236 let storage = OpenDalStorage::from_uri(&self.to, &self.storage).map_err(BoxedError::new)?;
238
239 let proxy = parse_proxy_opts(self.proxy.clone(), self.no_proxy)?;
241 let database_client = DatabaseClient::new(
242 self.addr.clone(),
243 self.catalog.clone(),
244 self.auth_basic.clone(),
245 self.timeout.unwrap_or(Duration::from_secs(60)),
246 proxy,
247 self.no_proxy,
248 );
249
250 Ok(Box::new(ExportCreate {
251 config: ExportConfig {
252 catalog: self.catalog.clone(),
253 schemas,
254 schema_only: self.schema_only,
255 format: self.format,
256 force: self.force,
257 time_range,
258 chunk_time_window: self.chunk_time_window,
259 parallelism: self.parallelism,
260 snapshot_uri: self.to.clone(),
261 storage_config: self.storage.clone(),
262 },
263 storage: Box::new(storage),
264 database_client,
265 }))
266 }
267}
268
/// Tool that creates a snapshot, or resumes an existing one.
pub struct ExportCreate {
    /// Resolved export settings.
    config: ExportConfig,
    /// Storage backend of the snapshot being written.
    storage: Box<dyn SnapshotStorage>,
    /// Client used to query the source database.
    database_client: DatabaseClient,
}
275
/// Resolved settings of one export run, built from [`ExportCreateCommand`].
struct ExportConfig {
    /// Catalog to export.
    catalog: String,
    /// Explicit schema selection; `None` when `--schemas` was not given.
    schemas: Option<Vec<String>>,
    /// When `true`, no table data is exported.
    schema_only: bool,
    /// Format of the exported data files.
    format: DataFormat,
    /// When `true`, an existing snapshot is deleted instead of resumed.
    force: bool,
    /// Time range parsed from `--start-time` / `--end-time`.
    time_range: TimeRange,
    /// Optional chunk window used when planning the export chunks.
    chunk_time_window: Option<Duration>,
    /// Parallelism forwarded to the data export.
    parallelism: usize,
    /// Snapshot destination URI (the value of `--to`), forwarded to the data export.
    snapshot_uri: String,
    /// Object store settings, forwarded to the data export.
    storage_config: ObjectStoreConfig,
}
288
289#[async_trait]
290impl Tool for ExportCreate {
291 async fn do_work(&self) -> std::result::Result<(), BoxedError> {
292 self.run().await.map_err(BoxedError::new)
293 }
294}
295
296impl ExportCreate {
    /// Creates a new snapshot, or resumes the one already present at the destination.
    ///
    /// When a snapshot already exists:
    /// - with `--force`, it is deleted and a fresh export starts;
    /// - otherwise its manifest is read, checked against the current manifest
    ///   version and the requested configuration, and any remaining data export
    ///   is continued.
    ///
    /// For a fresh export the schema snapshot, the per-schema DDL files and the
    /// manifest are written (in that order) before any data is exported.
    ///
    /// # Errors
    ///
    /// Propagates errors from the storage backend, schema extraction and data
    /// export, and fails on a manifest version mismatch or when the requested
    /// configuration does not match the existing snapshot.
    async fn run(&self) -> Result<()> {
        let exists = self.storage.exists().await?;

        if exists {
            if self.config.force {
                // Start over: the fresh-export path below runs after the delete.
                info!("Deleting existing snapshot (--force)");
                self.storage.delete_snapshot().await?;
            } else {
                // Resume path: every branch in this block returns.
                let mut manifest = self.storage.read_manifest().await?;

                // Refuse to resume a snapshot written with another manifest version.
                if manifest.version != MANIFEST_VERSION {
                    return ManifestVersionMismatchSnafu {
                        expected: MANIFEST_VERSION,
                        found: manifest.version,
                    }
                    .fail();
                }

                // The requested options must match the ones stored in the manifest.
                validate_resume_config(&manifest, &self.config)?;

                info!(
                    "Resuming existing snapshot: {} (completed: {}/{} chunks)",
                    manifest.snapshot_id,
                    manifest.completed_count(),
                    manifest.chunks.len()
                );

                if manifest.is_complete() {
                    info!("Snapshot is already complete");
                    return Ok(());
                }

                // A schema-only snapshot has no data export to continue.
                if manifest.schema_only {
                    return Ok(());
                }

                export_data(
                    self.storage.as_ref(),
                    &self.database_client,
                    &self.config.snapshot_uri,
                    &self.config.storage_config,
                    &mut manifest,
                    self.config.parallelism,
                )
                .await?;
                return Ok(());
            }
        }

        // Fresh export: extract the schema definitions first.
        let extractor = SchemaExtractor::new(&self.database_client, &self.config.catalog);
        let schema_snapshot = extractor.extract(self.config.schemas.as_deref()).await?;

        // The manifest records the schemas that were actually extracted.
        let schema_names: Vec<String> = schema_snapshot
            .schemas
            .iter()
            .map(|s| s.name.clone())
            .collect();
        info!("Exporting schemas: {:?}", schema_names);

        let mut manifest = Manifest::new_for_export(
            self.config.catalog.clone(),
            schema_names.clone(),
            self.config.schema_only,
            self.config.time_range.clone(),
            self.config.format,
            self.config.chunk_time_window,
        )?;

        self.storage.write_schema(&schema_snapshot).await?;
        info!("Exported {} schemas", schema_snapshot.schemas.len());

        // One DDL file per schema.
        let ddl_by_schema = self.build_ddl_by_schema(&schema_names).await?;
        for (schema, ddl) in ddl_by_schema {
            let ddl_path = ddl_path_for_schema(&schema);
            self.storage.write_text(&ddl_path, &ddl).await?;
            info!("Exported DDL for schema {} to {}", schema, ddl_path);
        }

        // The manifest is written only after the schema and DDL files.
        // NOTE(review): presumably so that a manifest on disk implies those
        // artifacts exist — confirm before reordering.
        self.storage.write_manifest(&manifest).await?;
        info!("Snapshot created: {}", manifest.snapshot_id);

        if !self.config.schema_only {
            export_data(
                self.storage.as_ref(),
                &self.database_client,
                &self.config.snapshot_uri,
                &self.config.storage_config,
                &mut manifest,
                self.config.parallelism,
            )
            .await?;
        }

        Ok(())
    }
406
407 async fn build_ddl_by_schema(&self, schema_names: &[String]) -> Result<Vec<(String, String)>> {
408 let mut schemas = schema_names.to_vec();
409 schemas.sort();
410
411 let mut ddl_by_schema = Vec::with_capacity(schemas.len());
412 for schema in schemas {
413 let create_database = self.show_create("DATABASE", &schema, None).await?;
414
415 let (mut physical_tables, mut tables, mut views) =
416 self.get_schema_objects(&schema).await?;
417 physical_tables.sort();
418 let mut physical_ddls = Vec::with_capacity(physical_tables.len());
419 for table in physical_tables {
420 physical_ddls.push(self.show_create("TABLE", &schema, Some(&table)).await?);
421 }
422
423 tables.sort();
424 let mut table_ddls = Vec::with_capacity(tables.len());
425 for table in tables {
426 table_ddls.push(self.show_create("TABLE", &schema, Some(&table)).await?);
427 }
428
429 views.sort();
430 let mut view_ddls = Vec::with_capacity(views.len());
431 for view in views {
432 view_ddls.push(self.show_create("VIEW", &schema, Some(&view)).await?);
433 }
434
435 let ddl = build_schema_ddl(
436 &schema,
437 create_database,
438 physical_ddls,
439 table_ddls,
440 view_ddls,
441 );
442 ddl_by_schema.push((schema, ddl));
443 }
444
445 Ok(ddl_by_schema)
446 }
447
448 async fn get_schema_objects(
449 &self,
450 schema: &str,
451 ) -> Result<(Vec<String>, Vec<String>, Vec<String>)> {
452 let physical_tables = self.get_metric_physical_tables(schema).await?;
453 let physical_set: HashSet<&str> = physical_tables.iter().map(String::as_str).collect();
454 let sql = format!(
455 "SELECT table_name, table_type FROM information_schema.tables \
456 WHERE table_catalog = '{}' AND table_schema = '{}' \
457 AND (table_type = 'BASE TABLE' OR table_type = 'VIEW')",
458 escape_sql_literal(&self.config.catalog),
459 escape_sql_literal(schema)
460 );
461 let records: Option<Vec<Vec<Value>>> = self
462 .database_client
463 .sql_in_public(&sql)
464 .await
465 .context(DatabaseSnafu)?;
466
467 let mut tables = Vec::new();
468 let mut views = Vec::new();
469 if let Some(rows) = records {
470 for row in rows {
471 let name = match row.first() {
472 Some(Value::String(name)) => name.clone(),
473 _ => return UnexpectedValueTypeSnafu.fail(),
474 };
475 let table_type = match row.get(1) {
476 Some(Value::String(table_type)) => table_type.as_str(),
477 _ => return UnexpectedValueTypeSnafu.fail(),
478 };
479 if !physical_set.contains(name.as_str()) {
480 if table_type == "VIEW" {
481 views.push(name);
482 } else {
483 tables.push(name);
484 }
485 }
486 }
487 }
488
489 Ok((physical_tables, tables, views))
490 }
491
492 async fn get_metric_physical_tables(&self, schema: &str) -> Result<Vec<String>> {
493 let sql = format!(
494 "SELECT DISTINCT table_name FROM information_schema.columns \
495 WHERE table_catalog = '{}' AND table_schema = '{}' AND column_name = '__tsid'",
496 escape_sql_literal(&self.config.catalog),
497 escape_sql_literal(schema)
498 );
499 let records: Option<Vec<Vec<Value>>> = self
500 .database_client
501 .sql_in_public(&sql)
502 .await
503 .context(DatabaseSnafu)?;
504
505 let mut tables = HashSet::new();
506 if let Some(rows) = records {
507 for row in rows {
508 let name = match row.first() {
509 Some(Value::String(name)) => name.clone(),
510 _ => return UnexpectedValueTypeSnafu.fail(),
511 };
512 tables.insert(name);
513 }
514 }
515
516 Ok(tables.into_iter().collect())
517 }
518
519 async fn show_create(
520 &self,
521 show_type: &str,
522 schema: &str,
523 table: Option<&str>,
524 ) -> Result<String> {
525 let sql = match table {
526 Some(table) => format!(
527 r#"SHOW CREATE {} "{}"."{}"."{}""#,
528 show_type,
529 escape_sql_identifier(&self.config.catalog),
530 escape_sql_identifier(schema),
531 escape_sql_identifier(table)
532 ),
533 None => format!(
534 r#"SHOW CREATE {} "{}"."{}""#,
535 show_type,
536 escape_sql_identifier(&self.config.catalog),
537 escape_sql_identifier(schema)
538 ),
539 };
540
541 let records: Option<Vec<Vec<Value>>> = self
542 .database_client
543 .sql_in_public(&sql)
544 .await
545 .context(DatabaseSnafu)?;
546 let rows = records.context(EmptyResultSnafu)?;
547 let row = rows.first().context(EmptyResultSnafu)?;
548 let Some(Value::String(create)) = row.get(1) else {
549 return UnexpectedValueTypeSnafu.fail();
550 };
551
552 Ok(format!("{};\n", create))
553 }
554}
555
/// Assembles the DDL script of one schema.
///
/// The script is a `-- Schema: <name>` header line followed by the
/// `CREATE DATABASE` statement, the physical-table statements, the remaining
/// table statements and the view statements, concatenated as given, and a
/// final newline.
fn build_schema_ddl(
    schema: &str,
    create_database: String,
    physical_tables: Vec<String>,
    tables: Vec<String>,
    views: Vec<String>,
) -> String {
    let header = format!("-- Schema: {}\n", schema);

    let mut script: String = std::iter::once(header)
        .chain(std::iter::once(create_database))
        .chain(physical_tables)
        .chain(tables)
        .chain(views)
        .collect();
    script.push('\n');
    script
}
578
579fn validate_resume_config(manifest: &Manifest, config: &ExportConfig) -> Result<()> {
580 if manifest.schema_only != config.schema_only {
581 return SchemaOnlyModeMismatchSnafu {
582 existing_schema_only: manifest.schema_only,
583 requested_schema_only: config.schema_only,
584 }
585 .fail();
586 }
587
588 if manifest.catalog != config.catalog {
589 return ResumeConfigMismatchSnafu {
590 field: "catalog",
591 existing: manifest.catalog.clone(),
592 requested: config.catalog.clone(),
593 }
594 .fail();
595 }
596
597 if let Some(requested_schemas) = &config.schemas
600 && !schema_selection_matches(&manifest.schemas, requested_schemas)
601 {
602 return ResumeConfigMismatchSnafu {
603 field: "schemas",
604 existing: format_schema_selection(&manifest.schemas),
605 requested: format_schema_selection(requested_schemas),
606 }
607 .fail();
608 }
609
610 if manifest.time_range != config.time_range {
611 return ResumeConfigMismatchSnafu {
612 field: "time_range",
613 existing: format!("{:?}", manifest.time_range),
614 requested: format!("{:?}", config.time_range),
615 }
616 .fail();
617 }
618
619 if manifest.format != config.format {
620 return ResumeConfigMismatchSnafu {
621 field: "format",
622 existing: manifest.format.to_string(),
623 requested: config.format.to_string(),
624 }
625 .fail();
626 }
627
628 let expected_plan = Manifest::new_for_export(
629 manifest.catalog.clone(),
630 manifest.schemas.clone(),
631 config.schema_only,
632 config.time_range.clone(),
633 config.format,
634 config.chunk_time_window,
635 )?;
636 if !chunk_plan_matches(manifest, &expected_plan) {
637 return ResumeConfigMismatchSnafu {
638 field: "chunk plan",
639 existing: format_chunk_plan(&manifest.chunks),
640 requested: format_chunk_plan(&expected_plan.chunks),
641 }
642 .fail();
643 }
644
645 Ok(())
646}
647
648fn schema_selection_matches(existing: &[String], requested: &[String]) -> bool {
649 canonical_schema_selection(existing) == canonical_schema_selection(requested)
650}
651
/// Normalises a schema list for comparison: names are ASCII-lowercased,
/// sorted ascending and de-duplicated.
fn canonical_schema_selection(schemas: &[String]) -> Vec<String> {
    let mut names: Vec<String> = schemas
        .iter()
        .map(|name| name.to_ascii_lowercase())
        .collect();
    // After sorting, equal names are adjacent, so `dedup` removes every duplicate.
    names.sort();
    names.dedup();
    names
}
666
/// Renders a schema list as `[a, b, c]` for error messages.
fn format_schema_selection(schemas: &[String]) -> String {
    let mut rendered = String::from("[");
    for (index, name) in schemas.iter().enumerate() {
        if index > 0 {
            rendered.push_str(", ");
        }
        rendered.push_str(name);
    }
    rendered.push(']');
    rendered
}
670
671fn chunk_plan_matches(existing: &Manifest, expected: &Manifest) -> bool {
672 existing.chunks.len() == expected.chunks.len()
673 && existing
674 .chunks
675 .iter()
676 .zip(&expected.chunks)
677 .all(|(left, right)| left.id == right.id && left.time_range == right.time_range)
678}
679
680fn format_chunk_plan(chunks: &[ChunkMeta]) -> String {
681 let items = chunks
682 .iter()
683 .map(|chunk| format!("#{}:{:?}", chunk.id, chunk.time_range))
684 .collect::<Vec<_>>();
685 format!("[{}]", items.join(", "))
686}
687
/// One snapshot found while scanning a parent location.
#[derive(Debug)]
struct SnapshotListEntry {
    /// Directory of the snapshot relative to the scanned location, with a trailing `/`.
    path: String,
    /// Manifest deserialized from that directory.
    manifest: Manifest,
}
693
/// Outcome of scanning a parent location for snapshots.
#[derive(Debug, Default)]
struct SnapshotScanResult {
    /// Snapshots whose manifest could be deserialized.
    snapshots: Vec<SnapshotListEntry>,
    /// Directories (with a trailing `/`) whose manifest file exists but could
    /// not be deserialized.
    unreadable: Vec<String>,
}
699
700async fn scan_snapshots(storage: &OpenDalStorage) -> Result<SnapshotScanResult> {
701 let mut result = SnapshotScanResult::default();
702 for dir in storage.list_direct_child_dirs().await? {
703 let manifest_path = format!("{}/{}", dir.trim_matches('/'), MANIFEST_FILE);
704 let Some(data) = storage.read_file_if_exists(&manifest_path).await? else {
705 continue;
706 };
707
708 match serde_json::from_slice::<Manifest>(&data) {
709 Ok(manifest) => result.snapshots.push(SnapshotListEntry {
710 path: format!("{}/", dir.trim_matches('/')),
711 manifest,
712 }),
713 Err(_) => result
714 .unreadable
715 .push(format!("{}/", dir.trim_matches('/'))),
716 }
717 }
718
719 result
720 .snapshots
721 .sort_by_key(|entry| std::cmp::Reverse(entry.manifest.created_at));
722 result.unreadable.sort();
723 Ok(result)
724}
725
726fn print_snapshot_list(snapshots: &[SnapshotListEntry], unreadable_count: usize) {
727 if unreadable_count == 0 {
728 println!("Found {} snapshots:", snapshots.len());
729 } else {
730 println!(
731 "Found {} snapshots ({} {} skipped: unreadable manifest):",
732 snapshots.len(),
733 unreadable_count,
734 directory_word(unreadable_count)
735 );
736 }
737 println!();
738 println!(
739 " {:<24} {:<36} {:<19} {:<9} {:<7} {:<6} Status",
740 "Path", "ID", "Created", "Catalog", "Schemas", "Chunks"
741 );
742 println!(
743 " {:<24} {:<36} {:<19} {:<9} {:<7} {:<6} {:<10}",
744 "-".repeat(24),
745 "-".repeat(36),
746 "-".repeat(19),
747 "-".repeat(9),
748 "-".repeat(7),
749 "-".repeat(6),
750 "-".repeat(10)
751 );
752 for entry in snapshots {
753 let manifest = &entry.manifest;
754 println!(
755 " {:<24} {:<36} {:<19} {:<9} {:<7} {:<6} {}",
756 entry.path,
757 manifest.snapshot_id,
758 manifest.created_at.format("%Y-%m-%d %H:%M:%S"),
759 manifest.catalog,
760 manifest.schemas.len(),
761 format_list_chunks(manifest),
762 snapshot_status(manifest)
763 );
764 }
765}
766
767fn print_unreadable_warnings(unreadable: &[String]) {
768 if unreadable.is_empty() {
769 return;
770 }
771
772 println!();
773 println!(
774 "Warning: {} {} had corrupt/unreadable manifest.json:",
775 unreadable.len(),
776 directory_word(unreadable.len())
777 );
778 for path in unreadable {
779 println!(" - {}", path);
780 }
781}
782
/// Returns "directory" for a count of exactly one and "directories" otherwise.
fn directory_word(count: usize) -> &'static str {
    match count {
        1 => "directory",
        _ => "directories",
    }
}
790
791fn snapshot_status(manifest: &Manifest) -> &'static str {
792 if manifest.schema_only {
793 "schema-only"
794 } else if manifest.is_complete() {
795 "complete"
796 } else {
797 "incomplete"
798 }
799}
800
801fn format_list_chunks(manifest: &Manifest) -> String {
802 let total = manifest.chunks.len();
803 if total == 0 {
804 return "0".to_string();
805 }
806
807 format!(
808 "{}/{}",
809 manifest.completed_count() + manifest.skipped_count(),
810 total
811 )
812}
813
/// Unit tests for argument validation, resume-config validation, DDL
/// assembly and snapshot listing.
#[cfg(test)]
mod tests {
    use chrono::TimeZone;
    use clap::Parser;
    use tempfile::tempdir;
    use url::Url;

    use super::*;
    use crate::data::path::ddl_path_for_schema;

    /// Schema names map to `schema/ddl/<name>.sql`, with path-like characters
    /// percent-encoded.
    #[test]
    fn test_ddl_path_for_schema() {
        assert_eq!(ddl_path_for_schema("public"), "schema/ddl/public.sql");
        assert_eq!(
            ddl_path_for_schema("../evil"),
            "schema/ddl/%2E%2E%2Fevil.sql"
        );
    }

    /// The assembled DDL lists database, physical tables, tables and views in
    /// that order.
    #[test]
    fn test_build_schema_ddl_order() {
        let ddl = build_schema_ddl(
            "public",
            "CREATE DATABASE public;\n".to_string(),
            vec!["PHYSICAL;\n".to_string()],
            vec!["TABLE;\n".to_string()],
            vec!["VIEW;\n".to_string()],
        );

        let db_pos = ddl.find("CREATE DATABASE").unwrap();
        let physical_pos = ddl.find("PHYSICAL;").unwrap();
        let table_pos = ddl.find("TABLE;").unwrap();
        let view_pos = ddl.find("VIEW;").unwrap();
        assert!(db_pos < physical_pos);
        assert!(physical_pos < table_pos);
        assert!(table_pos < view_pos);
    }

    /// `--chunk-time-window` without a bounded time range is rejected by `build`.
    #[tokio::test]
    async fn test_build_rejects_chunk_window_without_bounds() {
        let cmd = ExportCreateCommand::parse_from([
            "export-v2-create",
            "--addr",
            "127.0.0.1:4000",
            "--to",
            "file:///tmp/export-v2-test",
            "--chunk-time-window",
            "1h",
        ]);

        let result = cmd.build().await;
        assert!(result.is_err());
        let error = result.err().unwrap().to_string();

        assert!(error.contains("chunk_time_window requires both --start-time and --end-time"));
    }

    /// With `--schema-only`, every data-export argument is reported in one error.
    #[tokio::test]
    async fn test_build_rejects_data_export_args_in_schema_only_mode() {
        let cmd = ExportCreateCommand::parse_from([
            "export-v2-create",
            "--addr",
            "127.0.0.1:4000",
            "--to",
            "file:///tmp/export-v2-test",
            "--schema-only",
            "--start-time",
            "2024-01-01T00:00:00Z",
            "--end-time",
            "2024-01-02T00:00:00Z",
            "--chunk-time-window",
            "1h",
            "--format",
            "csv",
            "--parallelism",
            "2",
        ]);

        let error = cmd.build().await.err().unwrap().to_string();

        assert!(error.contains("--schema-only cannot be used with data export arguments"));
        assert!(error.contains("--start-time"));
        assert!(error.contains("--end-time"));
        assert!(error.contains("--chunk-time-window"));
        assert!(error.contains("--format"));
        assert!(error.contains("--parallelism"));
    }

    /// The schema-only mismatch error message shows both the stored and the
    /// requested mode.
    #[test]
    fn test_schema_only_mode_mismatch_error_message() {
        let error = crate::data::export_v2::error::SchemaOnlyModeMismatchSnafu {
            existing_schema_only: false,
            requested_schema_only: true,
        }
        .build()
        .to_string();

        assert!(error.contains("existing: false"));
        assert!(error.contains("requested: true"));
    }

    /// Resuming with a different catalog is rejected.
    #[test]
    fn test_validate_resume_config_rejects_catalog_mismatch() {
        let manifest = Manifest::new_for_export(
            "greptime".to_string(),
            vec!["public".to_string()],
            false,
            TimeRange::unbounded(),
            DataFormat::Parquet,
            None,
        )
        .unwrap();
        let config = ExportConfig {
            catalog: "other".to_string(),
            schemas: None,
            schema_only: false,
            format: DataFormat::Parquet,
            force: false,
            time_range: TimeRange::unbounded(),
            chunk_time_window: None,
            parallelism: 1,
            snapshot_uri: "file:///tmp/snapshot".to_string(),
            storage_config: ObjectStoreConfig::default(),
        };

        let error = validate_resume_config(&manifest, &config)
            .err()
            .unwrap()
            .to_string();
        assert!(error.contains("catalog"));
    }

    /// A requested schema list that differs only in case, order and
    /// duplicates is accepted.
    #[test]
    fn test_validate_resume_config_accepts_schema_selection_with_different_case_and_order() {
        let manifest = Manifest::new_for_export(
            "greptime".to_string(),
            vec!["public".to_string(), "analytics".to_string()],
            false,
            TimeRange::unbounded(),
            DataFormat::Parquet,
            None,
        )
        .unwrap();
        let config = ExportConfig {
            catalog: "greptime".to_string(),
            schemas: Some(vec![
                "ANALYTICS".to_string(),
                "PUBLIC".to_string(),
                "public".to_string(),
            ]),
            schema_only: false,
            format: DataFormat::Parquet,
            force: false,
            time_range: TimeRange::unbounded(),
            chunk_time_window: None,
            parallelism: 1,
            snapshot_uri: "file:///tmp/snapshot".to_string(),
            storage_config: ObjectStoreConfig::default(),
        };

        assert!(validate_resume_config(&manifest, &config).is_ok());
    }

    /// Same time range but a newly requested chunk window is rejected as a
    /// chunk plan mismatch.
    #[test]
    fn test_validate_resume_config_rejects_chunk_plan_mismatch() {
        let start = chrono::Utc.with_ymd_and_hms(2025, 1, 1, 0, 0, 0).unwrap();
        let end = chrono::Utc.with_ymd_and_hms(2025, 1, 1, 2, 0, 0).unwrap();
        let time_range = TimeRange::new(Some(start), Some(end));
        // The stored manifest was planned without a chunk window ...
        let manifest = Manifest::new_for_export(
            "greptime".to_string(),
            vec!["public".to_string()],
            false,
            time_range.clone(),
            DataFormat::Parquet,
            None,
        )
        .unwrap();
        // ... while the resume request asks for one-hour chunks.
        let config = ExportConfig {
            catalog: "greptime".to_string(),
            schemas: None,
            schema_only: false,
            format: DataFormat::Parquet,
            force: false,
            time_range,
            chunk_time_window: Some(Duration::from_secs(3600)),
            parallelism: 1,
            snapshot_uri: "file:///tmp/snapshot".to_string(),
            storage_config: ObjectStoreConfig::default(),
        };

        let error = validate_resume_config(&manifest, &config)
            .err()
            .unwrap()
            .to_string();
        assert!(error.contains("chunk plan"));
    }

    /// Resuming with a different data format is rejected.
    #[test]
    fn test_validate_resume_config_rejects_format_mismatch() {
        let manifest = Manifest::new_for_export(
            "greptime".to_string(),
            vec!["public".to_string()],
            false,
            TimeRange::unbounded(),
            DataFormat::Parquet,
            None,
        )
        .unwrap();
        let config = ExportConfig {
            catalog: "greptime".to_string(),
            schemas: None,
            schema_only: false,
            format: DataFormat::Csv,
            force: false,
            time_range: TimeRange::unbounded(),
            chunk_time_window: None,
            parallelism: 1,
            snapshot_uri: "file:///tmp/snapshot".to_string(),
            storage_config: ObjectStoreConfig::default(),
        };

        let error = validate_resume_config(&manifest, &config)
            .err()
            .unwrap()
            .to_string();
        assert!(error.contains("format"));
    }

    /// Resuming with a different time range is rejected.
    #[test]
    fn test_validate_resume_config_rejects_time_range_mismatch() {
        let start = chrono::Utc.with_ymd_and_hms(2025, 1, 1, 0, 0, 0).unwrap();
        let end = chrono::Utc.with_ymd_and_hms(2025, 1, 1, 1, 0, 0).unwrap();
        let manifest = Manifest::new_for_export(
            "greptime".to_string(),
            vec!["public".to_string()],
            false,
            TimeRange::new(Some(start), Some(end)),
            DataFormat::Parquet,
            None,
        )
        .unwrap();
        let config = ExportConfig {
            catalog: "greptime".to_string(),
            schemas: None,
            schema_only: false,
            format: DataFormat::Parquet,
            force: false,
            // Same start, but the end differs from the stored range.
            time_range: TimeRange::new(Some(start), Some(start)),
            chunk_time_window: None,
            parallelism: 1,
            snapshot_uri: "file:///tmp/snapshot".to_string(),
            storage_config: ObjectStoreConfig::default(),
        };

        let error = validate_resume_config(&manifest, &config)
            .err()
            .unwrap()
            .to_string();
        assert!(error.contains("time_range"));
    }

    /// Scanning returns snapshots newest first, ignores directories without a
    /// manifest and reports directories with an unparsable manifest.
    #[tokio::test]
    async fn test_scan_snapshots_sorts_and_tracks_unreadable_manifests() {
        let dir = tempdir().unwrap();
        write_test_manifest(
            dir.path(),
            "older",
            test_manifest(
                chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
                false,
                true,
            ),
        );
        write_test_manifest(
            dir.path(),
            "newer",
            test_manifest(
                chrono::Utc.with_ymd_and_hms(2026, 2, 1, 0, 0, 0).unwrap(),
                false,
                true,
            ),
        );

        // Directories without a manifest file must be ignored.
        std::fs::create_dir_all(dir.path().join("empty-dir")).unwrap();
        std::fs::create_dir_all(dir.path().join("not-snapshot")).unwrap();
        std::fs::write(dir.path().join("not-snapshot").join("data.txt"), "x").unwrap();
        // A manifest file that is not valid JSON must be reported as unreadable.
        std::fs::create_dir_all(dir.path().join("broken")).unwrap();
        std::fs::write(dir.path().join("broken").join(MANIFEST_FILE), "{not-json").unwrap();

        let uri = Url::from_directory_path(dir.path()).unwrap().to_string();
        let storage = OpenDalStorage::from_file_uri(&uri).unwrap();
        let result = scan_snapshots(&storage).await.unwrap();

        assert_eq!(result.snapshots.len(), 2);
        assert_eq!(
            result.snapshots[0].manifest.created_at,
            chrono::Utc.with_ymd_and_hms(2026, 2, 1, 0, 0, 0).unwrap()
        );
        assert_eq!(
            result.snapshots[1].manifest.created_at,
            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap()
        );
        assert_eq!(result.unreadable, vec!["broken/".to_string()]);
        assert_eq!(result.snapshots[0].path, "newer/");
        assert_eq!(result.snapshots[1].path, "older/");
    }

    /// Status label and chunk summary for schema-only, complete and
    /// incomplete manifests.
    #[test]
    fn test_snapshot_list_status_and_chunk_summary() {
        let schema_only = test_manifest(
            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
            true,
            true,
        );
        assert_eq!(snapshot_status(&schema_only), "schema-only");
        assert_eq!(format_list_chunks(&schema_only), "0");

        // A completed chunk plus a skipped chunk counts as 2/2.
        let complete = test_manifest(
            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
            false,
            true,
        );
        assert_eq!(snapshot_status(&complete), "complete");
        assert_eq!(format_list_chunks(&complete), "2/2");

        let incomplete = test_manifest(
            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
            false,
            false,
        );
        assert_eq!(snapshot_status(&incomplete), "incomplete");
        assert_eq!(format_list_chunks(&incomplete), "1/2");
    }

    /// Writes `manifest` as pretty-printed JSON to `<root>/<dir>/<MANIFEST_FILE>`,
    /// creating the directory first.
    fn write_test_manifest(root: &std::path::Path, dir: &str, manifest: Manifest) {
        let snapshot_dir = root.join(dir);
        std::fs::create_dir_all(&snapshot_dir).unwrap();
        std::fs::write(
            snapshot_dir.join(MANIFEST_FILE),
            serde_json::to_vec_pretty(&manifest).unwrap(),
        )
        .unwrap();
    }

    /// Builds a manifest fixture with a fixed creation time.
    ///
    /// Unless `schema_only` is set, the chunk list is replaced by two chunks:
    /// chunk 1 marked completed, and chunk 2 either skipped (`complete == true`)
    /// or freshly created (`complete == false`).
    fn test_manifest(
        created_at: chrono::DateTime<chrono::Utc>,
        schema_only: bool,
        complete: bool,
    ) -> Manifest {
        let mut manifest = Manifest::new_for_export(
            "greptime".to_string(),
            vec!["public".to_string(), "analytics".to_string()],
            schema_only,
            TimeRange::unbounded(),
            DataFormat::Parquet,
            None,
        )
        .unwrap();
        manifest.created_at = created_at;
        manifest.updated_at = created_at;

        if !schema_only {
            manifest.chunks.clear();
            let mut first = ChunkMeta::new(1, TimeRange::unbounded());
            first.mark_completed(vec!["data/public/chunk_1/file.parquet".to_string()], None);
            manifest.chunks.push(first);

            if complete {
                manifest
                    .chunks
                    .push(ChunkMeta::skipped(2, TimeRange::unbounded()));
            } else {
                manifest
                    .chunks
                    .push(ChunkMeta::new(2, TimeRange::unbounded()));
            }
        }

        manifest
    }
}