Skip to main content

cli/data/export_v2/
command.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Export V2 CLI commands.
16
17use std::collections::HashSet;
18use std::io::{self, Write};
19use std::time::Duration;
20
21use async_trait::async_trait;
22use clap::{Parser, Subcommand};
23use common_error::ext::BoxedError;
24use common_telemetry::info;
25use serde_json::Value;
26use snafu::{OptionExt, ResultExt};
27
28use crate::Tool;
29use crate::common::ObjectStoreConfig;
30use crate::data::export_v2::coordinator::export_data;
31use crate::data::export_v2::error::{
32    ChunkTimeWindowRequiresBoundsSnafu, DatabaseSnafu, EmptyResultSnafu, IoSnafu,
33    ManifestVersionMismatchSnafu, Result, ResumeConfigMismatchSnafu, SchemaOnlyArgsNotAllowedSnafu,
34    SchemaOnlyModeMismatchSnafu, SnapshotVerifyFailedSnafu, UnexpectedValueTypeSnafu,
35};
36use crate::data::export_v2::extractor::SchemaExtractor;
37use crate::data::export_v2::manifest::{
38    ChunkMeta, ChunkStatus, DataFormat, MANIFEST_FILE, MANIFEST_VERSION, Manifest, TimeRange,
39};
40use crate::data::export_v2::schema::{DDL_DIR, SCHEMA_DIR, SCHEMAS_FILE};
41use crate::data::path::{data_dir_for_schema_chunk, ddl_path_for_schema};
42use crate::data::snapshot_storage::{
43    OpenDalStorage, SnapshotStorage, validate_snapshot_uri, validate_uri,
44};
45use crate::data::sql::{escape_sql_identifier, escape_sql_literal};
46use crate::database::{DatabaseClient, parse_proxy_opts};
47
// Top-level dispatch enum for the export-v2 CLI: each variant wraps the parsed
// argument struct of one subcommand.
//
// NOTE: the `///` comments on this enum and its variants are consumed by
// clap's derive macros as user-visible help text, so treat them as CLI output
// rather than as ordinary code comments.
/// Export V2 commands.
#[derive(Debug, Subcommand)]
pub enum ExportV2Command {
    /// Create a new snapshot.
    Create(ExportCreateCommand),
    /// List snapshots under a parent location.
    List(ExportListCommand),
    /// Verify snapshot integrity.
    Verify(ExportVerifyCommand),
    /// Delete a snapshot and all data under it.
    Delete(ExportDeleteCommand),
}
60
61impl ExportV2Command {
62    pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
63        match self {
64            ExportV2Command::Create(cmd) => cmd.build().await,
65            ExportV2Command::List(cmd) => cmd.build().await,
66            ExportV2Command::Verify(cmd) => cmd.build().await,
67            ExportV2Command::Delete(cmd) => cmd.build().await,
68        }
69    }
70}
71
// Parsed arguments of the `list` subcommand.
//
// NOTE: the `///` comments below are consumed by clap's derive macros as
// user-visible help text; keep them worded as CLI help.
/// List snapshots under a parent location.
#[derive(Debug, Parser)]
pub struct ExportListCommand {
    /// Parent storage location whose direct subdirectories are snapshots.
    #[clap(long)]
    location: String,

    /// Object store configuration for remote storage backends.
    #[clap(flatten)]
    storage: ObjectStoreConfig,
}
83
84impl ExportListCommand {
85    pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
86        validate_uri(&self.location).map_err(BoxedError::new)?;
87        let storage = OpenDalStorage::from_parent_uri(&self.location, &self.storage)
88            .map_err(BoxedError::new)?;
89
90        Ok(Box::new(ExportList {
91            location: self.location.clone(),
92            storage,
93        }))
94    }
95}
96
/// Export list tool implementation.
///
/// Built by `ExportListCommand::build`; running it scans the parent location
/// and prints the snapshots found there.
pub struct ExportList {
    /// Parent location exactly as given on the command line; echoed in the
    /// "Scanning: ..." line of the output.
    location: String,
    /// Storage handle opened on the parent location via
    /// `OpenDalStorage::from_parent_uri`.
    storage: OpenDalStorage,
}
102
103#[async_trait]
104impl Tool for ExportList {
105    async fn do_work(&self) -> std::result::Result<(), BoxedError> {
106        self.run().await.map_err(BoxedError::new)
107    }
108}
109
110impl ExportList {
111    async fn run(&self) -> Result<()> {
112        let result = scan_snapshots(&self.storage).await?;
113
114        println!("Scanning: {}", self.location);
115        if result.snapshots.is_empty() {
116            println!("No snapshots found.");
117        } else {
118            print_snapshot_list(&result.snapshots, result.unreadable.len());
119        }
120        print_unreadable_warnings(&result.unreadable);
121
122        Ok(())
123    }
124}
125
// Parsed arguments of the `verify` subcommand.
//
// NOTE: the `///` comments below are consumed by clap's derive macros as
// user-visible help text; keep them worded as CLI help.
/// Verify snapshot integrity.
#[derive(Debug, Parser)]
pub struct ExportVerifyCommand {
    /// Snapshot storage location (e.g., s3://bucket/path, file:///tmp/backup).
    #[clap(long)]
    snapshot: String,

    /// Object store configuration for remote storage backends.
    #[clap(flatten)]
    storage: ObjectStoreConfig,
}
137
138impl ExportVerifyCommand {
139    pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
140        validate_uri(&self.snapshot).map_err(BoxedError::new)?;
141        let storage =
142            OpenDalStorage::from_uri(&self.snapshot, &self.storage).map_err(BoxedError::new)?;
143
144        Ok(Box::new(ExportVerify {
145            snapshot: self.snapshot.clone(),
146            storage,
147        }))
148    }
149}
150
/// Export verify tool implementation.
///
/// Built by `ExportVerifyCommand::build`; running it verifies the snapshot,
/// prints a report, and fails if the report contains any problem.
pub struct ExportVerify {
    /// Snapshot location as given on the command line; passed to the report
    /// printer.
    snapshot: String,
    /// Storage handle opened on the snapshot via `OpenDalStorage::from_uri`.
    storage: OpenDalStorage,
}
156
157#[async_trait]
158impl Tool for ExportVerify {
159    async fn do_work(&self) -> std::result::Result<(), BoxedError> {
160        self.run().await.map_err(BoxedError::new)
161    }
162}
163
164impl ExportVerify {
165    async fn run(&self) -> Result<()> {
166        let report = verify_snapshot(&self.storage).await?;
167        print_verify_report(&self.snapshot, &report);
168
169        if report.has_problems() {
170            return SnapshotVerifyFailedSnafu {
171                errors: report.error_count(),
172                warnings: report.warning_count(),
173            }
174            .fail();
175        }
176
177        Ok(())
178    }
179}
180
// Parsed arguments of the `delete` subcommand.
//
// NOTE: the `///` comments below are consumed by clap's derive macros as
// user-visible help text; keep them worded as CLI help.
/// Delete a snapshot and all data under it.
#[derive(Debug, Parser)]
pub struct ExportDeleteCommand {
    /// Snapshot storage location (e.g., s3://bucket/path, file:///tmp/backup).
    #[clap(long)]
    snapshot: String,

    /// Skip interactive confirmation.
    // Exposed as `--no-confirm`, with `yes` registered as an alias.
    #[clap(long = "no-confirm", alias = "yes")]
    skip_confirmation: bool,

    /// Object store configuration for remote storage backends.
    #[clap(flatten)]
    storage: ObjectStoreConfig,
}
196
197impl ExportDeleteCommand {
198    pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
199        validate_snapshot_uri(&self.snapshot).map_err(BoxedError::new)?;
200        let storage =
201            OpenDalStorage::from_uri(&self.snapshot, &self.storage).map_err(BoxedError::new)?;
202
203        Ok(Box::new(ExportDelete {
204            snapshot: self.snapshot.clone(),
205            skip_confirmation: self.skip_confirmation,
206            storage,
207        }))
208    }
209}
210
/// Export delete tool implementation.
///
/// Built by `ExportDeleteCommand::build`; running it prints a summary of the
/// snapshot, optionally asks for confirmation, and then deletes the snapshot.
pub struct ExportDelete {
    /// Snapshot location as given on the command line; shown in the summary
    /// and handed to the confirmation callback.
    snapshot: String,
    /// When `true`, the confirmation callback is never invoked.
    skip_confirmation: bool,
    /// Storage handle opened on the snapshot via `OpenDalStorage::from_uri`.
    storage: OpenDalStorage,
}
217
218#[async_trait]
219impl Tool for ExportDelete {
220    async fn do_work(&self) -> std::result::Result<(), BoxedError> {
221        self.run().await.map_err(BoxedError::new)
222    }
223}
224
225impl ExportDelete {
226    async fn run(&self) -> Result<()> {
227        self.run_with_confirmation(confirm_delete).await
228    }
229
230    async fn run_with_confirmation<F>(&self, confirm: F) -> Result<()>
231    where
232        F: FnOnce(&str) -> Result<bool>,
233    {
234        let manifest = self.storage.read_manifest().await?;
235        print_delete_summary(&self.snapshot, &manifest);
236
237        if !self.skip_confirmation && !confirm(&self.snapshot)? {
238            println!("Deletion cancelled.");
239            return Ok(());
240        }
241
242        println!("Deleting snapshot...");
243        self.storage.delete_snapshot().await?;
244        println!("Snapshot deleted successfully.");
245
246        Ok(())
247    }
248}
249
// Parsed arguments of the `create` subcommand. Cross-field rules (for example
// which flags are rejected together with `--schema-only`) are not expressed
// here; they are checked in `ExportCreateCommand::build`.
//
// NOTE: the `///` comments below are consumed by clap's derive macros as
// user-visible help text; keep them worded as CLI help.
/// Create a new snapshot.
#[derive(Debug, Parser)]
pub struct ExportCreateCommand {
    /// Server address to connect (e.g., 127.0.0.1:4000).
    #[clap(long)]
    addr: String,

    /// Target storage location (e.g., s3://bucket/path, file:///tmp/backup).
    #[clap(long)]
    to: String,

    /// Catalog name.
    #[clap(long, default_value = "greptime")]
    catalog: String,

    /// Schema list to export (default: all non-system schemas).
    /// Can be specified multiple times or comma-separated.
    #[clap(long, value_delimiter = ',')]
    schemas: Vec<String>,

    /// Export schema only, no data.
    #[clap(long)]
    schema_only: bool,

    /// Time range start (ISO 8601 format, e.g., 2024-01-01T00:00:00Z).
    #[clap(long)]
    start_time: Option<String>,

    /// Time range end (ISO 8601 format, e.g., 2024-12-31T23:59:59Z).
    #[clap(long)]
    end_time: Option<String>,

    /// Chunk time window (e.g., 1h, 6h, 1d, 7d).
    /// Requires both --start-time and --end-time when specified.
    #[clap(long, value_parser = humantime::parse_duration)]
    chunk_time_window: Option<Duration>,

    /// Data format: parquet, csv, json.
    #[clap(long, value_enum, default_value = "parquet")]
    format: DataFormat,

    /// Delete existing snapshot and recreate.
    #[clap(long)]
    force: bool,

    /// Parallelism for COPY DATABASE execution (server-side, per schema per chunk).
    #[clap(long, default_value = "1")]
    parallelism: usize,

    /// Basic authentication (user:password).
    #[clap(long)]
    auth_basic: Option<String>,

    /// Request timeout.
    // When omitted, `build` falls back to 60 seconds.
    #[clap(long, value_parser = humantime::parse_duration)]
    timeout: Option<Duration>,

    /// Proxy server address.
    ///
    /// If set, it overrides the system proxy unless `--no-proxy` is specified.
    /// If neither `--proxy` nor `--no-proxy` is set, system proxy (env) may be used.
    #[clap(long)]
    proxy: Option<String>,

    /// Disable all proxy usage (ignores `--proxy` and system proxy).
    ///
    /// When set and `--proxy` is not provided, this explicitly disables system proxy.
    #[clap(long)]
    no_proxy: bool,

    /// Object store configuration for remote storage backends.
    #[clap(flatten)]
    storage: ObjectStoreConfig,
}
324
325impl ExportCreateCommand {
326    pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
327        // Validate URI format
328        validate_uri(&self.to).map_err(BoxedError::new)?;
329
330        let time_range = TimeRange::parse(self.start_time.as_deref(), self.end_time.as_deref())
331            .map_err(BoxedError::new)?;
332        if self.chunk_time_window.is_some() && !time_range.is_bounded() {
333            return ChunkTimeWindowRequiresBoundsSnafu
334                .fail()
335                .map_err(BoxedError::new);
336        }
337        if self.schema_only {
338            let mut invalid_args = Vec::new();
339            if self.start_time.is_some() {
340                invalid_args.push("--start-time");
341            }
342            if self.end_time.is_some() {
343                invalid_args.push("--end-time");
344            }
345            if self.chunk_time_window.is_some() {
346                invalid_args.push("--chunk-time-window");
347            }
348            if self.format != DataFormat::Parquet {
349                invalid_args.push("--format");
350            }
351            if self.parallelism != 1 {
352                invalid_args.push("--parallelism");
353            }
354            if !invalid_args.is_empty() {
355                return SchemaOnlyArgsNotAllowedSnafu {
356                    args: invalid_args.join(", "),
357                }
358                .fail()
359                .map_err(BoxedError::new);
360            }
361        }
362
363        // Parse schemas (empty vec means all schemas)
364        let schemas = if self.schemas.is_empty() {
365            None
366        } else {
367            Some(self.schemas.clone())
368        };
369
370        // Build storage
371        let storage = OpenDalStorage::from_uri(&self.to, &self.storage).map_err(BoxedError::new)?;
372
373        // Build database client
374        let proxy = parse_proxy_opts(self.proxy.clone(), self.no_proxy)?;
375        let database_client = DatabaseClient::new(
376            self.addr.clone(),
377            self.catalog.clone(),
378            self.auth_basic.clone(),
379            self.timeout.unwrap_or(Duration::from_secs(60)),
380            proxy,
381            self.no_proxy,
382        );
383
384        Ok(Box::new(ExportCreate {
385            config: ExportConfig {
386                catalog: self.catalog.clone(),
387                schemas,
388                schema_only: self.schema_only,
389                format: self.format,
390                force: self.force,
391                time_range,
392                chunk_time_window: self.chunk_time_window,
393                parallelism: self.parallelism,
394                snapshot_uri: self.to.clone(),
395                storage_config: self.storage.clone(),
396            },
397            storage: Box::new(storage),
398            database_client,
399        }))
400    }
401}
402
/// Export tool implementation.
///
/// Built by `ExportCreateCommand::build`; running it either resumes an
/// existing snapshot at the target location or creates a new one.
pub struct ExportCreate {
    /// Validated export settings captured from the command line.
    config: ExportConfig,
    /// Snapshot storage for the target location, held behind the
    /// `SnapshotStorage` trait object.
    storage: Box<dyn SnapshotStorage>,
    /// Client used to run SQL against the source database.
    database_client: DatabaseClient,
}
409
/// Export settings captured from `ExportCreateCommand` after validation.
struct ExportConfig {
    /// Catalog to export from (`--catalog`).
    catalog: String,
    /// Explicit schema filter; `None` when `--schemas` was not given.
    schemas: Option<Vec<String>>,
    /// Whether only schema artifacts are exported (`--schema-only`).
    schema_only: bool,
    /// Data file format (`--format`).
    format: DataFormat,
    /// Whether an existing snapshot is deleted and recreated (`--force`).
    force: bool,
    /// Time range parsed from `--start-time` / `--end-time`.
    time_range: TimeRange,
    /// Optional chunk window (`--chunk-time-window`).
    chunk_time_window: Option<Duration>,
    /// Parallelism value forwarded to the data export (`--parallelism`).
    parallelism: usize,
    /// Target location string (`--to`), forwarded to the data export.
    snapshot_uri: String,
    /// Object store settings, forwarded to the data export.
    storage_config: ObjectStoreConfig,
}
422
423#[async_trait]
424impl Tool for ExportCreate {
425    async fn do_work(&self) -> std::result::Result<(), BoxedError> {
426        self.run().await.map_err(BoxedError::new)
427    }
428}
429
430impl ExportCreate {
    /// Creates a new snapshot or resumes an existing one at the target.
    ///
    /// * Existing snapshot + `--force`: the snapshot is deleted and the
    ///   fresh-export path below runs.
    /// * Existing snapshot without `--force`: resume mode. The stored manifest
    ///   must have the current manifest version and must be compatible with
    ///   the requested configuration; remaining data is then exported unless
    ///   the snapshot is already complete or is schema-only.
    /// * No existing snapshot: schemas are extracted, schema and DDL files are
    ///   written, the manifest is written, and (unless `--schema-only`) data
    ///   is exported.
    ///
    /// # Errors
    ///
    /// Propagates storage, database and resume-validation errors, including a
    /// manifest version mismatch in resume mode.
    async fn run(&self) -> Result<()> {
        // 1. Check if snapshot exists
        let exists = self.storage.exists().await?;

        if exists {
            if self.config.force {
                info!("Deleting existing snapshot (--force)");
                self.storage.delete_snapshot().await?;
                // Falls through to the fresh-export path below.
            } else {
                // Resume mode - read existing manifest
                let mut manifest = self.storage.read_manifest().await?;

                // Check version compatibility
                if manifest.version != MANIFEST_VERSION {
                    return ManifestVersionMismatchSnafu {
                        expected: MANIFEST_VERSION,
                        found: manifest.version,
                    }
                    .fail();
                }

                // Reject a resume whose arguments disagree with the snapshot
                // that is already on storage.
                validate_resume_config(&manifest, &self.config)?;

                info!(
                    "Resuming existing snapshot: {} (completed: {}/{} chunks)",
                    manifest.snapshot_id,
                    manifest.completed_count(),
                    manifest.chunks.len()
                );

                if manifest.is_complete() {
                    info!("Snapshot is already complete");
                    return Ok(());
                }

                // A schema-only snapshot has no data phase to resume.
                if manifest.schema_only {
                    return Ok(());
                }

                export_data(
                    self.storage.as_ref(),
                    &self.database_client,
                    &self.config.snapshot_uri,
                    &self.config.storage_config,
                    &mut manifest,
                    self.config.parallelism,
                )
                .await?;
                return Ok(());
            }
        }

        // 2. Get schema list
        let extractor = SchemaExtractor::new(&self.database_client, &self.config.catalog);
        let schema_snapshot = extractor.extract(self.config.schemas.as_deref()).await?;

        let schema_names: Vec<String> = schema_snapshot
            .schemas
            .iter()
            .map(|s| s.name.clone())
            .collect();
        info!("Exporting schemas: {:?}", schema_names);

        // 3. Create manifest
        // Built in memory only; it is not persisted until step 6.
        let mut manifest = Manifest::new_for_export(
            self.config.catalog.clone(),
            schema_names.clone(),
            self.config.schema_only,
            self.config.time_range.clone(),
            self.config.format,
            self.config.chunk_time_window,
        )?;

        // 4. Write schema files
        self.storage.write_schema(&schema_snapshot).await?;
        info!("Exported {} schemas", schema_snapshot.schemas.len());

        // 5. Export DDL files for import recovery.
        let ddl_by_schema = self.build_ddl_by_schema(&schema_names).await?;
        for (schema, ddl) in ddl_by_schema {
            let ddl_path = ddl_path_for_schema(&schema);
            self.storage.write_text(&ddl_path, &ddl).await?;
            info!("Exported DDL for schema {} to {}", schema, ddl_path);
        }

        // 6. Write manifest after schema artifacts and before any data export.
        //
        // The manifest is the snapshot commit point: only write it after the schema
        // index and all DDL files are durable, so a crash cannot leave a "valid"
        // snapshot that is missing required schema artifacts. For full exports we
        // still need the manifest before data copy starts, because chunk resume is
        // tracked by updating this manifest in place.
        self.storage.write_manifest(&manifest).await?;
        info!("Snapshot created: {}", manifest.snapshot_id);

        if !self.config.schema_only {
            export_data(
                self.storage.as_ref(),
                &self.database_client,
                &self.config.snapshot_uri,
                &self.config.storage_config,
                &mut manifest,
                self.config.parallelism,
            )
            .await?;
        }

        Ok(())
    }
540
541    async fn build_ddl_by_schema(&self, schema_names: &[String]) -> Result<Vec<(String, String)>> {
542        let mut schemas = schema_names.to_vec();
543        schemas.sort();
544
545        let mut ddl_by_schema = Vec::with_capacity(schemas.len());
546        for schema in schemas {
547            let create_database = self.show_create("DATABASE", &schema, None).await?;
548
549            let (mut physical_tables, mut tables, mut views) =
550                self.get_schema_objects(&schema).await?;
551            physical_tables.sort();
552            let mut physical_ddls = Vec::with_capacity(physical_tables.len());
553            for table in physical_tables {
554                physical_ddls.push(self.show_create("TABLE", &schema, Some(&table)).await?);
555            }
556
557            tables.sort();
558            let mut table_ddls = Vec::with_capacity(tables.len());
559            for table in tables {
560                table_ddls.push(self.show_create("TABLE", &schema, Some(&table)).await?);
561            }
562
563            views.sort();
564            let mut view_ddls = Vec::with_capacity(views.len());
565            for view in views {
566                view_ddls.push(self.show_create("VIEW", &schema, Some(&view)).await?);
567            }
568
569            let ddl = build_schema_ddl(
570                &schema,
571                create_database,
572                physical_ddls,
573                table_ddls,
574                view_ddls,
575            );
576            ddl_by_schema.push((schema, ddl));
577        }
578
579        Ok(ddl_by_schema)
580    }
581
582    async fn get_schema_objects(
583        &self,
584        schema: &str,
585    ) -> Result<(Vec<String>, Vec<String>, Vec<String>)> {
586        let physical_tables = self.get_metric_physical_tables(schema).await?;
587        let physical_set: HashSet<&str> = physical_tables.iter().map(String::as_str).collect();
588        let sql = format!(
589            "SELECT table_name, table_type FROM information_schema.tables \
590             WHERE table_catalog = '{}' AND table_schema = '{}' \
591             AND (table_type = 'BASE TABLE' OR table_type = 'VIEW')",
592            escape_sql_literal(&self.config.catalog),
593            escape_sql_literal(schema)
594        );
595        let records: Option<Vec<Vec<Value>>> = self
596            .database_client
597            .sql_in_public(&sql)
598            .await
599            .context(DatabaseSnafu)?;
600
601        let mut tables = Vec::new();
602        let mut views = Vec::new();
603        if let Some(rows) = records {
604            for row in rows {
605                let name = match row.first() {
606                    Some(Value::String(name)) => name.clone(),
607                    _ => return UnexpectedValueTypeSnafu.fail(),
608                };
609                let table_type = match row.get(1) {
610                    Some(Value::String(table_type)) => table_type.as_str(),
611                    _ => return UnexpectedValueTypeSnafu.fail(),
612                };
613                if !physical_set.contains(name.as_str()) {
614                    if table_type == "VIEW" {
615                        views.push(name);
616                    } else {
617                        tables.push(name);
618                    }
619                }
620            }
621        }
622
623        Ok((physical_tables, tables, views))
624    }
625
626    async fn get_metric_physical_tables(&self, schema: &str) -> Result<Vec<String>> {
627        let sql = format!(
628            "SELECT DISTINCT table_name FROM information_schema.columns \
629             WHERE table_catalog = '{}' AND table_schema = '{}' AND column_name = '__tsid'",
630            escape_sql_literal(&self.config.catalog),
631            escape_sql_literal(schema)
632        );
633        let records: Option<Vec<Vec<Value>>> = self
634            .database_client
635            .sql_in_public(&sql)
636            .await
637            .context(DatabaseSnafu)?;
638
639        let mut tables = HashSet::new();
640        if let Some(rows) = records {
641            for row in rows {
642                let name = match row.first() {
643                    Some(Value::String(name)) => name.clone(),
644                    _ => return UnexpectedValueTypeSnafu.fail(),
645                };
646                tables.insert(name);
647            }
648        }
649
650        Ok(tables.into_iter().collect())
651    }
652
653    async fn show_create(
654        &self,
655        show_type: &str,
656        schema: &str,
657        table: Option<&str>,
658    ) -> Result<String> {
659        let sql = match table {
660            Some(table) => format!(
661                r#"SHOW CREATE {} "{}"."{}"."{}""#,
662                show_type,
663                escape_sql_identifier(&self.config.catalog),
664                escape_sql_identifier(schema),
665                escape_sql_identifier(table)
666            ),
667            None => format!(
668                r#"SHOW CREATE {} "{}"."{}""#,
669                show_type,
670                escape_sql_identifier(&self.config.catalog),
671                escape_sql_identifier(schema)
672            ),
673        };
674
675        let records: Option<Vec<Vec<Value>>> = self
676            .database_client
677            .sql_in_public(&sql)
678            .await
679            .context(DatabaseSnafu)?;
680        let rows = records.context(EmptyResultSnafu)?;
681        let row = rows.first().context(EmptyResultSnafu)?;
682        let Some(Value::String(create)) = row.get(1) else {
683            return UnexpectedValueTypeSnafu.fail();
684        };
685
686        Ok(format!("{};\n", create))
687    }
688}
689
/// Assembles the DDL script for one schema.
///
/// The script starts with a `-- Schema: <name>` comment line, followed by the
/// database statement, then the physical-table, table and view statements in
/// that order, and ends with one extra newline. Statements are appended
/// verbatim, so each is expected to carry its own terminator.
fn build_schema_ddl(
    schema: &str,
    create_database: String,
    physical_tables: Vec<String>,
    tables: Vec<String>,
    views: Vec<String>,
) -> String {
    let mut script = format!("-- Schema: {}\n", schema);
    script.push_str(&create_database);
    script.extend(physical_tables.into_iter().chain(tables).chain(views));
    script.push('\n');
    script
}
712
713fn validate_resume_config(manifest: &Manifest, config: &ExportConfig) -> Result<()> {
714    if manifest.schema_only != config.schema_only {
715        return SchemaOnlyModeMismatchSnafu {
716            existing_schema_only: manifest.schema_only,
717            requested_schema_only: config.schema_only,
718        }
719        .fail();
720    }
721
722    if manifest.catalog != config.catalog {
723        return ResumeConfigMismatchSnafu {
724            field: "catalog",
725            existing: manifest.catalog.clone(),
726            requested: config.catalog.clone(),
727        }
728        .fail();
729    }
730
731    // If no schema filter is provided on resume, inherit the existing snapshot
732    // selection instead of reinterpreting the request as "all schemas".
733    if let Some(requested_schemas) = &config.schemas
734        && !schema_selection_matches(&manifest.schemas, requested_schemas)
735    {
736        return ResumeConfigMismatchSnafu {
737            field: "schemas",
738            existing: format_schema_selection(&manifest.schemas),
739            requested: format_schema_selection(requested_schemas),
740        }
741        .fail();
742    }
743
744    if manifest.time_range != config.time_range {
745        return ResumeConfigMismatchSnafu {
746            field: "time_range",
747            existing: format!("{:?}", manifest.time_range),
748            requested: format!("{:?}", config.time_range),
749        }
750        .fail();
751    }
752
753    if manifest.format != config.format {
754        return ResumeConfigMismatchSnafu {
755            field: "format",
756            existing: manifest.format.to_string(),
757            requested: config.format.to_string(),
758        }
759        .fail();
760    }
761
762    let expected_plan = Manifest::new_for_export(
763        manifest.catalog.clone(),
764        manifest.schemas.clone(),
765        config.schema_only,
766        config.time_range.clone(),
767        config.format,
768        config.chunk_time_window,
769    )?;
770    if !chunk_plan_matches(manifest, &expected_plan) {
771        return ResumeConfigMismatchSnafu {
772            field: "chunk plan",
773            existing: format_chunk_plan(&manifest.chunks),
774            requested: format_chunk_plan(&expected_plan.chunks),
775        }
776        .fail();
777    }
778
779    Ok(())
780}
781
782fn schema_selection_matches(existing: &[String], requested: &[String]) -> bool {
783    canonical_schema_selection(existing) == canonical_schema_selection(requested)
784}
785
/// Normalizes a schema list for comparison: names are ASCII-lowercased,
/// sorted, and de-duplicated.
fn canonical_schema_selection(schemas: &[String]) -> Vec<String> {
    let mut names: Vec<String> = schemas
        .iter()
        .map(|schema| schema.to_ascii_lowercase())
        .collect();
    names.sort();
    // After sorting, equal names are adjacent, so `dedup` removes every repeat.
    names.dedup();
    names
}
800
/// Renders a schema list for error messages as `[a, b, c]`, keeping the
/// names and their order exactly as given.
fn format_schema_selection(schemas: &[String]) -> String {
    let joined = schemas.join(", ");
    format!("[{joined}]")
}
804
805fn chunk_plan_matches(existing: &Manifest, expected: &Manifest) -> bool {
806    existing.chunks.len() == expected.chunks.len()
807        && existing
808            .chunks
809            .iter()
810            .zip(&expected.chunks)
811            .all(|(left, right)| left.id == right.id && left.time_range == right.time_range)
812}
813
814fn format_chunk_plan(chunks: &[ChunkMeta]) -> String {
815    let items = chunks
816        .iter()
817        .map(|chunk| format!("#{}:{:?}", chunk.id, chunk.time_range))
818        .collect::<Vec<_>>();
819    format!("[{}]", items.join(", "))
820}
821
/// One successfully parsed snapshot found by `scan_snapshots`.
#[derive(Debug)]
struct SnapshotListEntry {
    /// Child directory name with surrounding slashes trimmed and a single
    /// trailing `/` appended.
    path: String,
    /// Manifest deserialized from the directory's manifest file.
    manifest: Manifest,
}
827
/// Outcome of scanning a parent location for snapshots.
#[derive(Debug, Default)]
struct SnapshotScanResult {
    /// Snapshots whose manifest parsed; `scan_snapshots` returns them sorted
    /// by manifest creation time, newest first.
    snapshots: Vec<SnapshotListEntry>,
    /// Paths of directories that contain a manifest file which failed to
    /// parse; `scan_snapshots` returns them sorted.
    unreadable: Vec<String>,
}
833
834async fn scan_snapshots(storage: &OpenDalStorage) -> Result<SnapshotScanResult> {
835    let mut result = SnapshotScanResult::default();
836    for dir in storage.list_direct_child_dirs().await? {
837        let manifest_path = format!("{}/{}", dir.trim_matches('/'), MANIFEST_FILE);
838        let Some(data) = storage.read_file_if_exists(&manifest_path).await? else {
839            continue;
840        };
841
842        match serde_json::from_slice::<Manifest>(&data) {
843            Ok(manifest) => result.snapshots.push(SnapshotListEntry {
844                path: format!("{}/", dir.trim_matches('/')),
845                manifest,
846            }),
847            Err(_) => result
848                .unreadable
849                .push(format!("{}/", dir.trim_matches('/'))),
850        }
851    }
852
853    result
854        .snapshots
855        .sort_by_key(|entry| std::cmp::Reverse(entry.manifest.created_at));
856    result.unreadable.sort();
857    Ok(result)
858}
859
860fn print_snapshot_list(snapshots: &[SnapshotListEntry], unreadable_count: usize) {
861    if unreadable_count == 0 {
862        println!("Found {} snapshots:", snapshots.len());
863    } else {
864        println!(
865            "Found {} snapshots ({} {} skipped: unreadable manifest):",
866            snapshots.len(),
867            unreadable_count,
868            directory_word(unreadable_count)
869        );
870    }
871    println!();
872    println!(
873        "  {:<24}  {:<36}  {:<19}  {:<9}  {:<7}  {:<6}  Status",
874        "Path", "ID", "Created", "Catalog", "Schemas", "Chunks"
875    );
876    println!(
877        "  {:<24}  {:<36}  {:<19}  {:<9}  {:<7}  {:<6}  {:<10}",
878        "-".repeat(24),
879        "-".repeat(36),
880        "-".repeat(19),
881        "-".repeat(9),
882        "-".repeat(7),
883        "-".repeat(6),
884        "-".repeat(10)
885    );
886    for entry in snapshots {
887        let manifest = &entry.manifest;
888        println!(
889            "  {:<24}  {:<36}  {:<19}  {:<9}  {:<7}  {:<6}  {}",
890            entry.path,
891            manifest.snapshot_id,
892            manifest.created_at.format("%Y-%m-%d %H:%M:%S"),
893            manifest.catalog,
894            manifest.schemas.len(),
895            format_list_chunks(manifest),
896            snapshot_status(manifest)
897        );
898    }
899}
900
901fn print_unreadable_warnings(unreadable: &[String]) {
902    if unreadable.is_empty() {
903        return;
904    }
905
906    println!();
907    println!(
908        "Warning: {} {} had corrupt/unreadable manifest.json:",
909        unreadable.len(),
910        directory_word(unreadable.len())
911    );
912    for path in unreadable {
913        println!("  - {}", path);
914    }
915}
916
/// Picks the noun that agrees with `count`: "directory" for exactly one,
/// "directories" for every other value (including zero).
fn directory_word(count: usize) -> &'static str {
    match count {
        1 => "directory",
        _ => "directories",
    }
}
924
925fn snapshot_status(manifest: &Manifest) -> &'static str {
926    if manifest.schema_only {
927        "schema-only"
928    } else if manifest.is_complete() {
929        "complete"
930    } else {
931        "incomplete"
932    }
933}
934
935fn format_list_chunks(manifest: &Manifest) -> String {
936    let total = manifest.chunks.len();
937    if total == 0 {
938        return "0".to_string();
939    }
940
941    format!(
942        "{}/{}",
943        manifest.completed_count() + manifest.skipped_count(),
944        total
945    )
946}
947
/// Severity attached to a finding produced while verifying a snapshot.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum VerifySeverity {
    /// Counted by `VerifyReport::error_count`.
    Error,
    /// Counted by `VerifyReport::warning_count`.
    Warn,
}

impl VerifySeverity {
    /// Upper-case tag printed in front of a problem line.
    fn as_str(self) -> &'static str {
        match self {
            Self::Error => "ERROR",
            Self::Warn => "WARN",
        }
    }
}
962
963#[derive(Debug)]
964struct VerifyProblem {
965    severity: VerifySeverity,
966    message: String,
967}
968
/// Chunk counts of a manifest, broken down by chunk status.
///
/// `Default` yields all-zero counts.
#[derive(Debug, Default)]
struct VerifyChunkSummary {
    /// Number of chunks listed in the manifest.
    total: usize,
    /// Chunks counted by `Manifest::completed_count`.
    completed: usize,
    /// Chunks counted by `Manifest::skipped_count`.
    skipped: usize,
    /// Chunks counted by `Manifest::pending_count`.
    pending: usize,
    /// Chunks counted by `Manifest::in_progress_count`.
    in_progress: usize,
    /// Chunks counted by `Manifest::failed_count`.
    failed: usize,
}
978
979#[derive(Debug)]
980struct VerifyReport {
981    manifest: Manifest,
982    schema_index_exists: bool,
983    ddl_file_count: usize,
984    chunk_summary: VerifyChunkSummary,
985    data_files_total: usize,
986    data_files_verified: usize,
987    problems: Vec<VerifyProblem>,
988}
989
990impl VerifyReport {
991    fn error_count(&self) -> usize {
992        self.problems
993            .iter()
994            .filter(|problem| problem.severity == VerifySeverity::Error)
995            .count()
996    }
997
998    fn warning_count(&self) -> usize {
999        self.problems
1000            .iter()
1001            .filter(|problem| problem.severity == VerifySeverity::Warn)
1002            .count()
1003    }
1004
1005    fn has_problems(&self) -> bool {
1006        !self.problems.is_empty()
1007    }
1008
1009    fn push_error(&mut self, message: impl Into<String>) {
1010        self.problems.push(VerifyProblem {
1011            severity: VerifySeverity::Error,
1012            message: message.into(),
1013        });
1014    }
1015
1016    fn push_warn(&mut self, message: impl Into<String>) {
1017        self.problems.push(VerifyProblem {
1018            severity: VerifySeverity::Warn,
1019            message: message.into(),
1020        });
1021    }
1022}
1023
1024async fn verify_snapshot(storage: &OpenDalStorage) -> Result<VerifyReport> {
1025    let manifest = storage.read_manifest().await?;
1026    let schema_index_path = format!("{}/{}", SCHEMA_DIR, SCHEMAS_FILE);
1027    let ddl_prefix = format!("{}/{}/", SCHEMA_DIR, DDL_DIR);
1028    let schema_index_exists = storage.file_exists(&schema_index_path).await?;
1029    let ddl_files: HashSet<_> = storage
1030        .list_files_recursive(&ddl_prefix)
1031        .await?
1032        .into_iter()
1033        .collect();
1034    let ddl_file_count = ddl_files
1035        .iter()
1036        .filter(|path| path.ends_with(".sql"))
1037        .count();
1038
1039    let mut report = VerifyReport {
1040        manifest,
1041        schema_index_exists,
1042        ddl_file_count,
1043        chunk_summary: VerifyChunkSummary::default(),
1044        data_files_total: 0,
1045        data_files_verified: 0,
1046        problems: Vec::new(),
1047    };
1048
1049    if report.manifest.version != MANIFEST_VERSION {
1050        report.push_error(format!(
1051            "Manifest version mismatch: expected {}, found {}",
1052            MANIFEST_VERSION, report.manifest.version
1053        ));
1054    }
1055
1056    if !report.schema_index_exists {
1057        report.push_warn(format!("Missing schema index '{}'", schema_index_path));
1058    }
1059
1060    for schema in &report.manifest.schemas {
1061        let ddl_path = ddl_path_for_schema(schema);
1062        if !ddl_files.contains(ddl_path.as_str()) {
1063            report.problems.push(VerifyProblem {
1064                severity: VerifySeverity::Error,
1065                message: format!("Schema '{}': missing DDL file '{}'", schema, ddl_path),
1066            });
1067        }
1068    }
1069
1070    report.chunk_summary = summarize_chunks(&report.manifest);
1071    if report.manifest.schema_only {
1072        let chunk_count = report.manifest.chunks.len();
1073        if chunk_count > 0 {
1074            report.push_error(format!(
1075                "Schema-only snapshot should not contain data chunks (found {})",
1076                chunk_count
1077            ));
1078        }
1079        let data_files = storage.list_files_recursive("data/").await?;
1080        if let Some(path) = data_files.first() {
1081            report.push_error(format!(
1082                "Schema-only snapshot should not contain data files (found '{}')",
1083                path
1084            ));
1085        }
1086    } else if report.manifest.chunks.is_empty() {
1087        report.push_error("Full snapshot should contain at least one data chunk");
1088    } else {
1089        verify_chunks_and_data_files(storage, &mut report).await?;
1090    }
1091
1092    Ok(report)
1093}
1094
1095fn summarize_chunks(manifest: &Manifest) -> VerifyChunkSummary {
1096    VerifyChunkSummary {
1097        total: manifest.chunks.len(),
1098        completed: manifest.completed_count(),
1099        skipped: manifest.skipped_count(),
1100        pending: manifest.pending_count(),
1101        in_progress: manifest.in_progress_count(),
1102        failed: manifest.failed_count(),
1103    }
1104}
1105
1106async fn verify_chunks_and_data_files(
1107    storage: &OpenDalStorage,
1108    report: &mut VerifyReport,
1109) -> Result<()> {
1110    let existing_files: HashSet<_> = storage
1111        .list_files_recursive("data/")
1112        .await?
1113        .into_iter()
1114        .collect();
1115    let mut data_files_total = 0;
1116    let mut data_files_verified = 0;
1117    let mut problems = Vec::new();
1118    let mut seen_chunk_ids = HashSet::new();
1119    let mut claimed_data_files = HashSet::new();
1120
1121    for chunk in &report.manifest.chunks {
1122        if !seen_chunk_ids.insert(chunk.id) {
1123            problems.push(VerifyProblem {
1124                severity: VerifySeverity::Error,
1125                message: format!("Chunk {}: duplicate chunk id", chunk.id),
1126            });
1127        }
1128        for file in &chunk.files {
1129            if let Some(path) = safe_manifest_data_file_path(file) {
1130                claimed_data_files.insert(path.to_string());
1131            }
1132        }
1133
1134        match chunk.status {
1135            ChunkStatus::Completed => {
1136                if chunk.files.is_empty() {
1137                    problems.push(VerifyProblem {
1138                        severity: VerifySeverity::Error,
1139                        message: format!("Chunk {}: completed chunk has no data files", chunk.id),
1140                    });
1141                    continue;
1142                }
1143                let allowed_prefixes = report
1144                    .manifest
1145                    .schemas
1146                    .iter()
1147                    .map(|schema| data_dir_for_schema_chunk(schema, chunk.id))
1148                    .collect::<Vec<_>>();
1149                for file in &chunk.files {
1150                    data_files_total += 1;
1151                    let Some(path) = valid_manifest_data_file_path(file, &allowed_prefixes) else {
1152                        problems.push(VerifyProblem {
1153                            severity: VerifySeverity::Error,
1154                            message: format!(
1155                                "Chunk {}: invalid data file path '{}'",
1156                                chunk.id, file
1157                            ),
1158                        });
1159                        continue;
1160                    };
1161
1162                    if existing_files.contains(path) {
1163                        data_files_verified += 1;
1164                    } else {
1165                        problems.push(VerifyProblem {
1166                            severity: VerifySeverity::Error,
1167                            message: format!("Chunk {}: missing file '{}'", chunk.id, path),
1168                        });
1169                    }
1170                }
1171            }
1172            ChunkStatus::Skipped => {
1173                if !chunk.files.is_empty() {
1174                    problems.push(VerifyProblem {
1175                        severity: VerifySeverity::Error,
1176                        message: format!(
1177                            "Chunk {}: skipped chunk should not list data files",
1178                            chunk.id
1179                        ),
1180                    });
1181                }
1182            }
1183            ChunkStatus::Pending => {
1184                problems.push(VerifyProblem {
1185                    severity: VerifySeverity::Error,
1186                    message: format!("Chunk {}: status is 'pending'", chunk.id),
1187                });
1188            }
1189            ChunkStatus::InProgress => {
1190                problems.push(VerifyProblem {
1191                    severity: VerifySeverity::Error,
1192                    message: format!("Chunk {}: status is 'in_progress'", chunk.id),
1193                });
1194            }
1195            ChunkStatus::Failed => {
1196                let reason = chunk.error.as_deref().unwrap_or("unknown error");
1197                problems.push(VerifyProblem {
1198                    severity: VerifySeverity::Error,
1199                    message: format!("Chunk {}: status is 'failed' (error: {})", chunk.id, reason),
1200                });
1201            }
1202        }
1203    }
1204
1205    for path in &existing_files {
1206        if !claimed_data_files.contains(path) {
1207            problems.push(VerifyProblem {
1208                severity: VerifySeverity::Error,
1209                message: format!("Unexpected data file '{}' is not listed in manifest", path),
1210            });
1211        }
1212    }
1213
1214    report.data_files_total = data_files_total;
1215    report.data_files_verified = data_files_verified;
1216    report.problems.extend(problems);
1217
1218    Ok(())
1219}
1220
/// Validates a data file path taken from a manifest chunk.
///
/// Returns the normalized path (leading `/` removed) when it passes
/// [`safe_manifest_data_file_path`] and lies inside one of the
/// `allowed_prefixes` directories; returns `None` otherwise.
///
/// A prefix only matches on a directory boundary: with the prefix
/// `data/public/1` (or `data/public/1/`), `data/public/1/f.parquet` is accepted
/// but `data/public/10/f.parquet` is not. A raw `starts_with` check would
/// accept the latter whenever the prefix has no trailing slash.
fn valid_manifest_data_file_path<'a>(
    path: &'a str,
    allowed_prefixes: &[String],
) -> Option<&'a str> {
    let normalized = safe_manifest_data_file_path(path)?;

    let inside_allowed_dir = allowed_prefixes.iter().any(|prefix| {
        match normalized.strip_prefix(prefix.as_str()) {
            // Either the prefix already ends the directory name, or the next
            // character of the path must be the separator.
            Some(rest) => prefix.ends_with('/') || rest.starts_with('/'),
            None => false,
        }
    });

    if inside_allowed_dir {
        Some(normalized)
    } else {
        None
    }
}

/// Normalizes a manifest data file path and rejects unsafe ones.
///
/// Leading `/` characters are stripped. The result must start with `data/` and
/// must not contain an empty, `.` or `..` segment (so no `//`, no trailing `/`
/// and no parent-directory traversal). Returns `None` for rejected paths.
fn safe_manifest_data_file_path(path: &str) -> Option<&str> {
    let normalized = path.trim_start_matches('/');
    if normalized.is_empty() || !normalized.starts_with("data/") {
        return None;
    }

    let has_unsafe_segment = normalized
        .split('/')
        .any(|segment| matches!(segment, "" | "." | ".."));
    if has_unsafe_segment {
        return None;
    }

    Some(normalized)
}
1252
1253fn print_verify_report(snapshot: &str, report: &VerifyReport) {
1254    println!("Verifying snapshot: {}", report.manifest.snapshot_id);
1255    println!("  Location:     {}", snapshot);
1256    if report.manifest.version == MANIFEST_VERSION {
1257        println!("  Manifest:     OK (version {})", report.manifest.version);
1258    } else {
1259        println!(
1260            "  Manifest:     ERROR (version {}, expected {})",
1261            report.manifest.version, MANIFEST_VERSION
1262        );
1263    }
1264    println!(
1265        "  Schema files: {}",
1266        if report.schema_index_exists {
1267            format!("OK ({})", SCHEMAS_FILE)
1268        } else {
1269            format!("WARN (missing {})", SCHEMAS_FILE)
1270        }
1271    );
1272    if report.ddl_file_count > 0 {
1273        println!("  DDL files:    {} file(s) found", report.ddl_file_count);
1274    } else {
1275        println!("  DDL files:    not present");
1276    }
1277
1278    let chunks = &report.chunk_summary;
1279    println!(
1280        "  Chunks:       {} total ({} completed, {} skipped, {} pending, {} in_progress, {} failed)",
1281        chunks.total,
1282        chunks.completed,
1283        chunks.skipped,
1284        chunks.pending,
1285        chunks.in_progress,
1286        chunks.failed
1287    );
1288
1289    if report.manifest.schema_only {
1290        println!("  Data files:   skipped (schema-only)");
1291    } else {
1292        println!(
1293            "  Data files:   {}/{} files verified",
1294            report.data_files_verified, report.data_files_total
1295        );
1296    }
1297
1298    if report.problems.is_empty() {
1299        println!();
1300        println!("Snapshot is valid.");
1301        return;
1302    }
1303
1304    println!();
1305    println!("Problems found:");
1306    for problem in &report.problems {
1307        println!("  [{}] {}", problem.severity.as_str(), problem.message);
1308    }
1309    println!();
1310    println!(
1311        "Snapshot has {} error(s), {} warning(s).",
1312        report.error_count(),
1313        report.warning_count()
1314    );
1315}
1316
1317fn print_delete_summary(snapshot: &str, manifest: &Manifest) {
1318    println!("Snapshot: {}", manifest.snapshot_id);
1319    println!("  Location: {}", snapshot);
1320    println!(
1321        "  Created:  {} UTC",
1322        manifest.created_at.format("%Y-%m-%d %H:%M:%S")
1323    );
1324    println!("  Catalog:  {}", manifest.catalog);
1325    println!("  Schemas:  {}", manifest.schemas.join(", "));
1326    println!("  Chunks:   {}", format_delete_chunks(manifest));
1327}
1328
1329fn format_delete_chunks(manifest: &Manifest) -> String {
1330    if manifest.schema_only {
1331        return "0 (schema-only)".to_string();
1332    }
1333
1334    let summary = summarize_chunks(manifest);
1335    if manifest.is_complete() {
1336        format!("{} (all processed)", summary.total)
1337    } else {
1338        format!(
1339            "{} ({} completed, {} skipped, {} pending, {} in_progress, {} failed)",
1340            summary.total,
1341            summary.completed,
1342            summary.skipped,
1343            summary.pending,
1344            summary.in_progress,
1345            summary.failed
1346        )
1347    }
1348}
1349
1350fn confirm_delete(snapshot: &str) -> Result<bool> {
1351    println!();
1352    println!(
1353        "Warning: this removes the entire snapshot directory/prefix, not only files listed in manifest."
1354    );
1355    println!("This will permanently delete all data under:");
1356    println!("  {}", display_snapshot_prefix(snapshot));
1357    print!("Type 'yes' to confirm deletion: ");
1358    io::stdout().flush().map_err(|error| {
1359        IoSnafu {
1360            operation: "flushing delete confirmation prompt",
1361            error,
1362        }
1363        .build()
1364    })?;
1365
1366    let mut input = String::new();
1367    io::stdin().read_line(&mut input).map_err(|error| {
1368        IoSnafu {
1369            operation: "reading delete confirmation",
1370            error,
1371        }
1372        .build()
1373    })?;
1374
1375    Ok(delete_confirmation_matches(&input))
1376}
1377
/// Returns `true` only when `input`, with surrounding whitespace removed, is
/// exactly the lower-case word `yes`.
fn delete_confirmation_matches(input: &str) -> bool {
    matches!(input.trim(), "yes")
}
1381
/// Returns `snapshot` with exactly one `/` appended when it does not already
/// end with one, so it reads as a directory/prefix.
fn display_snapshot_prefix(snapshot: &str) -> String {
    let separator = if snapshot.ends_with('/') { "" } else { "/" };
    format!("{}{}", snapshot, separator)
}
1389
1390#[cfg(test)]
1391mod tests {
1392    use chrono::TimeZone;
1393    use clap::Parser;
1394    use tempfile::tempdir;
1395    use url::Url;
1396
1397    use super::*;
1398    use crate::data::path::ddl_path_for_schema;
1399
1400    #[test]
1401    fn test_ddl_path_for_schema() {
1402        assert_eq!(ddl_path_for_schema("public"), "schema/ddl/public.sql");
1403        assert_eq!(
1404            ddl_path_for_schema("../evil"),
1405            "schema/ddl/%2E%2E%2Fevil.sql"
1406        );
1407    }
1408
1409    #[test]
1410    fn test_build_schema_ddl_order() {
1411        let ddl = build_schema_ddl(
1412            "public",
1413            "CREATE DATABASE public;\n".to_string(),
1414            vec!["PHYSICAL;\n".to_string()],
1415            vec!["TABLE;\n".to_string()],
1416            vec!["VIEW;\n".to_string()],
1417        );
1418
1419        let db_pos = ddl.find("CREATE DATABASE").unwrap();
1420        let physical_pos = ddl.find("PHYSICAL;").unwrap();
1421        let table_pos = ddl.find("TABLE;").unwrap();
1422        let view_pos = ddl.find("VIEW;").unwrap();
1423        assert!(db_pos < physical_pos);
1424        assert!(physical_pos < table_pos);
1425        assert!(table_pos < view_pos);
1426    }
1427
1428    #[tokio::test]
1429    async fn test_build_rejects_chunk_window_without_bounds() {
1430        let cmd = ExportCreateCommand::parse_from([
1431            "export-v2-create",
1432            "--addr",
1433            "127.0.0.1:4000",
1434            "--to",
1435            "file:///tmp/export-v2-test",
1436            "--chunk-time-window",
1437            "1h",
1438        ]);
1439
1440        let result = cmd.build().await;
1441        assert!(result.is_err());
1442        let error = result.err().unwrap().to_string();
1443
1444        assert!(error.contains("chunk_time_window requires both --start-time and --end-time"));
1445    }
1446
1447    #[tokio::test]
1448    async fn test_build_rejects_data_export_args_in_schema_only_mode() {
1449        let cmd = ExportCreateCommand::parse_from([
1450            "export-v2-create",
1451            "--addr",
1452            "127.0.0.1:4000",
1453            "--to",
1454            "file:///tmp/export-v2-test",
1455            "--schema-only",
1456            "--start-time",
1457            "2024-01-01T00:00:00Z",
1458            "--end-time",
1459            "2024-01-02T00:00:00Z",
1460            "--chunk-time-window",
1461            "1h",
1462            "--format",
1463            "csv",
1464            "--parallelism",
1465            "2",
1466        ]);
1467
1468        let error = cmd.build().await.err().unwrap().to_string();
1469
1470        assert!(error.contains("--schema-only cannot be used with data export arguments"));
1471        assert!(error.contains("--start-time"));
1472        assert!(error.contains("--end-time"));
1473        assert!(error.contains("--chunk-time-window"));
1474        assert!(error.contains("--format"));
1475        assert!(error.contains("--parallelism"));
1476    }
1477
1478    #[test]
1479    fn test_schema_only_mode_mismatch_error_message() {
1480        let error = crate::data::export_v2::error::SchemaOnlyModeMismatchSnafu {
1481            existing_schema_only: false,
1482            requested_schema_only: true,
1483        }
1484        .build()
1485        .to_string();
1486
1487        assert!(error.contains("existing: false"));
1488        assert!(error.contains("requested: true"));
1489    }
1490
1491    #[test]
1492    fn test_validate_resume_config_rejects_catalog_mismatch() {
1493        let manifest = Manifest::new_for_export(
1494            "greptime".to_string(),
1495            vec!["public".to_string()],
1496            false,
1497            TimeRange::unbounded(),
1498            DataFormat::Parquet,
1499            None,
1500        )
1501        .unwrap();
1502        let config = ExportConfig {
1503            catalog: "other".to_string(),
1504            schemas: None,
1505            schema_only: false,
1506            format: DataFormat::Parquet,
1507            force: false,
1508            time_range: TimeRange::unbounded(),
1509            chunk_time_window: None,
1510            parallelism: 1,
1511            snapshot_uri: "file:///tmp/snapshot".to_string(),
1512            storage_config: ObjectStoreConfig::default(),
1513        };
1514
1515        let error = validate_resume_config(&manifest, &config)
1516            .err()
1517            .unwrap()
1518            .to_string();
1519        assert!(error.contains("catalog"));
1520    }
1521
1522    #[test]
1523    fn test_validate_resume_config_accepts_schema_selection_with_different_case_and_order() {
1524        let manifest = Manifest::new_for_export(
1525            "greptime".to_string(),
1526            vec!["public".to_string(), "analytics".to_string()],
1527            false,
1528            TimeRange::unbounded(),
1529            DataFormat::Parquet,
1530            None,
1531        )
1532        .unwrap();
1533        let config = ExportConfig {
1534            catalog: "greptime".to_string(),
1535            schemas: Some(vec![
1536                "ANALYTICS".to_string(),
1537                "PUBLIC".to_string(),
1538                "public".to_string(),
1539            ]),
1540            schema_only: false,
1541            format: DataFormat::Parquet,
1542            force: false,
1543            time_range: TimeRange::unbounded(),
1544            chunk_time_window: None,
1545            parallelism: 1,
1546            snapshot_uri: "file:///tmp/snapshot".to_string(),
1547            storage_config: ObjectStoreConfig::default(),
1548        };
1549
1550        assert!(validate_resume_config(&manifest, &config).is_ok());
1551    }
1552
1553    #[test]
1554    fn test_validate_resume_config_rejects_chunk_plan_mismatch() {
1555        let start = chrono::Utc.with_ymd_and_hms(2025, 1, 1, 0, 0, 0).unwrap();
1556        let end = chrono::Utc.with_ymd_and_hms(2025, 1, 1, 2, 0, 0).unwrap();
1557        let time_range = TimeRange::new(Some(start), Some(end));
1558        let manifest = Manifest::new_for_export(
1559            "greptime".to_string(),
1560            vec!["public".to_string()],
1561            false,
1562            time_range.clone(),
1563            DataFormat::Parquet,
1564            None,
1565        )
1566        .unwrap();
1567        let config = ExportConfig {
1568            catalog: "greptime".to_string(),
1569            schemas: None,
1570            schema_only: false,
1571            format: DataFormat::Parquet,
1572            force: false,
1573            time_range,
1574            chunk_time_window: Some(Duration::from_secs(3600)),
1575            parallelism: 1,
1576            snapshot_uri: "file:///tmp/snapshot".to_string(),
1577            storage_config: ObjectStoreConfig::default(),
1578        };
1579
1580        let error = validate_resume_config(&manifest, &config)
1581            .err()
1582            .unwrap()
1583            .to_string();
1584        assert!(error.contains("chunk plan"));
1585    }
1586
1587    #[test]
1588    fn test_validate_resume_config_rejects_format_mismatch() {
1589        let manifest = Manifest::new_for_export(
1590            "greptime".to_string(),
1591            vec!["public".to_string()],
1592            false,
1593            TimeRange::unbounded(),
1594            DataFormat::Parquet,
1595            None,
1596        )
1597        .unwrap();
1598        let config = ExportConfig {
1599            catalog: "greptime".to_string(),
1600            schemas: None,
1601            schema_only: false,
1602            format: DataFormat::Csv,
1603            force: false,
1604            time_range: TimeRange::unbounded(),
1605            chunk_time_window: None,
1606            parallelism: 1,
1607            snapshot_uri: "file:///tmp/snapshot".to_string(),
1608            storage_config: ObjectStoreConfig::default(),
1609        };
1610
1611        let error = validate_resume_config(&manifest, &config)
1612            .err()
1613            .unwrap()
1614            .to_string();
1615        assert!(error.contains("format"));
1616    }
1617
1618    #[test]
1619    fn test_validate_resume_config_rejects_time_range_mismatch() {
1620        let start = chrono::Utc.with_ymd_and_hms(2025, 1, 1, 0, 0, 0).unwrap();
1621        let end = chrono::Utc.with_ymd_and_hms(2025, 1, 1, 1, 0, 0).unwrap();
1622        let manifest = Manifest::new_for_export(
1623            "greptime".to_string(),
1624            vec!["public".to_string()],
1625            false,
1626            TimeRange::new(Some(start), Some(end)),
1627            DataFormat::Parquet,
1628            None,
1629        )
1630        .unwrap();
1631        let config = ExportConfig {
1632            catalog: "greptime".to_string(),
1633            schemas: None,
1634            schema_only: false,
1635            format: DataFormat::Parquet,
1636            force: false,
1637            time_range: TimeRange::new(Some(start), Some(start)),
1638            chunk_time_window: None,
1639            parallelism: 1,
1640            snapshot_uri: "file:///tmp/snapshot".to_string(),
1641            storage_config: ObjectStoreConfig::default(),
1642        };
1643
1644        let error = validate_resume_config(&manifest, &config)
1645            .err()
1646            .unwrap()
1647            .to_string();
1648        assert!(error.contains("time_range"));
1649    }
1650
1651    #[tokio::test]
1652    async fn test_scan_snapshots_sorts_and_tracks_unreadable_manifests() {
1653        let dir = tempdir().unwrap();
1654        write_test_manifest(
1655            dir.path(),
1656            "older",
1657            test_manifest(
1658                chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
1659                false,
1660                true,
1661            ),
1662        );
1663        write_test_manifest(
1664            dir.path(),
1665            "newer",
1666            test_manifest(
1667                chrono::Utc.with_ymd_and_hms(2026, 2, 1, 0, 0, 0).unwrap(),
1668                false,
1669                true,
1670            ),
1671        );
1672
1673        std::fs::create_dir_all(dir.path().join("empty-dir")).unwrap();
1674        std::fs::create_dir_all(dir.path().join("not-snapshot")).unwrap();
1675        std::fs::write(dir.path().join("not-snapshot").join("data.txt"), "x").unwrap();
1676        std::fs::create_dir_all(dir.path().join("broken")).unwrap();
1677        std::fs::write(dir.path().join("broken").join(MANIFEST_FILE), "{not-json").unwrap();
1678
1679        let uri = Url::from_directory_path(dir.path()).unwrap().to_string();
1680        let storage = OpenDalStorage::from_file_uri(&uri).unwrap();
1681        let result = scan_snapshots(&storage).await.unwrap();
1682
1683        assert_eq!(result.snapshots.len(), 2);
1684        assert_eq!(
1685            result.snapshots[0].manifest.created_at,
1686            chrono::Utc.with_ymd_and_hms(2026, 2, 1, 0, 0, 0).unwrap()
1687        );
1688        assert_eq!(
1689            result.snapshots[1].manifest.created_at,
1690            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap()
1691        );
1692        assert_eq!(result.unreadable, vec!["broken/".to_string()]);
1693        assert_eq!(result.snapshots[0].path, "newer/");
1694        assert_eq!(result.snapshots[1].path, "older/");
1695    }
1696
1697    #[test]
1698    fn test_snapshot_list_status_and_chunk_summary() {
1699        let schema_only = test_manifest(
1700            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
1701            true,
1702            true,
1703        );
1704        assert_eq!(snapshot_status(&schema_only), "schema-only");
1705        assert_eq!(format_list_chunks(&schema_only), "0");
1706
1707        let complete = test_manifest(
1708            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
1709            false,
1710            true,
1711        );
1712        assert_eq!(snapshot_status(&complete), "complete");
1713        assert_eq!(format_list_chunks(&complete), "2/2");
1714        assert_eq!(format_delete_chunks(&complete), "2 (all processed)");
1715
1716        let incomplete = test_manifest(
1717            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
1718            false,
1719            false,
1720        );
1721        assert_eq!(snapshot_status(&incomplete), "incomplete");
1722        assert_eq!(format_list_chunks(&incomplete), "1/2");
1723        assert_eq!(
1724            format_delete_chunks(&incomplete),
1725            "2 (1 completed, 0 skipped, 1 pending, 0 in_progress, 0 failed)"
1726        );
1727    }
1728
1729    #[tokio::test]
1730    async fn test_delete_build_rejects_bucket_root_uri() {
1731        let cmd = ExportDeleteCommand::parse_from([
1732            "export-v2-delete",
1733            "--snapshot",
1734            "s3://bucket",
1735            "--no-confirm",
1736        ]);
1737
1738        let error = cmd.build().await.err().unwrap().to_string();
1739        assert!(error.contains("non-empty path"));
1740    }
1741
1742    #[test]
1743    fn test_delete_skip_confirmation_aliases() {
1744        let no_confirm = ExportDeleteCommand::parse_from([
1745            "export-v2-delete",
1746            "--snapshot",
1747            "s3://bucket/snapshot",
1748            "--no-confirm",
1749        ]);
1750        assert!(no_confirm.skip_confirmation);
1751
1752        let yes = ExportDeleteCommand::parse_from([
1753            "export-v2-delete",
1754            "--snapshot",
1755            "s3://bucket/snapshot",
1756            "--yes",
1757        ]);
1758        assert!(yes.skip_confirmation);
1759    }
1760
1761    #[tokio::test]
1762    async fn test_delete_snapshot_with_no_confirm_removes_snapshot_contents() {
1763        let parent = tempdir().unwrap();
1764        let snapshot = parent.path().join("snapshot");
1765        let sibling = parent.path().join("sibling");
1766        std::fs::create_dir_all(&snapshot).unwrap();
1767        std::fs::create_dir_all(&sibling).unwrap();
1768        std::fs::write(sibling.join("keep.txt"), b"keep").unwrap();
1769        write_root_manifest(
1770            &snapshot,
1771            test_manifest(
1772                chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
1773                true,
1774                true,
1775            ),
1776        );
1777        write_snapshot_file(&snapshot, "schema/schemas.json", b"[]");
1778
1779        let uri = Url::from_directory_path(&snapshot).unwrap().to_string();
1780        let delete = ExportDelete {
1781            snapshot: uri,
1782            skip_confirmation: true,
1783            storage: file_storage_for_dir(&snapshot),
1784        };
1785
1786        delete
1787            .run_with_confirmation(|_| unreachable!())
1788            .await
1789            .unwrap();
1790
1791        assert!(!snapshot.join(MANIFEST_FILE).exists());
1792        assert!(!snapshot.join("schema/schemas.json").exists());
1793        assert!(sibling.join("keep.txt").exists());
1794    }
1795
1796    #[tokio::test]
1797    async fn test_delete_snapshot_requires_manifest() {
1798        let dir = tempdir().unwrap();
1799        let uri = Url::from_directory_path(dir.path()).unwrap().to_string();
1800        let delete = ExportDelete {
1801            snapshot: uri,
1802            skip_confirmation: true,
1803            storage: file_storage_for_dir(dir.path()),
1804        };
1805
1806        let error = delete
1807            .run_with_confirmation(|_| unreachable!())
1808            .await
1809            .err()
1810            .unwrap()
1811            .to_string();
1812
1813        assert!(error.contains("Snapshot not found"));
1814        assert!(dir.path().exists());
1815    }
1816
1817    #[tokio::test]
1818    async fn test_delete_snapshot_cancels_without_exact_confirmation() {
1819        let dir = tempdir().unwrap();
1820        write_root_manifest(
1821            dir.path(),
1822            test_manifest(
1823                chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
1824                true,
1825                true,
1826            ),
1827        );
1828        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
1829        let uri = Url::from_directory_path(dir.path()).unwrap().to_string();
1830        let delete = ExportDelete {
1831            snapshot: uri.clone(),
1832            skip_confirmation: false,
1833            storage: file_storage_for_dir(dir.path()),
1834        };
1835
1836        delete
1837            .run_with_confirmation(|snapshot| {
1838                assert_eq!(snapshot, uri);
1839                Ok(false)
1840            })
1841            .await
1842            .unwrap();
1843
1844        assert!(dir.path().join(MANIFEST_FILE).exists());
1845        assert!(dir.path().join("schema/schemas.json").exists());
1846    }
1847
1848    #[test]
1849    fn test_delete_confirmation_requires_exact_yes() {
1850        assert!(delete_confirmation_matches("yes"));
1851        assert!(delete_confirmation_matches(" yes\n"));
1852        assert!(!delete_confirmation_matches("YES"));
1853        assert!(!delete_confirmation_matches("y"));
1854        assert!(!delete_confirmation_matches("yes please"));
1855    }
1856
1857    #[test]
1858    fn test_display_snapshot_prefix_adds_trailing_slash() {
1859        assert_eq!(
1860            display_snapshot_prefix("s3://bucket/snapshot"),
1861            "s3://bucket/snapshot/"
1862        );
1863        assert_eq!(
1864            display_snapshot_prefix("s3://bucket/snapshot/"),
1865            "s3://bucket/snapshot/"
1866        );
1867    }
1868
1869    #[tokio::test]
1870    async fn test_verify_snapshot_accepts_valid_full_snapshot() {
1871        let dir = tempdir().unwrap();
1872        let manifest = test_manifest(
1873            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
1874            false,
1875            true,
1876        );
1877        write_root_manifest(dir.path(), manifest);
1878        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
1879        write_default_ddl_files(dir.path());
1880        write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
1881
1882        let storage = file_storage_for_dir(dir.path());
1883        let report = verify_snapshot(&storage).await.unwrap();
1884
1885        assert_eq!(report.error_count(), 0);
1886        assert_eq!(report.warning_count(), 0);
1887        assert_eq!(report.data_files_total, 1);
1888        assert_eq!(report.data_files_verified, 1);
1889    }
1890
1891    #[tokio::test]
1892    async fn test_verify_snapshot_reports_missing_data_file_and_failed_chunk() {
1893        let dir = tempdir().unwrap();
1894        let mut manifest = test_manifest(
1895            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
1896            false,
1897            true,
1898        );
1899        manifest.chunks[1].mark_failed("copy failed".to_string());
1900        write_root_manifest(dir.path(), manifest);
1901        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
1902        write_default_ddl_files(dir.path());
1903
1904        let storage = file_storage_for_dir(dir.path());
1905        let report = verify_snapshot(&storage).await.unwrap();
1906
1907        assert_eq!(report.error_count(), 2);
1908        assert!(
1909            report
1910                .problems
1911                .iter()
1912                .any(|problem| problem.message.contains("missing file"))
1913        );
1914        assert!(
1915            report
1916                .problems
1917                .iter()
1918                .any(|problem| problem.message.contains("status is 'failed'"))
1919        );
1920    }
1921
1922    #[tokio::test]
1923    async fn test_verify_snapshot_reports_missing_schema_index_as_warning() {
1924        let dir = tempdir().unwrap();
1925        let manifest = test_manifest(
1926            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
1927            false,
1928            true,
1929        );
1930        write_root_manifest(dir.path(), manifest);
1931        write_default_ddl_files(dir.path());
1932        write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
1933
1934        let storage = file_storage_for_dir(dir.path());
1935        let report = verify_snapshot(&storage).await.unwrap();
1936
1937        assert_eq!(report.error_count(), 0);
1938        assert_eq!(report.warning_count(), 1);
1939        assert!(
1940            report
1941                .problems
1942                .iter()
1943                .any(|problem| problem.message.contains("Missing schema index"))
1944        );
1945    }
1946
1947    #[tokio::test]
1948    async fn test_verify_snapshot_rejects_schema_only_snapshot_with_chunks() {
1949        let dir = tempdir().unwrap();
1950        let mut manifest = test_manifest(
1951            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
1952            true,
1953            true,
1954        );
1955        let mut chunk = ChunkMeta::new(1, TimeRange::unbounded());
1956        chunk.mark_completed(vec!["data/public/1/file.parquet".to_string()], None);
1957        manifest.chunks.push(chunk);
1958        write_root_manifest(dir.path(), manifest);
1959        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
1960        write_default_ddl_files(dir.path());
1961
1962        let storage = file_storage_for_dir(dir.path());
1963        let report = verify_snapshot(&storage).await.unwrap();
1964
1965        assert_eq!(report.error_count(), 1);
1966        assert_eq!(report.data_files_total, 0);
1967        assert!(
1968            report
1969                .problems
1970                .iter()
1971                .any(|problem| problem.message.contains("should not contain data chunks"))
1972        );
1973    }
1974
1975    #[tokio::test]
1976    async fn test_verify_snapshot_rejects_schema_only_snapshot_with_data_files() {
1977        let dir = tempdir().unwrap();
1978        let manifest = test_manifest(
1979            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
1980            true,
1981            true,
1982        );
1983        write_root_manifest(dir.path(), manifest);
1984        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
1985        write_default_ddl_files(dir.path());
1986        write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
1987
1988        let storage = file_storage_for_dir(dir.path());
1989        let report = verify_snapshot(&storage).await.unwrap();
1990
1991        assert_eq!(report.error_count(), 1);
1992        assert_eq!(report.data_files_total, 0);
1993        assert!(
1994            report
1995                .problems
1996                .iter()
1997                .any(|problem| problem.message.contains("should not contain data files"))
1998        );
1999    }
2000
2001    #[tokio::test]
2002    async fn test_verify_snapshot_rejects_full_snapshot_without_chunks() {
2003        let dir = tempdir().unwrap();
2004        let mut manifest = test_manifest(
2005            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2006            false,
2007            true,
2008        );
2009        manifest.chunks.clear();
2010        write_root_manifest(dir.path(), manifest);
2011        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2012        write_default_ddl_files(dir.path());
2013
2014        let storage = file_storage_for_dir(dir.path());
2015        let report = verify_snapshot(&storage).await.unwrap();
2016
2017        assert_eq!(report.error_count(), 1);
2018        assert_eq!(report.data_files_total, 0);
2019        assert!(
2020            report
2021                .problems
2022                .iter()
2023                .any(|problem| problem.message.contains("at least one data chunk"))
2024        );
2025    }
2026
2027    #[tokio::test]
2028    async fn test_verify_snapshot_rejects_skipped_chunk_data_files() {
2029        let dir = tempdir().unwrap();
2030        let manifest = test_manifest(
2031            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2032            false,
2033            true,
2034        );
2035        write_root_manifest(dir.path(), manifest);
2036        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2037        write_default_ddl_files(dir.path());
2038        write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
2039        write_snapshot_file(dir.path(), "data/public/2/file.parquet", b"data");
2040
2041        let storage = file_storage_for_dir(dir.path());
2042        let report = verify_snapshot(&storage).await.unwrap();
2043
2044        assert_eq!(report.error_count(), 1);
2045        assert!(
2046            report
2047                .problems
2048                .iter()
2049                .any(|problem| { problem.message.contains("Unexpected data file") })
2050        );
2051    }
2052
2053    #[tokio::test]
2054    async fn test_verify_snapshot_rejects_duplicate_chunk_ids() {
2055        let dir = tempdir().unwrap();
2056        let mut manifest = test_manifest(
2057            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2058            false,
2059            true,
2060        );
2061        let mut duplicate = ChunkMeta::new(1, TimeRange::unbounded());
2062        duplicate.mark_completed(vec!["data/public/1/file.parquet".to_string()], None);
2063        manifest.chunks.push(duplicate);
2064        write_root_manifest(dir.path(), manifest);
2065        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2066        write_default_ddl_files(dir.path());
2067        write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
2068
2069        let storage = file_storage_for_dir(dir.path());
2070        let report = verify_snapshot(&storage).await.unwrap();
2071
2072        assert_eq!(report.error_count(), 1);
2073        assert!(
2074            report
2075                .problems
2076                .iter()
2077                .any(|problem| problem.message.contains("duplicate chunk id"))
2078        );
2079    }
2080
2081    #[tokio::test]
2082    async fn test_verify_snapshot_requires_all_schema_ddl() {
2083        let dir = tempdir().unwrap();
2084        let manifest = test_manifest(
2085            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2086            true,
2087            true,
2088        );
2089        write_root_manifest(dir.path(), manifest);
2090        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2091        write_snapshot_file(
2092            dir.path(),
2093            "schema/ddl/public.sql",
2094            b"CREATE DATABASE public;",
2095        );
2096
2097        let storage = file_storage_for_dir(dir.path());
2098        let report = verify_snapshot(&storage).await.unwrap();
2099
2100        assert_eq!(report.error_count(), 1);
2101        assert!(
2102            report
2103                .problems
2104                .iter()
2105                .any(|problem| problem.message.contains("analytics"))
2106        );
2107    }
2108
2109    #[tokio::test]
2110    async fn test_verify_snapshot_reports_missing_ddl_dir() {
2111        let dir = tempdir().unwrap();
2112        let manifest = test_manifest(
2113            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2114            false,
2115            true,
2116        );
2117        write_root_manifest(dir.path(), manifest);
2118        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2119        write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
2120
2121        let storage = file_storage_for_dir(dir.path());
2122        let report = verify_snapshot(&storage).await.unwrap();
2123
2124        assert_eq!(report.error_count(), 2);
2125        assert!(
2126            report
2127                .problems
2128                .iter()
2129                .any(|problem| problem.message.contains("schema/ddl/public.sql"))
2130        );
2131        assert!(
2132            report
2133                .problems
2134                .iter()
2135                .any(|problem| problem.message.contains("schema/ddl/analytics.sql"))
2136        );
2137    }
2138
2139    #[tokio::test]
2140    async fn test_verify_snapshot_reports_manifest_version_mismatch() {
2141        let dir = tempdir().unwrap();
2142        let mut manifest = test_manifest(
2143            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2144            false,
2145            true,
2146        );
2147        manifest.version = MANIFEST_VERSION + 1;
2148        write_root_manifest(dir.path(), manifest);
2149        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2150        write_default_ddl_files(dir.path());
2151        write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
2152
2153        let storage = file_storage_for_dir(dir.path());
2154        let report = verify_snapshot(&storage).await.unwrap();
2155
2156        assert_eq!(report.error_count(), 1);
2157        assert!(
2158            report
2159                .problems
2160                .iter()
2161                .any(|problem| problem.message.contains("Manifest version mismatch"))
2162        );
2163    }
2164
2165    #[tokio::test]
2166    async fn test_verify_snapshot_rejects_invalid_data_file_paths() {
2167        let dir = tempdir().unwrap();
2168        let mut manifest = test_manifest(
2169            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2170            false,
2171            true,
2172        );
2173        manifest.chunks[0].files = vec!["data/public/1/../file.parquet".to_string()];
2174        write_root_manifest(dir.path(), manifest);
2175        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2176        write_default_ddl_files(dir.path());
2177
2178        let storage = file_storage_for_dir(dir.path());
2179        let report = verify_snapshot(&storage).await.unwrap();
2180
2181        assert_eq!(report.error_count(), 1);
2182        assert!(
2183            report
2184                .problems
2185                .iter()
2186                .any(|problem| problem.message.contains("invalid data file path"))
2187        );
2188        assert_eq!(report.data_files_verified, 0);
2189    }
2190
2191    #[tokio::test]
2192    async fn test_verify_snapshot_accepts_leading_slash_manifest_data_paths() {
2193        let dir = tempdir().unwrap();
2194        let mut manifest = test_manifest(
2195            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2196            false,
2197            true,
2198        );
2199        manifest.chunks[0].files = vec!["/data/public/1/file.parquet".to_string()];
2200        write_root_manifest(dir.path(), manifest);
2201        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2202        write_default_ddl_files(dir.path());
2203        write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
2204
2205        let storage = file_storage_for_dir(dir.path());
2206        let report = verify_snapshot(&storage).await.unwrap();
2207
2208        assert_eq!(report.error_count(), 0);
2209        assert_eq!(report.data_files_verified, 1);
2210    }
2211
2212    #[tokio::test]
2213    async fn test_verify_snapshot_rejects_unlisted_files_under_completed_chunk_prefix() {
2214        let dir = tempdir().unwrap();
2215        let manifest = test_manifest(
2216            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2217            false,
2218            true,
2219        );
2220        write_root_manifest(dir.path(), manifest);
2221        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2222        write_default_ddl_files(dir.path());
2223        write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
2224        write_snapshot_file(dir.path(), "data/public/1/extra.parquet", b"data");
2225
2226        let storage = file_storage_for_dir(dir.path());
2227        let report = verify_snapshot(&storage).await.unwrap();
2228
2229        assert_eq!(report.error_count(), 1);
2230        assert!(
2231            report
2232                .problems
2233                .iter()
2234                .any(|problem| problem.message.contains("Unexpected data file"))
2235        );
2236        assert_eq!(report.data_files_verified, 1);
2237    }
2238
2239    #[tokio::test]
2240    async fn test_verify_snapshot_rejects_orphan_data_files_outside_known_chunk_prefixes() {
2241        let dir = tempdir().unwrap();
2242        let manifest = test_manifest(
2243            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2244            false,
2245            true,
2246        );
2247        write_root_manifest(dir.path(), manifest);
2248        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2249        write_default_ddl_files(dir.path());
2250        write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
2251        write_snapshot_file(dir.path(), "data/public/99/file.parquet", b"data");
2252
2253        let storage = file_storage_for_dir(dir.path());
2254        let report = verify_snapshot(&storage).await.unwrap();
2255
2256        assert_eq!(report.error_count(), 1);
2257        assert!(
2258            report
2259                .problems
2260                .iter()
2261                .any(|problem| problem.message.contains("Unexpected data file"))
2262        );
2263        assert_eq!(report.data_files_verified, 1);
2264    }
2265
2266    #[tokio::test]
2267    async fn test_verify_snapshot_rejects_data_files_under_wrong_chunk_or_schema() {
2268        let dir = tempdir().unwrap();
2269        let mut manifest = test_manifest(
2270            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2271            false,
2272            true,
2273        );
2274        manifest.chunks[0].files = vec![
2275            "data/public/99/file.parquet".to_string(),
2276            "data/metrics/1/file.parquet".to_string(),
2277        ];
2278        write_root_manifest(dir.path(), manifest);
2279        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2280        write_default_ddl_files(dir.path());
2281        write_snapshot_file(dir.path(), "data/public/99/file.parquet", b"data");
2282        write_snapshot_file(dir.path(), "data/metrics/1/file.parquet", b"data");
2283
2284        let storage = file_storage_for_dir(dir.path());
2285        let report = verify_snapshot(&storage).await.unwrap();
2286
2287        assert_eq!(report.error_count(), 2);
2288        assert_eq!(report.data_files_verified, 0);
2289        assert!(
2290            report
2291                .problems
2292                .iter()
2293                .all(|problem| problem.message.contains("invalid data file path"))
2294        );
2295    }
2296
2297    fn write_test_manifest(root: &std::path::Path, dir: &str, manifest: Manifest) {
2298        let snapshot_dir = root.join(dir);
2299        std::fs::create_dir_all(&snapshot_dir).unwrap();
2300        std::fs::write(
2301            snapshot_dir.join(MANIFEST_FILE),
2302            serde_json::to_vec_pretty(&manifest).unwrap(),
2303        )
2304        .unwrap();
2305    }
2306
2307    fn write_root_manifest(root: &std::path::Path, manifest: Manifest) {
2308        std::fs::write(
2309            root.join(MANIFEST_FILE),
2310            serde_json::to_vec_pretty(&manifest).unwrap(),
2311        )
2312        .unwrap();
2313    }
2314
2315    fn write_snapshot_file(root: &std::path::Path, relative_path: &str, content: &[u8]) {
2316        let mut path = root.to_path_buf();
2317        for segment in relative_path.split('/') {
2318            path.push(segment);
2319        }
2320        std::fs::create_dir_all(path.parent().unwrap()).unwrap();
2321        std::fs::write(path, content).unwrap();
2322    }
2323
2324    fn write_default_ddl_files(root: &std::path::Path) {
2325        write_snapshot_file(root, "schema/ddl/public.sql", b"CREATE DATABASE public;");
2326        write_snapshot_file(
2327            root,
2328            "schema/ddl/analytics.sql",
2329            b"CREATE DATABASE analytics;",
2330        );
2331    }
2332
2333    fn file_storage_for_dir(root: &std::path::Path) -> OpenDalStorage {
2334        let uri = Url::from_directory_path(root).unwrap().to_string();
2335        OpenDalStorage::from_file_uri(&uri).unwrap()
2336    }
2337
2338    fn test_manifest(
2339        created_at: chrono::DateTime<chrono::Utc>,
2340        schema_only: bool,
2341        complete: bool,
2342    ) -> Manifest {
2343        let mut manifest = Manifest::new_for_export(
2344            "greptime".to_string(),
2345            vec!["public".to_string(), "analytics".to_string()],
2346            schema_only,
2347            TimeRange::unbounded(),
2348            DataFormat::Parquet,
2349            None,
2350        )
2351        .unwrap();
2352        manifest.created_at = created_at;
2353        manifest.updated_at = created_at;
2354
2355        if !schema_only {
2356            manifest.chunks.clear();
2357            let mut first = ChunkMeta::new(1, TimeRange::unbounded());
2358            first.mark_completed(vec!["data/public/1/file.parquet".to_string()], None);
2359            manifest.chunks.push(first);
2360
2361            if complete {
2362                manifest
2363                    .chunks
2364                    .push(ChunkMeta::skipped(2, TimeRange::unbounded()));
2365            } else {
2366                manifest
2367                    .chunks
2368                    .push(ChunkMeta::new(2, TimeRange::unbounded()));
2369            }
2370        }
2371
2372        manifest
2373    }
2374}