Skip to main content

cli/data/export_v2/
command.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Export V2 CLI commands.
16
17use std::collections::HashSet;
18use std::io::{self, Write};
19use std::time::Duration;
20
21use async_trait::async_trait;
22use clap::{Parser, Subcommand};
23use common_error::ext::BoxedError;
24use common_telemetry::info;
25use serde_json::Value;
26use snafu::{OptionExt, ResultExt};
27
28use crate::Tool;
29use crate::common::ObjectStoreConfig;
30use crate::data::export_v2::coordinator::{ExportDataOptions, export_data};
31use crate::data::export_v2::error::{
32    ChunkTimeWindowRequiresBoundsSnafu, DatabaseSnafu, EmptyResultSnafu, IoSnafu,
33    ManifestVersionMismatchSnafu, Result, ResumeConfigMismatchSnafu, SchemaOnlyArgsNotAllowedSnafu,
34    SchemaOnlyModeMismatchSnafu, SnapshotVerifyFailedSnafu, UnexpectedValueTypeSnafu,
35};
36use crate::data::export_v2::extractor::SchemaExtractor;
37use crate::data::export_v2::manifest::{
38    ChunkMeta, ChunkStatus, DataFormat, MANIFEST_FILE, MANIFEST_VERSION, Manifest, TimeRange,
39};
40use crate::data::export_v2::schema::{DDL_DIR, SCHEMA_DIR, SCHEMAS_FILE};
41use crate::data::path::{data_dir_for_schema_chunk, ddl_path_for_schema};
42use crate::data::progress::{ProgressMode, build_progress_reporter};
43use crate::data::snapshot_storage::{
44    OpenDalStorage, SnapshotStorage, validate_snapshot_uri, validate_uri,
45};
46use crate::data::sql::{escape_sql_identifier, escape_sql_literal};
47use crate::database::{DatabaseClient, parse_proxy_opts};
48
// Subcommand set of the export v2 CLI. Each variant wraps the clap-parsed
// argument struct for one subcommand; `ExportV2Command::build` dispatches to
// the wrapped struct's own `build`.
// NOTE: with clap's derive the `///` comments below double as CLI help text,
// so rewording them changes what users see.
/// Export V2 commands.
#[derive(Debug, Subcommand)]
pub enum ExportV2Command {
    /// Create a new snapshot.
    Create(ExportCreateCommand),
    /// List snapshots under a parent location.
    List(ExportListCommand),
    /// Verify snapshot integrity.
    Verify(ExportVerifyCommand),
    /// Delete a snapshot and all data under it.
    Delete(ExportDeleteCommand),
}
61
62impl ExportV2Command {
63    pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
64        match self {
65            ExportV2Command::Create(cmd) => cmd.build().await,
66            ExportV2Command::List(cmd) => cmd.build().await,
67            ExportV2Command::Verify(cmd) => cmd.build().await,
68            ExportV2Command::Delete(cmd) => cmd.build().await,
69        }
70    }
71}
72
// Arguments of the `list` subcommand. `build` validates `location` with
// `validate_uri` and opens it through `OpenDalStorage::from_parent_uri`.
// NOTE: with clap's derive the `///` comments below double as CLI help text.
/// List snapshots under a parent location.
#[derive(Debug, Parser)]
pub struct ExportListCommand {
    /// Parent storage location whose direct subdirectories are snapshots.
    #[clap(long)]
    location: String,

    /// Object store configuration for remote storage backends.
    // Flattened: the fields of `ObjectStoreConfig` become options of this command.
    #[clap(flatten)]
    storage: ObjectStoreConfig,
}
84
85impl ExportListCommand {
86    pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
87        validate_uri(&self.location).map_err(BoxedError::new)?;
88        let storage = OpenDalStorage::from_parent_uri(&self.location, &self.storage)
89            .map_err(BoxedError::new)?;
90
91        Ok(Box::new(ExportList {
92            location: self.location.clone(),
93            storage,
94        }))
95    }
96}
97
/// Export list tool implementation.
///
/// Built by `ExportListCommand::build`; its work is done in `ExportList::run`.
pub struct ExportList {
    /// Parent location exactly as given on the command line; only used for
    /// the "Scanning: …" output line.
    location: String,
    /// Storage handle opened on the parent location via
    /// `OpenDalStorage::from_parent_uri`.
    storage: OpenDalStorage,
}
103
104#[async_trait]
105impl Tool for ExportList {
106    async fn do_work(&self) -> std::result::Result<(), BoxedError> {
107        self.run().await.map_err(BoxedError::new)
108    }
109}
110
111impl ExportList {
112    async fn run(&self) -> Result<()> {
113        let result = scan_snapshots(&self.storage).await?;
114
115        println!("Scanning: {}", self.location);
116        if result.snapshots.is_empty() {
117            println!("No snapshots found.");
118        } else {
119            print_snapshot_list(&result.snapshots, result.unreadable.len());
120        }
121        print_unreadable_warnings(&result.unreadable);
122
123        Ok(())
124    }
125}
126
// Arguments of the `verify` subcommand. `build` validates `snapshot` with
// `validate_uri` and opens it through `OpenDalStorage::from_uri`.
// NOTE: with clap's derive the `///` comments below double as CLI help text.
/// Verify snapshot integrity.
#[derive(Debug, Parser)]
pub struct ExportVerifyCommand {
    /// Snapshot storage location (e.g., s3://bucket/path, file:///tmp/backup).
    #[clap(long)]
    snapshot: String,

    /// Object store configuration for remote storage backends.
    // Flattened: the fields of `ObjectStoreConfig` become options of this command.
    #[clap(flatten)]
    storage: ObjectStoreConfig,
}
138
139impl ExportVerifyCommand {
140    pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
141        validate_uri(&self.snapshot).map_err(BoxedError::new)?;
142        let storage =
143            OpenDalStorage::from_uri(&self.snapshot, &self.storage).map_err(BoxedError::new)?;
144
145        Ok(Box::new(ExportVerify {
146            snapshot: self.snapshot.clone(),
147            storage,
148        }))
149    }
150}
151
/// Export verify tool implementation.
///
/// Built by `ExportVerifyCommand::build`; its work is done in `ExportVerify::run`.
pub struct ExportVerify {
    /// Snapshot location exactly as given on the command line; passed to
    /// `print_verify_report`.
    snapshot: String,
    /// Storage handle opened on the snapshot via `OpenDalStorage::from_uri`;
    /// passed to `verify_snapshot`.
    storage: OpenDalStorage,
}
157
158#[async_trait]
159impl Tool for ExportVerify {
160    async fn do_work(&self) -> std::result::Result<(), BoxedError> {
161        self.run().await.map_err(BoxedError::new)
162    }
163}
164
165impl ExportVerify {
166    async fn run(&self) -> Result<()> {
167        let report = verify_snapshot(&self.storage).await?;
168        print_verify_report(&self.snapshot, &report);
169
170        if report.has_problems() {
171            return SnapshotVerifyFailedSnafu {
172                errors: report.error_count(),
173                warnings: report.warning_count(),
174            }
175            .fail();
176        }
177
178        Ok(())
179    }
180}
181
// Arguments of the `delete` subcommand. Unlike the other subcommands, `build`
// validates `snapshot` with `validate_snapshot_uri` rather than `validate_uri`.
// NOTE: with clap's derive the `///` comments below double as CLI help text.
/// Delete a snapshot and all data under it.
#[derive(Debug, Parser)]
pub struct ExportDeleteCommand {
    /// Snapshot storage location (e.g., s3://bucket/path, file:///tmp/backup).
    #[clap(long)]
    snapshot: String,

    /// Skip interactive confirmation.
    // Exposed as `--no-confirm`, with `yes` registered as an alias.
    #[clap(long = "no-confirm", alias = "yes")]
    skip_confirmation: bool,

    /// Object store configuration for remote storage backends.
    // Flattened: the fields of `ObjectStoreConfig` become options of this command.
    #[clap(flatten)]
    storage: ObjectStoreConfig,
}
197
198impl ExportDeleteCommand {
199    pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
200        validate_snapshot_uri(&self.snapshot).map_err(BoxedError::new)?;
201        let storage =
202            OpenDalStorage::from_uri(&self.snapshot, &self.storage).map_err(BoxedError::new)?;
203
204        Ok(Box::new(ExportDelete {
205            snapshot: self.snapshot.clone(),
206            skip_confirmation: self.skip_confirmation,
207            storage,
208        }))
209    }
210}
211
/// Export delete tool implementation.
///
/// Built by `ExportDeleteCommand::build`; its work is done in
/// `ExportDelete::run_with_confirmation`.
pub struct ExportDelete {
    /// Snapshot location exactly as given on the command line; shown in the
    /// delete summary and handed to the confirmation callback.
    snapshot: String,
    /// When true, the confirmation callback is never invoked.
    skip_confirmation: bool,
    /// Storage handle opened on the snapshot via `OpenDalStorage::from_uri`.
    storage: OpenDalStorage,
}
218
219#[async_trait]
220impl Tool for ExportDelete {
221    async fn do_work(&self) -> std::result::Result<(), BoxedError> {
222        self.run().await.map_err(BoxedError::new)
223    }
224}
225
226impl ExportDelete {
227    async fn run(&self) -> Result<()> {
228        self.run_with_confirmation(confirm_delete).await
229    }
230
231    async fn run_with_confirmation<F>(&self, confirm: F) -> Result<()>
232    where
233        F: FnOnce(&str) -> Result<bool>,
234    {
235        let manifest = self.storage.read_manifest().await?;
236        print_delete_summary(&self.snapshot, &manifest);
237
238        if !self.skip_confirmation && !confirm(&self.snapshot)? {
239            println!("Deletion cancelled.");
240            return Ok(());
241        }
242
243        println!("Deleting snapshot...");
244        self.storage.delete_snapshot().await?;
245        println!("Snapshot deleted successfully.");
246
247        Ok(())
248    }
249}
250
// Arguments of the `create` subcommand. `ExportCreateCommand::build` validates
// them and copies them into an `ExportConfig` plus a `DatabaseClient`.
// NOTE: with clap's derive the `///` comments below double as CLI help text,
// so rewording them changes what users see.
/// Create a new snapshot.
#[derive(Debug, Parser)]
pub struct ExportCreateCommand {
    /// Server address to connect (e.g., 127.0.0.1:4000).
    #[clap(long)]
    addr: String,

    /// Target storage location (e.g., s3://bucket/path, file:///tmp/backup).
    // Checked with `validate_uri` in `build`; also stored as the snapshot URI.
    #[clap(long)]
    to: String,

    /// Catalog name.
    #[clap(long, default_value = "greptime")]
    catalog: String,

    /// Schema list to export (default: all non-system schemas).
    /// Can be specified multiple times or comma-separated.
    // An empty list is turned into `None` (no schema filter) by `build`.
    #[clap(long, value_delimiter = ',')]
    schemas: Vec<String>,

    /// Export schema only, no data.
    // In this mode `build` rejects --start-time, --end-time,
    // --chunk-time-window, a non-parquet --format, and non-1 values of
    // --parallelism / --chunk-parallelism.
    #[clap(long)]
    schema_only: bool,

    /// Time range start (ISO 8601 format, e.g., 2024-01-01T00:00:00Z).
    #[clap(long)]
    start_time: Option<String>,

    /// Time range end (ISO 8601 format, e.g., 2024-12-31T23:59:59Z).
    #[clap(long)]
    end_time: Option<String>,

    /// Chunk time window (e.g., 1h, 6h, 1d, 7d).
    /// Requires both --start-time and --end-time when specified.
    // `build` enforces this through `TimeRange::is_bounded`.
    #[clap(long, value_parser = humantime::parse_duration)]
    chunk_time_window: Option<Duration>,

    /// Data format: parquet, csv, json.
    #[clap(long, value_enum, default_value = "parquet")]
    format: DataFormat,

    /// Delete existing snapshot and recreate.
    #[clap(long)]
    force: bool,

    /// Parallelism for COPY DATABASE execution (server-side, per schema per chunk).
    // No range check is applied to this value in this file.
    #[clap(long, default_value = "1")]
    parallelism: usize,

    /// Number of export chunks to run concurrently on the client (1..=64).
    // The 1..=64 range is enforced at parse time by `parse_chunk_parallelism`.
    #[clap(long, default_value = "1", value_parser = parse_chunk_parallelism)]
    chunk_parallelism: usize,

    /// Basic authentication (user:password).
    #[clap(long)]
    auth_basic: Option<String>,

    /// Request timeout.
    // When omitted, `build` falls back to 60 seconds.
    #[clap(long, value_parser = humantime::parse_duration)]
    timeout: Option<Duration>,

    /// Proxy server address.
    ///
    /// If set, it overrides the system proxy unless `--no-proxy` is specified.
    /// If neither `--proxy` nor `--no-proxy` is set, system proxy (env) may be used.
    #[clap(long)]
    proxy: Option<String>,

    /// Disable all proxy usage (ignores `--proxy` and system proxy).
    ///
    /// When set and `--proxy` is not provided, this explicitly disables system proxy.
    #[clap(long)]
    no_proxy: bool,

    /// Progress reporting mode.
    #[clap(long, value_enum, default_value_t = ProgressMode::Auto)]
    progress: ProgressMode,

    /// Object store configuration for remote storage backends.
    // Flattened: the fields of `ObjectStoreConfig` become options of this command.
    #[clap(flatten)]
    storage: ObjectStoreConfig,
}
333
334impl ExportCreateCommand {
335    pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
336        // Validate URI format
337        validate_uri(&self.to).map_err(BoxedError::new)?;
338
339        let time_range = TimeRange::parse(self.start_time.as_deref(), self.end_time.as_deref())
340            .map_err(BoxedError::new)?;
341        if self.chunk_time_window.is_some() && !time_range.is_bounded() {
342            return ChunkTimeWindowRequiresBoundsSnafu
343                .fail()
344                .map_err(BoxedError::new);
345        }
346        if self.schema_only {
347            let mut invalid_args = Vec::new();
348            if self.start_time.is_some() {
349                invalid_args.push("--start-time");
350            }
351            if self.end_time.is_some() {
352                invalid_args.push("--end-time");
353            }
354            if self.chunk_time_window.is_some() {
355                invalid_args.push("--chunk-time-window");
356            }
357            if self.format != DataFormat::Parquet {
358                invalid_args.push("--format");
359            }
360            if self.parallelism != 1 {
361                invalid_args.push("--parallelism");
362            }
363            if self.chunk_parallelism != 1 {
364                invalid_args.push("--chunk-parallelism");
365            }
366            if !invalid_args.is_empty() {
367                return SchemaOnlyArgsNotAllowedSnafu {
368                    args: invalid_args.join(", "),
369                }
370                .fail()
371                .map_err(BoxedError::new);
372            }
373        }
374
375        // Parse schemas (empty vec means all schemas)
376        let schemas = if self.schemas.is_empty() {
377            None
378        } else {
379            Some(self.schemas.clone())
380        };
381
382        // Build storage
383        let storage = OpenDalStorage::from_uri(&self.to, &self.storage).map_err(BoxedError::new)?;
384
385        // Build database client
386        let proxy = parse_proxy_opts(self.proxy.clone(), self.no_proxy)?;
387        let database_client = DatabaseClient::new(
388            self.addr.clone(),
389            self.catalog.clone(),
390            self.auth_basic.clone(),
391            self.timeout.unwrap_or(Duration::from_secs(60)),
392            proxy,
393            self.no_proxy,
394        );
395
396        Ok(Box::new(ExportCreate {
397            config: ExportConfig {
398                catalog: self.catalog.clone(),
399                schemas,
400                schema_only: self.schema_only,
401                format: self.format,
402                force: self.force,
403                time_range,
404                chunk_time_window: self.chunk_time_window,
405                parallelism: self.parallelism,
406                chunk_parallelism: self.chunk_parallelism,
407                progress: self.progress,
408                snapshot_uri: self.to.clone(),
409                storage_config: self.storage.clone(),
410            },
411            storage: Box::new(storage),
412            database_client,
413        }))
414    }
415}
416
/// Export tool implementation.
///
/// Built by `ExportCreateCommand::build`; its work is done in `ExportCreate::run`.
pub struct ExportCreate {
    /// Validated export settings copied from the command line.
    config: ExportConfig,
    /// Snapshot storage, held as a trait object (`build` boxes an
    /// `OpenDalStorage` here).
    storage: Box<dyn SnapshotStorage>,
    /// Client used for every SQL statement issued during the export.
    database_client: DatabaseClient,
}
423
/// Export settings captured from `ExportCreateCommand` by its `build` method.
struct ExportConfig {
    /// Catalog to export from.
    catalog: String,
    /// Explicit schema selection; `None` when no `--schemas` were given.
    schemas: Option<Vec<String>>,
    /// Export schema artifacts only and skip the data export.
    schema_only: bool,
    /// Data file format.
    format: DataFormat,
    /// Delete an existing snapshot instead of resuming it.
    force: bool,
    /// Time range parsed from `--start-time` / `--end-time`.
    time_range: TimeRange,
    /// Optional chunk window; `build` only accepts it with a bounded time range.
    chunk_time_window: Option<Duration>,
    /// Value of `--parallelism`, forwarded to `export_data`.
    parallelism: usize,
    /// Value of `--chunk-parallelism`, forwarded to `export_data`.
    chunk_parallelism: usize,
    /// Progress reporting mode handed to `build_progress_reporter`.
    progress: ProgressMode,
    /// Target location (`--to`), forwarded to `export_data`.
    snapshot_uri: String,
    /// Object store settings, forwarded to `export_data`.
    storage_config: ObjectStoreConfig,
}
438
/// Parses the `--chunk-parallelism` argument, accepting integers in `1..=64`.
///
/// # Errors
///
/// Returns a message mentioning "an integer" when `value` does not parse as
/// `usize`, and a range-only message when it parses but lies outside `1..=64`.
fn parse_chunk_parallelism(value: &str) -> std::result::Result<usize, String> {
    match value.parse::<usize>() {
        Ok(parallelism @ 1..=64) => Ok(parallelism),
        Ok(_) => Err("chunk parallelism must be between 1 and 64".to_string()),
        Err(_) => Err("chunk parallelism must be an integer between 1 and 64".to_string()),
    }
}
449
450#[async_trait]
451impl Tool for ExportCreate {
452    async fn do_work(&self) -> std::result::Result<(), BoxedError> {
453        self.run().await.map_err(BoxedError::new)
454    }
455}
456
457impl ExportCreate {
458    async fn run(&self) -> Result<()> {
459        // 1. Check if snapshot exists
460        let exists = self.storage.exists().await?;
461
462        if exists {
463            if self.config.force {
464                info!("Deleting existing snapshot (--force)");
465                self.storage.delete_snapshot().await?;
466            } else {
467                // Resume mode - read existing manifest
468                let mut manifest = self.storage.read_manifest().await?;
469
470                // Check version compatibility
471                if manifest.version != MANIFEST_VERSION {
472                    return ManifestVersionMismatchSnafu {
473                        expected: MANIFEST_VERSION,
474                        found: manifest.version,
475                    }
476                    .fail();
477                }
478
479                validate_resume_config(&manifest, &self.config)?;
480
481                info!(
482                    "Resuming existing snapshot: {} (completed: {}/{} chunks)",
483                    manifest.snapshot_id,
484                    manifest.completed_count(),
485                    manifest.chunks.len()
486                );
487
488                if manifest.is_complete() {
489                    info!("Snapshot is already complete");
490                    return Ok(());
491                }
492
493                if manifest.schema_only {
494                    return Ok(());
495                }
496
497                let progress = build_progress_reporter(self.config.progress);
498                export_data(
499                    self.storage.as_ref(),
500                    &self.database_client,
501                    &mut manifest,
502                    ExportDataOptions {
503                        snapshot_uri: &self.config.snapshot_uri,
504                        storage_config: &self.config.storage_config,
505                        parallelism: self.config.parallelism,
506                        chunk_parallelism: self.config.chunk_parallelism,
507                    },
508                    progress.as_ref(),
509                )
510                .await?;
511                return Ok(());
512            }
513        }
514
515        // 2. Get schema list
516        let extractor = SchemaExtractor::new(&self.database_client, &self.config.catalog);
517        let schema_snapshot = extractor.extract(self.config.schemas.as_deref()).await?;
518
519        let schema_names: Vec<String> = schema_snapshot
520            .schemas
521            .iter()
522            .map(|s| s.name.clone())
523            .collect();
524        info!("Exporting schemas: {:?}", schema_names);
525
526        // 3. Create manifest
527        let mut manifest = Manifest::new_for_export(
528            self.config.catalog.clone(),
529            schema_names.clone(),
530            self.config.schema_only,
531            self.config.time_range.clone(),
532            self.config.format,
533            self.config.chunk_time_window,
534        )?;
535
536        // 4. Write schema files
537        self.storage.write_schema(&schema_snapshot).await?;
538        info!("Exported {} schemas", schema_snapshot.schemas.len());
539
540        // 5. Export DDL files for import recovery.
541        let ddl_by_schema = self.build_ddl_by_schema(&schema_names).await?;
542        for (schema, ddl) in ddl_by_schema {
543            let ddl_path = ddl_path_for_schema(&schema);
544            self.storage.write_text(&ddl_path, &ddl).await?;
545            info!("Exported DDL for schema {} to {}", schema, ddl_path);
546        }
547
548        // 6. Write manifest after schema artifacts and before any data export.
549        //
550        // The manifest is the snapshot commit point: only write it after the schema
551        // index and all DDL files are durable, so a crash cannot leave a "valid"
552        // snapshot that is missing required schema artifacts. For full exports we
553        // still need the manifest before data copy starts, because chunk resume is
554        // tracked by updating this manifest in place.
555        self.storage.write_manifest(&manifest).await?;
556        info!("Snapshot created: {}", manifest.snapshot_id);
557
558        if !self.config.schema_only {
559            let progress = build_progress_reporter(self.config.progress);
560            export_data(
561                self.storage.as_ref(),
562                &self.database_client,
563                &mut manifest,
564                ExportDataOptions {
565                    snapshot_uri: &self.config.snapshot_uri,
566                    storage_config: &self.config.storage_config,
567                    parallelism: self.config.parallelism,
568                    chunk_parallelism: self.config.chunk_parallelism,
569                },
570                progress.as_ref(),
571            )
572            .await?;
573        }
574
575        Ok(())
576    }
577
578    async fn build_ddl_by_schema(&self, schema_names: &[String]) -> Result<Vec<(String, String)>> {
579        let mut schemas = schema_names.to_vec();
580        schemas.sort();
581
582        let mut ddl_by_schema = Vec::with_capacity(schemas.len());
583        for schema in schemas {
584            let create_database = self.show_create("DATABASE", &schema, None).await?;
585
586            let (mut physical_tables, mut tables, mut views) =
587                self.get_schema_objects(&schema).await?;
588            physical_tables.sort();
589            let mut physical_ddls = Vec::with_capacity(physical_tables.len());
590            for table in physical_tables {
591                physical_ddls.push(self.show_create("TABLE", &schema, Some(&table)).await?);
592            }
593
594            tables.sort();
595            let mut table_ddls = Vec::with_capacity(tables.len());
596            for table in tables {
597                table_ddls.push(self.show_create("TABLE", &schema, Some(&table)).await?);
598            }
599
600            views.sort();
601            let mut view_ddls = Vec::with_capacity(views.len());
602            for view in views {
603                view_ddls.push(self.show_create("VIEW", &schema, Some(&view)).await?);
604            }
605
606            let ddl = build_schema_ddl(
607                &schema,
608                create_database,
609                physical_ddls,
610                table_ddls,
611                view_ddls,
612            );
613            ddl_by_schema.push((schema, ddl));
614        }
615
616        Ok(ddl_by_schema)
617    }
618
619    async fn get_schema_objects(
620        &self,
621        schema: &str,
622    ) -> Result<(Vec<String>, Vec<String>, Vec<String>)> {
623        let physical_tables = self.get_metric_physical_tables(schema).await?;
624        let physical_set: HashSet<&str> = physical_tables.iter().map(String::as_str).collect();
625        let sql = format!(
626            "SELECT table_name, table_type FROM information_schema.tables \
627             WHERE table_catalog = '{}' AND table_schema = '{}' \
628             AND (table_type = 'BASE TABLE' OR table_type = 'VIEW')",
629            escape_sql_literal(&self.config.catalog),
630            escape_sql_literal(schema)
631        );
632        let records: Option<Vec<Vec<Value>>> = self
633            .database_client
634            .sql_in_public(&sql)
635            .await
636            .context(DatabaseSnafu)?;
637
638        let mut tables = Vec::new();
639        let mut views = Vec::new();
640        if let Some(rows) = records {
641            for row in rows {
642                let name = match row.first() {
643                    Some(Value::String(name)) => name.clone(),
644                    _ => return UnexpectedValueTypeSnafu.fail(),
645                };
646                let table_type = match row.get(1) {
647                    Some(Value::String(table_type)) => table_type.as_str(),
648                    _ => return UnexpectedValueTypeSnafu.fail(),
649                };
650                if !physical_set.contains(name.as_str()) {
651                    if table_type == "VIEW" {
652                        views.push(name);
653                    } else {
654                        tables.push(name);
655                    }
656                }
657            }
658        }
659
660        Ok((physical_tables, tables, views))
661    }
662
663    async fn get_metric_physical_tables(&self, schema: &str) -> Result<Vec<String>> {
664        let sql = format!(
665            "SELECT DISTINCT table_name FROM information_schema.columns \
666             WHERE table_catalog = '{}' AND table_schema = '{}' AND column_name = '__tsid'",
667            escape_sql_literal(&self.config.catalog),
668            escape_sql_literal(schema)
669        );
670        let records: Option<Vec<Vec<Value>>> = self
671            .database_client
672            .sql_in_public(&sql)
673            .await
674            .context(DatabaseSnafu)?;
675
676        let mut tables = HashSet::new();
677        if let Some(rows) = records {
678            for row in rows {
679                let name = match row.first() {
680                    Some(Value::String(name)) => name.clone(),
681                    _ => return UnexpectedValueTypeSnafu.fail(),
682                };
683                tables.insert(name);
684            }
685        }
686
687        Ok(tables.into_iter().collect())
688    }
689
690    async fn show_create(
691        &self,
692        show_type: &str,
693        schema: &str,
694        table: Option<&str>,
695    ) -> Result<String> {
696        let sql = match table {
697            Some(table) => format!(
698                r#"SHOW CREATE {} "{}"."{}"."{}""#,
699                show_type,
700                escape_sql_identifier(&self.config.catalog),
701                escape_sql_identifier(schema),
702                escape_sql_identifier(table)
703            ),
704            None => format!(
705                r#"SHOW CREATE {} "{}"."{}""#,
706                show_type,
707                escape_sql_identifier(&self.config.catalog),
708                escape_sql_identifier(schema)
709            ),
710        };
711
712        let records: Option<Vec<Vec<Value>>> = self
713            .database_client
714            .sql_in_public(&sql)
715            .await
716            .context(DatabaseSnafu)?;
717        let rows = records.context(EmptyResultSnafu)?;
718        let row = rows.first().context(EmptyResultSnafu)?;
719        let Some(Value::String(create)) = row.get(1) else {
720            return UnexpectedValueTypeSnafu.fail();
721        };
722
723        Ok(format!("{};\n", create))
724    }
725}
726
/// Assembles the DDL script for one schema.
///
/// The script is a `-- Schema: <name>` header line, then `create_database`,
/// then every statement of `physical_tables`, `tables` and `views` in that
/// order, then one extra newline. Statements are concatenated as given; no
/// separator is inserted between them.
fn build_schema_ddl(
    schema: &str,
    create_database: String,
    physical_tables: Vec<String>,
    tables: Vec<String>,
    views: Vec<String>,
) -> String {
    let mut ddl = format!("-- Schema: {}\n", schema);
    ddl += &create_database;
    ddl.extend(physical_tables.into_iter().chain(tables).chain(views));
    ddl.push('\n');
    ddl
}
749
750fn validate_resume_config(manifest: &Manifest, config: &ExportConfig) -> Result<()> {
751    if manifest.schema_only != config.schema_only {
752        return SchemaOnlyModeMismatchSnafu {
753            existing_schema_only: manifest.schema_only,
754            requested_schema_only: config.schema_only,
755        }
756        .fail();
757    }
758
759    if manifest.catalog != config.catalog {
760        return ResumeConfigMismatchSnafu {
761            field: "catalog",
762            existing: manifest.catalog.clone(),
763            requested: config.catalog.clone(),
764        }
765        .fail();
766    }
767
768    // If no schema filter is provided on resume, inherit the existing snapshot
769    // selection instead of reinterpreting the request as "all schemas".
770    if let Some(requested_schemas) = &config.schemas
771        && !schema_selection_matches(&manifest.schemas, requested_schemas)
772    {
773        return ResumeConfigMismatchSnafu {
774            field: "schemas",
775            existing: format_schema_selection(&manifest.schemas),
776            requested: format_schema_selection(requested_schemas),
777        }
778        .fail();
779    }
780
781    if manifest.time_range != config.time_range {
782        return ResumeConfigMismatchSnafu {
783            field: "time_range",
784            existing: format!("{:?}", manifest.time_range),
785            requested: format!("{:?}", config.time_range),
786        }
787        .fail();
788    }
789
790    if manifest.format != config.format {
791        return ResumeConfigMismatchSnafu {
792            field: "format",
793            existing: manifest.format.to_string(),
794            requested: config.format.to_string(),
795        }
796        .fail();
797    }
798
799    let expected_plan = Manifest::new_for_export(
800        manifest.catalog.clone(),
801        manifest.schemas.clone(),
802        config.schema_only,
803        config.time_range.clone(),
804        config.format,
805        config.chunk_time_window,
806    )?;
807    if !chunk_plan_matches(manifest, &expected_plan) {
808        return ResumeConfigMismatchSnafu {
809            field: "chunk plan",
810            existing: format_chunk_plan(&manifest.chunks),
811            requested: format_chunk_plan(&expected_plan.chunks),
812        }
813        .fail();
814    }
815
816    Ok(())
817}
818
819fn schema_selection_matches(existing: &[String], requested: &[String]) -> bool {
820    canonical_schema_selection(existing) == canonical_schema_selection(requested)
821}
822
/// Normalizes a schema selection for comparison: names are ASCII-lowercased,
/// sorted, and duplicates are removed.
fn canonical_schema_selection(schemas: &[String]) -> Vec<String> {
    let mut canonical: Vec<String> = schemas
        .iter()
        .map(|schema| schema.to_ascii_lowercase())
        .collect();
    // After sorting, equal names are adjacent, so `dedup` removes every duplicate.
    canonical.sort();
    canonical.dedup();
    canonical
}
837
/// Renders a schema selection as `[a, b, c]` for use in error messages.
fn format_schema_selection(schemas: &[String]) -> String {
    let mut rendered = String::from("[");
    for (index, schema) in schemas.iter().enumerate() {
        if index > 0 {
            rendered.push_str(", ");
        }
        rendered.push_str(schema);
    }
    rendered.push(']');
    rendered
}
841
842fn chunk_plan_matches(existing: &Manifest, expected: &Manifest) -> bool {
843    existing.chunks.len() == expected.chunks.len()
844        && existing
845            .chunks
846            .iter()
847            .zip(&expected.chunks)
848            .all(|(left, right)| left.id == right.id && left.time_range == right.time_range)
849}
850
851fn format_chunk_plan(chunks: &[ChunkMeta]) -> String {
852    let items = chunks
853        .iter()
854        .map(|chunk| format!("#{}:{:?}", chunk.id, chunk.time_range))
855        .collect::<Vec<_>>();
856    format!("[{}]", items.join(", "))
857}
858
/// One snapshot found by `scan_snapshots`.
#[derive(Debug)]
struct SnapshotListEntry {
    /// Directory name with surrounding `/` trimmed and a single trailing `/`
    /// appended.
    path: String,
    /// Manifest deserialized from the directory's manifest file.
    manifest: Manifest,
}
864
/// Outcome of `scan_snapshots`.
#[derive(Debug, Default)]
struct SnapshotScanResult {
    /// Snapshots with a readable manifest, sorted by `created_at`, newest first.
    snapshots: Vec<SnapshotListEntry>,
    /// Paths of directories whose manifest file exists but could not be
    /// deserialized, sorted ascending.
    unreadable: Vec<String>,
}
870
871async fn scan_snapshots(storage: &OpenDalStorage) -> Result<SnapshotScanResult> {
872    let mut result = SnapshotScanResult::default();
873    for dir in storage.list_direct_child_dirs().await? {
874        let manifest_path = format!("{}/{}", dir.trim_matches('/'), MANIFEST_FILE);
875        let Some(data) = storage.read_file_if_exists(&manifest_path).await? else {
876            continue;
877        };
878
879        match serde_json::from_slice::<Manifest>(&data) {
880            Ok(manifest) => result.snapshots.push(SnapshotListEntry {
881                path: format!("{}/", dir.trim_matches('/')),
882                manifest,
883            }),
884            Err(_) => result
885                .unreadable
886                .push(format!("{}/", dir.trim_matches('/'))),
887        }
888    }
889
890    result
891        .snapshots
892        .sort_by_key(|entry| std::cmp::Reverse(entry.manifest.created_at));
893    result.unreadable.sort();
894    Ok(result)
895}
896
897fn print_snapshot_list(snapshots: &[SnapshotListEntry], unreadable_count: usize) {
898    if unreadable_count == 0 {
899        println!("Found {} snapshots:", snapshots.len());
900    } else {
901        println!(
902            "Found {} snapshots ({} {} skipped: unreadable manifest):",
903            snapshots.len(),
904            unreadable_count,
905            directory_word(unreadable_count)
906        );
907    }
908    println!();
909    println!(
910        "  {:<24}  {:<36}  {:<19}  {:<9}  {:<7}  {:<6}  Status",
911        "Path", "ID", "Created", "Catalog", "Schemas", "Chunks"
912    );
913    println!(
914        "  {:<24}  {:<36}  {:<19}  {:<9}  {:<7}  {:<6}  {:<10}",
915        "-".repeat(24),
916        "-".repeat(36),
917        "-".repeat(19),
918        "-".repeat(9),
919        "-".repeat(7),
920        "-".repeat(6),
921        "-".repeat(10)
922    );
923    for entry in snapshots {
924        let manifest = &entry.manifest;
925        println!(
926            "  {:<24}  {:<36}  {:<19}  {:<9}  {:<7}  {:<6}  {}",
927            entry.path,
928            manifest.snapshot_id,
929            manifest.created_at.format("%Y-%m-%d %H:%M:%S"),
930            manifest.catalog,
931            manifest.schemas.len(),
932            format_list_chunks(manifest),
933            snapshot_status(manifest)
934        );
935    }
936}
937
938fn print_unreadable_warnings(unreadable: &[String]) {
939    if unreadable.is_empty() {
940        return;
941    }
942
943    println!();
944    println!(
945        "Warning: {} {} had corrupt/unreadable manifest.json:",
946        unreadable.len(),
947        directory_word(unreadable.len())
948    );
949    for path in unreadable {
950        println!("  - {}", path);
951    }
952}
953
/// Picks the singular or plural noun for a directory count.
///
/// Only a count of exactly one is singular; zero uses the plural form.
fn directory_word(count: usize) -> &'static str {
    match count {
        1 => "directory",
        _ => "directories",
    }
}
961
962fn snapshot_status(manifest: &Manifest) -> &'static str {
963    if manifest.schema_only {
964        "schema-only"
965    } else if manifest.is_complete() {
966        "complete"
967    } else {
968        "incomplete"
969    }
970}
971
972fn format_list_chunks(manifest: &Manifest) -> String {
973    let total = manifest.chunks.len();
974    if total == 0 {
975        return "0".to_string();
976    }
977
978    format!(
979        "{}/{}",
980        manifest.completed_count() + manifest.skipped_count(),
981        total
982    )
983}
984
/// Severity attached to a problem found while verifying a snapshot.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum VerifySeverity {
    /// Labelled `ERROR` in printed reports.
    Error,
    /// Labelled `WARN` in printed reports.
    Warn,
}

impl VerifySeverity {
    /// Returns the upper-case label used when printing a problem.
    fn as_str(self) -> &'static str {
        match self {
            Self::Warn => "WARN",
            Self::Error => "ERROR",
        }
    }
}
999
1000#[derive(Debug)]
1001struct VerifyProblem {
1002    severity: VerifySeverity,
1003    message: String,
1004}
1005
/// Per-status chunk counts taken from a manifest; all zero by default.
#[derive(Debug, Default)]
struct VerifyChunkSummary {
    /// Number of chunks listed in the manifest.
    total: usize,
    /// Chunks the manifest counts as completed.
    completed: usize,
    /// Chunks the manifest counts as skipped.
    skipped: usize,
    /// Chunks the manifest counts as pending.
    pending: usize,
    /// Chunks the manifest counts as in progress.
    in_progress: usize,
    /// Chunks the manifest counts as failed.
    failed: usize,
}
1015
1016#[derive(Debug)]
1017struct VerifyReport {
1018    manifest: Manifest,
1019    schema_index_exists: bool,
1020    ddl_file_count: usize,
1021    chunk_summary: VerifyChunkSummary,
1022    data_files_total: usize,
1023    data_files_verified: usize,
1024    problems: Vec<VerifyProblem>,
1025}
1026
1027impl VerifyReport {
1028    fn error_count(&self) -> usize {
1029        self.problems
1030            .iter()
1031            .filter(|problem| problem.severity == VerifySeverity::Error)
1032            .count()
1033    }
1034
1035    fn warning_count(&self) -> usize {
1036        self.problems
1037            .iter()
1038            .filter(|problem| problem.severity == VerifySeverity::Warn)
1039            .count()
1040    }
1041
1042    fn has_problems(&self) -> bool {
1043        !self.problems.is_empty()
1044    }
1045
1046    fn push_error(&mut self, message: impl Into<String>) {
1047        self.problems.push(VerifyProblem {
1048            severity: VerifySeverity::Error,
1049            message: message.into(),
1050        });
1051    }
1052
1053    fn push_warn(&mut self, message: impl Into<String>) {
1054        self.problems.push(VerifyProblem {
1055            severity: VerifySeverity::Warn,
1056            message: message.into(),
1057        });
1058    }
1059}
1060
1061async fn verify_snapshot(storage: &OpenDalStorage) -> Result<VerifyReport> {
1062    let manifest = storage.read_manifest().await?;
1063    let schema_index_path = format!("{}/{}", SCHEMA_DIR, SCHEMAS_FILE);
1064    let ddl_prefix = format!("{}/{}/", SCHEMA_DIR, DDL_DIR);
1065    let schema_index_exists = storage.file_exists(&schema_index_path).await?;
1066    let ddl_files: HashSet<_> = storage
1067        .list_files_recursive(&ddl_prefix)
1068        .await?
1069        .into_iter()
1070        .collect();
1071    let ddl_file_count = ddl_files
1072        .iter()
1073        .filter(|path| path.ends_with(".sql"))
1074        .count();
1075
1076    let mut report = VerifyReport {
1077        manifest,
1078        schema_index_exists,
1079        ddl_file_count,
1080        chunk_summary: VerifyChunkSummary::default(),
1081        data_files_total: 0,
1082        data_files_verified: 0,
1083        problems: Vec::new(),
1084    };
1085
1086    if report.manifest.version != MANIFEST_VERSION {
1087        report.push_error(format!(
1088            "Manifest version mismatch: expected {}, found {}",
1089            MANIFEST_VERSION, report.manifest.version
1090        ));
1091    }
1092
1093    if !report.schema_index_exists {
1094        report.push_warn(format!("Missing schema index '{}'", schema_index_path));
1095    }
1096
1097    for schema in &report.manifest.schemas {
1098        let ddl_path = ddl_path_for_schema(schema);
1099        if !ddl_files.contains(ddl_path.as_str()) {
1100            report.problems.push(VerifyProblem {
1101                severity: VerifySeverity::Error,
1102                message: format!("Schema '{}': missing DDL file '{}'", schema, ddl_path),
1103            });
1104        }
1105    }
1106
1107    report.chunk_summary = summarize_chunks(&report.manifest);
1108    if report.manifest.schema_only {
1109        let chunk_count = report.manifest.chunks.len();
1110        if chunk_count > 0 {
1111            report.push_error(format!(
1112                "Schema-only snapshot should not contain data chunks (found {})",
1113                chunk_count
1114            ));
1115        }
1116        let mut first_data_file: Option<String> = None;
1117        storage
1118            .for_each_file_recursive("data/", |path| {
1119                let should_update = match &first_data_file {
1120                    Some(current) => path.as_str() < current.as_str(),
1121                    None => true,
1122                };
1123                if should_update {
1124                    first_data_file = Some(path);
1125                }
1126                Ok(())
1127            })
1128            .await?;
1129        if let Some(path) = first_data_file {
1130            report.push_error(format!(
1131                "Schema-only snapshot should not contain data files (found '{}')",
1132                path
1133            ));
1134        }
1135    } else if report.manifest.chunks.is_empty() {
1136        report.push_error("Full snapshot should contain at least one data chunk");
1137    } else {
1138        verify_chunks_and_data_files(storage, &mut report).await?;
1139    }
1140
1141    Ok(report)
1142}
1143
1144fn summarize_chunks(manifest: &Manifest) -> VerifyChunkSummary {
1145    VerifyChunkSummary {
1146        total: manifest.chunks.len(),
1147        completed: manifest.completed_count(),
1148        skipped: manifest.skipped_count(),
1149        pending: manifest.pending_count(),
1150        in_progress: manifest.in_progress_count(),
1151        failed: manifest.failed_count(),
1152    }
1153}
1154
/// One data file that a completed chunk declares and that verification
/// therefore expects to find in storage.
#[derive(Debug)]
struct ChunkFile {
    /// Id of the chunk that lists the file.
    chunk_id: u32,
    /// Normalized file path (leading `/` removed).
    path: String,
}
1161
1162/// Expected snapshot contents derived purely from the manifest (no object-store IO).
1163///
1164/// Separating planning from scanning makes it obvious which problems come from
1165/// the manifest alone and which require comparing against actual storage.
1166#[derive(Debug, Default)]
1167struct VerifyPlan {
1168    /// Valid data files declared by completed chunks; each must exist in storage.
1169    files_to_check: Vec<ChunkFile>,
1170    /// All syntactically-safe data paths declared by any chunk, regardless of
1171    /// status. Used as the orphan-detection baseline so a listed-but-invalid
1172    /// file is not also reported as unexpected.
1173    claimed_data_files: HashSet<String>,
1174    /// Total data-file references in completed chunks (valid + invalid).
1175    data_files_total: usize,
1176    /// Problems detectable from the manifest alone.
1177    problems: Vec<VerifyProblem>,
1178}
1179
/// What the scan of `data/` found in storage, classified against the plan.
#[derive(Debug)]
struct VerifyDataScan {
    /// Stored files whose path is claimed by some chunk in the manifest.
    existing_claimed_data_files: HashSet<String>,
    /// Stored files that no chunk claims; kept separately for reporting.
    unexpected_data_files: Vec<String>,
}
1187
1188/// Result of reconciling the manifest plan against the storage scan.
1189#[derive(Debug, Default)]
1190struct VerifyOutcome {
1191    data_files_total: usize,
1192    data_files_verified: usize,
1193    problems: Vec<VerifyProblem>,
1194}
1195
1196async fn verify_chunks_and_data_files(
1197    storage: &OpenDalStorage,
1198    report: &mut VerifyReport,
1199) -> Result<()> {
1200    let plan = build_verify_plan(&report.manifest);
1201    let scan = scan_data_files(storage, &plan).await?;
1202    let outcome = reconcile_plan_with_scan(plan, scan);
1203
1204    report.data_files_total = outcome.data_files_total;
1205    report.data_files_verified = outcome.data_files_verified;
1206    report.problems.extend(outcome.problems);
1207
1208    Ok(())
1209}
1210
1211/// Builds the expected-state plan from the manifest. Pure; performs no IO.
1212fn build_verify_plan(manifest: &Manifest) -> VerifyPlan {
1213    let mut plan = VerifyPlan::default();
1214    let mut seen_chunk_ids = HashSet::new();
1215
1216    for chunk in &manifest.chunks {
1217        if !seen_chunk_ids.insert(chunk.id) {
1218            plan.problems.push(VerifyProblem {
1219                severity: VerifySeverity::Error,
1220                message: format!("Chunk {}: duplicate chunk id", chunk.id),
1221            });
1222        }
1223        for file in &chunk.files {
1224            if let Some(path) = safe_manifest_data_file_path(file) {
1225                plan.claimed_data_files.insert(path.to_string());
1226            }
1227        }
1228
1229        match chunk.status {
1230            ChunkStatus::Completed => {
1231                if chunk.files.is_empty() {
1232                    plan.problems.push(VerifyProblem {
1233                        severity: VerifySeverity::Error,
1234                        message: format!("Chunk {}: completed chunk has no data files", chunk.id),
1235                    });
1236                    continue;
1237                }
1238                let allowed_prefixes = manifest
1239                    .schemas
1240                    .iter()
1241                    .map(|schema| data_dir_for_schema_chunk(schema, chunk.id))
1242                    .collect::<Vec<_>>();
1243                for file in &chunk.files {
1244                    plan.data_files_total += 1;
1245                    match valid_manifest_data_file_path(file, &allowed_prefixes) {
1246                        Some(path) => plan.files_to_check.push(ChunkFile {
1247                            chunk_id: chunk.id,
1248                            path: path.to_string(),
1249                        }),
1250                        None => plan.problems.push(VerifyProblem {
1251                            severity: VerifySeverity::Error,
1252                            message: format!(
1253                                "Chunk {}: invalid data file path '{}'",
1254                                chunk.id, file
1255                            ),
1256                        }),
1257                    }
1258                }
1259            }
1260            ChunkStatus::Skipped => {
1261                if !chunk.files.is_empty() {
1262                    plan.problems.push(VerifyProblem {
1263                        severity: VerifySeverity::Error,
1264                        message: format!(
1265                            "Chunk {}: skipped chunk should not list data files",
1266                            chunk.id
1267                        ),
1268                    });
1269                }
1270            }
1271            ChunkStatus::Pending => {
1272                plan.problems.push(VerifyProblem {
1273                    severity: VerifySeverity::Error,
1274                    message: format!("Chunk {}: status is 'pending'", chunk.id),
1275                });
1276            }
1277            ChunkStatus::InProgress => {
1278                plan.problems.push(VerifyProblem {
1279                    severity: VerifySeverity::Error,
1280                    message: format!("Chunk {}: status is 'in_progress'", chunk.id),
1281                });
1282            }
1283            ChunkStatus::Failed => {
1284                let reason = chunk.error.as_deref().unwrap_or("unknown error");
1285                plan.problems.push(VerifyProblem {
1286                    severity: VerifySeverity::Error,
1287                    message: format!("Chunk {}: status is 'failed' (error: {})", chunk.id, reason),
1288                });
1289            }
1290        }
1291    }
1292
1293    plan
1294}
1295
1296/// Streams data files under `data/` and classifies each path against the plan.
1297async fn scan_data_files(storage: &OpenDalStorage, plan: &VerifyPlan) -> Result<VerifyDataScan> {
1298    let mut scan = VerifyDataScan {
1299        existing_claimed_data_files: HashSet::new(),
1300        unexpected_data_files: Vec::new(),
1301    };
1302
1303    storage
1304        .for_each_file_recursive("data/", |path| {
1305            if plan.claimed_data_files.contains(&path) {
1306                scan.existing_claimed_data_files.insert(path);
1307            } else {
1308                scan.unexpected_data_files.push(path);
1309            }
1310            Ok(())
1311        })
1312        .await?;
1313
1314    Ok(scan)
1315}
1316
1317/// Reconciles the manifest plan against the storage scan. Pure; performs no IO.
1318///
1319/// Emits missing-file problems for expected files absent from storage and
1320/// unexpected-file problems for storage files no chunk claims. Unexpected files
1321/// are sorted by path so output is deterministic regardless of listing order.
1322fn reconcile_plan_with_scan(plan: VerifyPlan, mut scan: VerifyDataScan) -> VerifyOutcome {
1323    let mut problems = plan.problems;
1324    let mut data_files_verified = 0;
1325
1326    for file in &plan.files_to_check {
1327        if scan.existing_claimed_data_files.contains(&file.path) {
1328            data_files_verified += 1;
1329        } else {
1330            problems.push(VerifyProblem {
1331                severity: VerifySeverity::Error,
1332                message: format!("Chunk {}: missing file '{}'", file.chunk_id, file.path),
1333            });
1334        }
1335    }
1336
1337    scan.unexpected_data_files.sort();
1338    for path in scan.unexpected_data_files {
1339        problems.push(VerifyProblem {
1340            severity: VerifySeverity::Error,
1341            message: format!("Unexpected data file '{}' is not listed in manifest", path),
1342        });
1343    }
1344
1345    VerifyOutcome {
1346        data_files_total: plan.data_files_total,
1347        data_files_verified,
1348        problems,
1349    }
1350}
1351
/// Validates a data-file path taken from a manifest chunk.
///
/// The path must be syntactically safe (see [`safe_manifest_data_file_path`])
/// and must lie inside one of the `allowed_prefixes` directories. Returns the
/// normalized path (leading `/` removed) on success, `None` otherwise.
///
/// A prefix only matches on a path-segment boundary: `data/public/1` allows
/// `data/public/1/file` but not `data/public/10/file`. A plain `starts_with`
/// would accept the latter and let one chunk vouch for files that sit in
/// another chunk's directory. Prefixes written with or without a trailing `/`
/// are treated the same way.
fn valid_manifest_data_file_path<'a>(
    path: &'a str,
    allowed_prefixes: &[String],
) -> Option<&'a str> {
    let normalized = safe_manifest_data_file_path(path)?;

    let inside_allowed_dir = allowed_prefixes.iter().any(|prefix| {
        normalized
            .strip_prefix(prefix.as_str())
            // Either the prefix already ends the segment with `/`, or the
            // remainder must begin a new segment. This also rejects a path
            // that is exactly the directory itself.
            .is_some_and(|rest| prefix.ends_with('/') || rest.starts_with('/'))
    });

    inside_allowed_dir.then_some(normalized)
}

/// Normalizes a manifest data-file path and rejects unsafe ones.
///
/// Leading `/` characters are stripped. The remainder must start with `data/`
/// and must not contain an empty, `.` or `..` segment, which rules out
/// trailing slashes, doubled slashes and directory traversal. Returns the
/// normalized path, or `None` when the path is rejected.
fn safe_manifest_data_file_path(path: &str) -> Option<&str> {
    let normalized = path.trim_start_matches('/');
    // An empty string fails this check as well, so no separate test is needed.
    if !normalized.starts_with("data/") {
        return None;
    }

    let has_unsafe_segment = normalized
        .split('/')
        .any(|segment| matches!(segment, "" | "." | ".."));

    (!has_unsafe_segment).then_some(normalized)
}
1383
1384fn print_verify_report(snapshot: &str, report: &VerifyReport) {
1385    println!("Verifying snapshot: {}", report.manifest.snapshot_id);
1386    println!("  Location:     {}", snapshot);
1387    if report.manifest.version == MANIFEST_VERSION {
1388        println!("  Manifest:     OK (version {})", report.manifest.version);
1389    } else {
1390        println!(
1391            "  Manifest:     ERROR (version {}, expected {})",
1392            report.manifest.version, MANIFEST_VERSION
1393        );
1394    }
1395    println!(
1396        "  Schema files: {}",
1397        if report.schema_index_exists {
1398            format!("OK ({})", SCHEMAS_FILE)
1399        } else {
1400            format!("WARN (missing {})", SCHEMAS_FILE)
1401        }
1402    );
1403    if report.ddl_file_count > 0 {
1404        println!("  DDL files:    {} file(s) found", report.ddl_file_count);
1405    } else {
1406        println!("  DDL files:    not present");
1407    }
1408
1409    let chunks = &report.chunk_summary;
1410    println!(
1411        "  Chunks:       {} total ({} completed, {} skipped, {} pending, {} in_progress, {} failed)",
1412        chunks.total,
1413        chunks.completed,
1414        chunks.skipped,
1415        chunks.pending,
1416        chunks.in_progress,
1417        chunks.failed
1418    );
1419
1420    if report.manifest.schema_only {
1421        println!("  Data files:   skipped (schema-only)");
1422    } else {
1423        println!(
1424            "  Data files:   {}/{} files verified",
1425            report.data_files_verified, report.data_files_total
1426        );
1427    }
1428
1429    if report.problems.is_empty() {
1430        println!();
1431        println!("Snapshot is valid.");
1432        return;
1433    }
1434
1435    println!();
1436    println!("Problems found:");
1437    for problem in &report.problems {
1438        println!("  [{}] {}", problem.severity.as_str(), problem.message);
1439    }
1440    println!();
1441    println!(
1442        "Snapshot has {} error(s), {} warning(s).",
1443        report.error_count(),
1444        report.warning_count()
1445    );
1446}
1447
1448fn print_delete_summary(snapshot: &str, manifest: &Manifest) {
1449    println!("Snapshot: {}", manifest.snapshot_id);
1450    println!("  Location: {}", snapshot);
1451    println!(
1452        "  Created:  {} UTC",
1453        manifest.created_at.format("%Y-%m-%d %H:%M:%S")
1454    );
1455    println!("  Catalog:  {}", manifest.catalog);
1456    println!("  Schemas:  {}", manifest.schemas.join(", "));
1457    println!("  Chunks:   {}", format_delete_chunks(manifest));
1458}
1459
1460fn format_delete_chunks(manifest: &Manifest) -> String {
1461    if manifest.schema_only {
1462        return "0 (schema-only)".to_string();
1463    }
1464
1465    let summary = summarize_chunks(manifest);
1466    if manifest.is_complete() {
1467        format!("{} (all processed)", summary.total)
1468    } else {
1469        format!(
1470            "{} ({} completed, {} skipped, {} pending, {} in_progress, {} failed)",
1471            summary.total,
1472            summary.completed,
1473            summary.skipped,
1474            summary.pending,
1475            summary.in_progress,
1476            summary.failed
1477        )
1478    }
1479}
1480
1481fn confirm_delete(snapshot: &str) -> Result<bool> {
1482    println!();
1483    println!(
1484        "Warning: this removes the entire snapshot directory/prefix, not only files listed in manifest."
1485    );
1486    println!("This will permanently delete all data under:");
1487    println!("  {}", display_snapshot_prefix(snapshot));
1488    print!("Type 'yes' to confirm deletion: ");
1489    io::stdout().flush().map_err(|error| {
1490        IoSnafu {
1491            operation: "flushing delete confirmation prompt",
1492            error,
1493        }
1494        .build()
1495    })?;
1496
1497    let mut input = String::new();
1498    io::stdin().read_line(&mut input).map_err(|error| {
1499        IoSnafu {
1500            operation: "reading delete confirmation",
1501            error,
1502        }
1503        .build()
1504    })?;
1505
1506    Ok(delete_confirmation_matches(&input))
1507}
1508
/// Returns `true` when the user's answer, ignoring surrounding whitespace, is
/// exactly the lower-case word `yes`.
fn delete_confirmation_matches(input: &str) -> bool {
    matches!(input.trim(), "yes")
}
1512
/// Returns `snapshot` with a trailing `/` appended unless it already ends with
/// one, so it reads as a directory/prefix when shown to the user.
fn display_snapshot_prefix(snapshot: &str) -> String {
    let mut prefix = snapshot.to_string();
    if !prefix.ends_with('/') {
        prefix.push('/');
    }
    prefix
}
1520
1521#[cfg(test)]
1522mod tests {
1523    use chrono::TimeZone;
1524    use clap::Parser;
1525    use tempfile::tempdir;
1526    use url::Url;
1527
1528    use super::*;
1529    use crate::data::path::ddl_path_for_schema;
1530
1531    #[test]
1532    fn test_ddl_path_for_schema() {
1533        assert_eq!(ddl_path_for_schema("public"), "schema/ddl/public.sql");
1534        assert_eq!(
1535            ddl_path_for_schema("../evil"),
1536            "schema/ddl/%2E%2E%2Fevil.sql"
1537        );
1538    }
1539
1540    #[test]
1541    fn test_build_schema_ddl_order() {
1542        let ddl = build_schema_ddl(
1543            "public",
1544            "CREATE DATABASE public;\n".to_string(),
1545            vec!["PHYSICAL;\n".to_string()],
1546            vec!["TABLE;\n".to_string()],
1547            vec!["VIEW;\n".to_string()],
1548        );
1549
1550        let db_pos = ddl.find("CREATE DATABASE").unwrap();
1551        let physical_pos = ddl.find("PHYSICAL;").unwrap();
1552        let table_pos = ddl.find("TABLE;").unwrap();
1553        let view_pos = ddl.find("VIEW;").unwrap();
1554        assert!(db_pos < physical_pos);
1555        assert!(physical_pos < table_pos);
1556        assert!(table_pos < view_pos);
1557    }
1558
1559    #[tokio::test]
1560    async fn test_build_rejects_chunk_window_without_bounds() {
1561        let cmd = ExportCreateCommand::parse_from([
1562            "export-v2-create",
1563            "--addr",
1564            "127.0.0.1:4000",
1565            "--to",
1566            "file:///tmp/export-v2-test",
1567            "--chunk-time-window",
1568            "1h",
1569        ]);
1570
1571        let result = cmd.build().await;
1572        assert!(result.is_err());
1573        let error = result.err().unwrap().to_string();
1574
1575        assert!(error.contains("chunk_time_window requires both --start-time and --end-time"));
1576    }
1577
1578    #[tokio::test]
1579    async fn test_build_rejects_data_export_args_in_schema_only_mode() {
1580        let cmd = ExportCreateCommand::parse_from([
1581            "export-v2-create",
1582            "--addr",
1583            "127.0.0.1:4000",
1584            "--to",
1585            "file:///tmp/export-v2-test",
1586            "--schema-only",
1587            "--start-time",
1588            "2024-01-01T00:00:00Z",
1589            "--end-time",
1590            "2024-01-02T00:00:00Z",
1591            "--chunk-time-window",
1592            "1h",
1593            "--format",
1594            "csv",
1595            "--parallelism",
1596            "2",
1597            "--chunk-parallelism",
1598            "2",
1599        ]);
1600
1601        let error = cmd.build().await.err().unwrap().to_string();
1602
1603        assert!(error.contains("--schema-only cannot be used with data export arguments"));
1604        assert!(error.contains("--start-time"));
1605        assert!(error.contains("--end-time"));
1606        assert!(error.contains("--chunk-time-window"));
1607        assert!(error.contains("--format"));
1608        assert!(error.contains("--parallelism"));
1609        assert!(error.contains("--chunk-parallelism"));
1610    }
1611
1612    #[test]
1613    fn test_chunk_parallelism_defaults_to_one() {
1614        let cmd = ExportCreateCommand::parse_from([
1615            "export-v2-create",
1616            "--addr",
1617            "127.0.0.1:4000",
1618            "--to",
1619            "file:///tmp/export-v2-test",
1620        ]);
1621
1622        assert_eq!(1, cmd.chunk_parallelism);
1623    }
1624
1625    #[test]
1626    fn test_progress_mode_defaults_to_auto() {
1627        let cmd = ExportCreateCommand::parse_from([
1628            "export-v2-create",
1629            "--addr",
1630            "127.0.0.1:4000",
1631            "--to",
1632            "file:///tmp/export-v2-test",
1633        ]);
1634
1635        assert_eq!(ProgressMode::Auto, cmd.progress);
1636    }
1637
1638    #[test]
1639    fn test_progress_mode_parses_explicit_values() {
1640        for (value, expected) in [
1641            ("auto", ProgressMode::Auto),
1642            ("always", ProgressMode::Always),
1643            ("never", ProgressMode::Never),
1644        ] {
1645            let cmd = ExportCreateCommand::parse_from([
1646                "export-v2-create",
1647                "--addr",
1648                "127.0.0.1:4000",
1649                "--to",
1650                "file:///tmp/export-v2-test",
1651                "--progress",
1652                value,
1653            ]);
1654
1655            assert_eq!(expected, cmd.progress);
1656        }
1657    }
1658
1659    #[test]
1660    fn test_progress_mode_rejects_unknown_value() {
1661        assert!(
1662            ExportCreateCommand::try_parse_from([
1663                "export-v2-create",
1664                "--addr",
1665                "127.0.0.1:4000",
1666                "--to",
1667                "file:///tmp/export-v2-test",
1668                "--progress",
1669                "bogus",
1670            ])
1671            .is_err()
1672        );
1673    }
1674
1675    #[test]
1676    fn test_chunk_parallelism_parses_valid_value() {
1677        let cmd = ExportCreateCommand::parse_from([
1678            "export-v2-create",
1679            "--addr",
1680            "127.0.0.1:4000",
1681            "--to",
1682            "file:///tmp/export-v2-test",
1683            "--chunk-parallelism",
1684            "64",
1685        ]);
1686
1687        assert_eq!(64, cmd.chunk_parallelism);
1688    }
1689
1690    #[test]
1691    fn test_chunk_parallelism_rejects_out_of_range_values() {
1692        assert!(
1693            ExportCreateCommand::try_parse_from([
1694                "export-v2-create",
1695                "--addr",
1696                "127.0.0.1:4000",
1697                "--to",
1698                "file:///tmp/export-v2-test",
1699                "--chunk-parallelism",
1700                "0",
1701            ])
1702            .is_err()
1703        );
1704        assert!(
1705            ExportCreateCommand::try_parse_from([
1706                "export-v2-create",
1707                "--addr",
1708                "127.0.0.1:4000",
1709                "--to",
1710                "file:///tmp/export-v2-test",
1711                "--chunk-parallelism",
1712                "65",
1713            ])
1714            .is_err()
1715        );
1716    }
1717
1718    #[test]
1719    fn test_schema_only_mode_mismatch_error_message() {
1720        let error = crate::data::export_v2::error::SchemaOnlyModeMismatchSnafu {
1721            existing_schema_only: false,
1722            requested_schema_only: true,
1723        }
1724        .build()
1725        .to_string();
1726
1727        assert!(error.contains("existing: false"));
1728        assert!(error.contains("requested: true"));
1729    }
1730
1731    #[test]
1732    fn test_validate_resume_config_rejects_catalog_mismatch() {
1733        let manifest = Manifest::new_for_export(
1734            "greptime".to_string(),
1735            vec!["public".to_string()],
1736            false,
1737            TimeRange::unbounded(),
1738            DataFormat::Parquet,
1739            None,
1740        )
1741        .unwrap();
1742        let config = ExportConfig {
1743            catalog: "other".to_string(),
1744            schemas: None,
1745            schema_only: false,
1746            format: DataFormat::Parquet,
1747            force: false,
1748            time_range: TimeRange::unbounded(),
1749            chunk_time_window: None,
1750            parallelism: 1,
1751            chunk_parallelism: 1,
1752            progress: ProgressMode::Auto,
1753            snapshot_uri: "file:///tmp/snapshot".to_string(),
1754            storage_config: ObjectStoreConfig::default(),
1755        };
1756
1757        let error = validate_resume_config(&manifest, &config)
1758            .err()
1759            .unwrap()
1760            .to_string();
1761        assert!(error.contains("catalog"));
1762    }
1763
1764    #[test]
1765    fn test_validate_resume_config_accepts_schema_selection_with_different_case_and_order() {
1766        let manifest = Manifest::new_for_export(
1767            "greptime".to_string(),
1768            vec!["public".to_string(), "analytics".to_string()],
1769            false,
1770            TimeRange::unbounded(),
1771            DataFormat::Parquet,
1772            None,
1773        )
1774        .unwrap();
1775        let config = ExportConfig {
1776            catalog: "greptime".to_string(),
1777            schemas: Some(vec![
1778                "ANALYTICS".to_string(),
1779                "PUBLIC".to_string(),
1780                "public".to_string(),
1781            ]),
1782            schema_only: false,
1783            format: DataFormat::Parquet,
1784            force: false,
1785            time_range: TimeRange::unbounded(),
1786            chunk_time_window: None,
1787            parallelism: 1,
1788            chunk_parallelism: 1,
1789            progress: ProgressMode::Auto,
1790            snapshot_uri: "file:///tmp/snapshot".to_string(),
1791            storage_config: ObjectStoreConfig::default(),
1792        };
1793
1794        assert!(validate_resume_config(&manifest, &config).is_ok());
1795    }
1796
1797    #[test]
1798    fn test_validate_resume_config_rejects_chunk_plan_mismatch() {
1799        let start = chrono::Utc.with_ymd_and_hms(2025, 1, 1, 0, 0, 0).unwrap();
1800        let end = chrono::Utc.with_ymd_and_hms(2025, 1, 1, 2, 0, 0).unwrap();
1801        let time_range = TimeRange::new(Some(start), Some(end));
1802        let manifest = Manifest::new_for_export(
1803            "greptime".to_string(),
1804            vec!["public".to_string()],
1805            false,
1806            time_range.clone(),
1807            DataFormat::Parquet,
1808            None,
1809        )
1810        .unwrap();
1811        let config = ExportConfig {
1812            catalog: "greptime".to_string(),
1813            schemas: None,
1814            schema_only: false,
1815            format: DataFormat::Parquet,
1816            force: false,
1817            time_range,
1818            chunk_time_window: Some(Duration::from_secs(3600)),
1819            parallelism: 1,
1820            chunk_parallelism: 1,
1821            progress: ProgressMode::Auto,
1822            snapshot_uri: "file:///tmp/snapshot".to_string(),
1823            storage_config: ObjectStoreConfig::default(),
1824        };
1825
1826        let error = validate_resume_config(&manifest, &config)
1827            .err()
1828            .unwrap()
1829            .to_string();
1830        assert!(error.contains("chunk plan"));
1831    }
1832
1833    #[test]
1834    fn test_validate_resume_config_rejects_format_mismatch() {
1835        let manifest = Manifest::new_for_export(
1836            "greptime".to_string(),
1837            vec!["public".to_string()],
1838            false,
1839            TimeRange::unbounded(),
1840            DataFormat::Parquet,
1841            None,
1842        )
1843        .unwrap();
1844        let config = ExportConfig {
1845            catalog: "greptime".to_string(),
1846            schemas: None,
1847            schema_only: false,
1848            format: DataFormat::Csv,
1849            force: false,
1850            time_range: TimeRange::unbounded(),
1851            chunk_time_window: None,
1852            parallelism: 1,
1853            chunk_parallelism: 1,
1854            progress: ProgressMode::Auto,
1855            snapshot_uri: "file:///tmp/snapshot".to_string(),
1856            storage_config: ObjectStoreConfig::default(),
1857        };
1858
1859        let error = validate_resume_config(&manifest, &config)
1860            .err()
1861            .unwrap()
1862            .to_string();
1863        assert!(error.contains("format"));
1864    }
1865
1866    #[test]
1867    fn test_validate_resume_config_rejects_time_range_mismatch() {
1868        let start = chrono::Utc.with_ymd_and_hms(2025, 1, 1, 0, 0, 0).unwrap();
1869        let end = chrono::Utc.with_ymd_and_hms(2025, 1, 1, 1, 0, 0).unwrap();
1870        let manifest = Manifest::new_for_export(
1871            "greptime".to_string(),
1872            vec!["public".to_string()],
1873            false,
1874            TimeRange::new(Some(start), Some(end)),
1875            DataFormat::Parquet,
1876            None,
1877        )
1878        .unwrap();
1879        let config = ExportConfig {
1880            catalog: "greptime".to_string(),
1881            schemas: None,
1882            schema_only: false,
1883            format: DataFormat::Parquet,
1884            force: false,
1885            time_range: TimeRange::new(Some(start), Some(start)),
1886            chunk_time_window: None,
1887            parallelism: 1,
1888            chunk_parallelism: 1,
1889            progress: ProgressMode::Auto,
1890            snapshot_uri: "file:///tmp/snapshot".to_string(),
1891            storage_config: ObjectStoreConfig::default(),
1892        };
1893
1894        let error = validate_resume_config(&manifest, &config)
1895            .err()
1896            .unwrap()
1897            .to_string();
1898        assert!(error.contains("time_range"));
1899    }
1900
1901    #[tokio::test]
1902    async fn test_scan_snapshots_sorts_and_tracks_unreadable_manifests() {
1903        let dir = tempdir().unwrap();
1904        write_test_manifest(
1905            dir.path(),
1906            "older",
1907            test_manifest(
1908                chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
1909                false,
1910                true,
1911            ),
1912        );
1913        write_test_manifest(
1914            dir.path(),
1915            "newer",
1916            test_manifest(
1917                chrono::Utc.with_ymd_and_hms(2026, 2, 1, 0, 0, 0).unwrap(),
1918                false,
1919                true,
1920            ),
1921        );
1922
1923        std::fs::create_dir_all(dir.path().join("empty-dir")).unwrap();
1924        std::fs::create_dir_all(dir.path().join("not-snapshot")).unwrap();
1925        std::fs::write(dir.path().join("not-snapshot").join("data.txt"), "x").unwrap();
1926        std::fs::create_dir_all(dir.path().join("broken")).unwrap();
1927        std::fs::write(dir.path().join("broken").join(MANIFEST_FILE), "{not-json").unwrap();
1928
1929        let uri = Url::from_directory_path(dir.path()).unwrap().to_string();
1930        let storage = OpenDalStorage::from_file_uri(&uri).unwrap();
1931        let result = scan_snapshots(&storage).await.unwrap();
1932
1933        assert_eq!(result.snapshots.len(), 2);
1934        assert_eq!(
1935            result.snapshots[0].manifest.created_at,
1936            chrono::Utc.with_ymd_and_hms(2026, 2, 1, 0, 0, 0).unwrap()
1937        );
1938        assert_eq!(
1939            result.snapshots[1].manifest.created_at,
1940            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap()
1941        );
1942        assert_eq!(result.unreadable, vec!["broken/".to_string()]);
1943        assert_eq!(result.snapshots[0].path, "newer/");
1944        assert_eq!(result.snapshots[1].path, "older/");
1945    }
1946
1947    #[test]
1948    fn test_snapshot_list_status_and_chunk_summary() {
1949        let schema_only = test_manifest(
1950            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
1951            true,
1952            true,
1953        );
1954        assert_eq!(snapshot_status(&schema_only), "schema-only");
1955        assert_eq!(format_list_chunks(&schema_only), "0");
1956
1957        let complete = test_manifest(
1958            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
1959            false,
1960            true,
1961        );
1962        assert_eq!(snapshot_status(&complete), "complete");
1963        assert_eq!(format_list_chunks(&complete), "2/2");
1964        assert_eq!(format_delete_chunks(&complete), "2 (all processed)");
1965
1966        let incomplete = test_manifest(
1967            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
1968            false,
1969            false,
1970        );
1971        assert_eq!(snapshot_status(&incomplete), "incomplete");
1972        assert_eq!(format_list_chunks(&incomplete), "1/2");
1973        assert_eq!(
1974            format_delete_chunks(&incomplete),
1975            "2 (1 completed, 0 skipped, 1 pending, 0 in_progress, 0 failed)"
1976        );
1977    }
1978
1979    #[tokio::test]
1980    async fn test_delete_build_rejects_bucket_root_uri() {
1981        let cmd = ExportDeleteCommand::parse_from([
1982            "export-v2-delete",
1983            "--snapshot",
1984            "s3://bucket",
1985            "--no-confirm",
1986        ]);
1987
1988        let error = cmd.build().await.err().unwrap().to_string();
1989        assert!(error.contains("non-empty path"));
1990    }
1991
1992    #[test]
1993    fn test_delete_skip_confirmation_aliases() {
1994        let no_confirm = ExportDeleteCommand::parse_from([
1995            "export-v2-delete",
1996            "--snapshot",
1997            "s3://bucket/snapshot",
1998            "--no-confirm",
1999        ]);
2000        assert!(no_confirm.skip_confirmation);
2001
2002        let yes = ExportDeleteCommand::parse_from([
2003            "export-v2-delete",
2004            "--snapshot",
2005            "s3://bucket/snapshot",
2006            "--yes",
2007        ]);
2008        assert!(yes.skip_confirmation);
2009    }
2010
2011    #[tokio::test]
2012    async fn test_delete_snapshot_with_no_confirm_removes_snapshot_contents() {
2013        let parent = tempdir().unwrap();
2014        let snapshot = parent.path().join("snapshot");
2015        let sibling = parent.path().join("sibling");
2016        std::fs::create_dir_all(&snapshot).unwrap();
2017        std::fs::create_dir_all(&sibling).unwrap();
2018        std::fs::write(sibling.join("keep.txt"), b"keep").unwrap();
2019        write_root_manifest(
2020            &snapshot,
2021            test_manifest(
2022                chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2023                true,
2024                true,
2025            ),
2026        );
2027        write_snapshot_file(&snapshot, "schema/schemas.json", b"[]");
2028
2029        let uri = Url::from_directory_path(&snapshot).unwrap().to_string();
2030        let delete = ExportDelete {
2031            snapshot: uri,
2032            skip_confirmation: true,
2033            storage: file_storage_for_dir(&snapshot),
2034        };
2035
2036        delete
2037            .run_with_confirmation(|_| unreachable!())
2038            .await
2039            .unwrap();
2040
2041        assert!(!snapshot.join(MANIFEST_FILE).exists());
2042        assert!(!snapshot.join("schema/schemas.json").exists());
2043        assert!(sibling.join("keep.txt").exists());
2044    }
2045
2046    #[tokio::test]
2047    async fn test_delete_snapshot_requires_manifest() {
2048        let dir = tempdir().unwrap();
2049        let uri = Url::from_directory_path(dir.path()).unwrap().to_string();
2050        let delete = ExportDelete {
2051            snapshot: uri,
2052            skip_confirmation: true,
2053            storage: file_storage_for_dir(dir.path()),
2054        };
2055
2056        let error = delete
2057            .run_with_confirmation(|_| unreachable!())
2058            .await
2059            .err()
2060            .unwrap()
2061            .to_string();
2062
2063        assert!(error.contains("Snapshot not found"));
2064        assert!(dir.path().exists());
2065    }
2066
2067    #[tokio::test]
2068    async fn test_delete_snapshot_cancels_without_exact_confirmation() {
2069        let dir = tempdir().unwrap();
2070        write_root_manifest(
2071            dir.path(),
2072            test_manifest(
2073                chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2074                true,
2075                true,
2076            ),
2077        );
2078        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2079        let uri = Url::from_directory_path(dir.path()).unwrap().to_string();
2080        let delete = ExportDelete {
2081            snapshot: uri.clone(),
2082            skip_confirmation: false,
2083            storage: file_storage_for_dir(dir.path()),
2084        };
2085
2086        delete
2087            .run_with_confirmation(|snapshot| {
2088                assert_eq!(snapshot, uri);
2089                Ok(false)
2090            })
2091            .await
2092            .unwrap();
2093
2094        assert!(dir.path().join(MANIFEST_FILE).exists());
2095        assert!(dir.path().join("schema/schemas.json").exists());
2096    }
2097
2098    #[test]
2099    fn test_delete_confirmation_requires_exact_yes() {
2100        assert!(delete_confirmation_matches("yes"));
2101        assert!(delete_confirmation_matches(" yes\n"));
2102        assert!(!delete_confirmation_matches("YES"));
2103        assert!(!delete_confirmation_matches("y"));
2104        assert!(!delete_confirmation_matches("yes please"));
2105    }
2106
2107    #[test]
2108    fn test_display_snapshot_prefix_adds_trailing_slash() {
2109        assert_eq!(
2110            display_snapshot_prefix("s3://bucket/snapshot"),
2111            "s3://bucket/snapshot/"
2112        );
2113        assert_eq!(
2114            display_snapshot_prefix("s3://bucket/snapshot/"),
2115            "s3://bucket/snapshot/"
2116        );
2117    }
2118
2119    #[tokio::test]
2120    async fn test_verify_snapshot_accepts_valid_full_snapshot() {
2121        let dir = tempdir().unwrap();
2122        let manifest = test_manifest(
2123            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2124            false,
2125            true,
2126        );
2127        write_root_manifest(dir.path(), manifest);
2128        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2129        write_default_ddl_files(dir.path());
2130        write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
2131
2132        let storage = file_storage_for_dir(dir.path());
2133        let report = verify_snapshot(&storage).await.unwrap();
2134
2135        assert_eq!(report.error_count(), 0);
2136        assert_eq!(report.warning_count(), 0);
2137        assert_eq!(report.data_files_total, 1);
2138        assert_eq!(report.data_files_verified, 1);
2139    }
2140
2141    #[tokio::test]
2142    async fn test_verify_snapshot_reports_missing_data_file_and_failed_chunk() {
2143        let dir = tempdir().unwrap();
2144        let mut manifest = test_manifest(
2145            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2146            false,
2147            true,
2148        );
2149        manifest.chunks[1].mark_failed("copy failed".to_string());
2150        write_root_manifest(dir.path(), manifest);
2151        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2152        write_default_ddl_files(dir.path());
2153
2154        let storage = file_storage_for_dir(dir.path());
2155        let report = verify_snapshot(&storage).await.unwrap();
2156
2157        assert_eq!(report.error_count(), 2);
2158        assert!(
2159            report
2160                .problems
2161                .iter()
2162                .any(|problem| problem.message.contains("missing file"))
2163        );
2164        assert!(
2165            report
2166                .problems
2167                .iter()
2168                .any(|problem| problem.message.contains("status is 'failed'"))
2169        );
2170    }
2171
2172    #[tokio::test]
2173    async fn test_verify_snapshot_reports_missing_schema_index_as_warning() {
2174        let dir = tempdir().unwrap();
2175        let manifest = test_manifest(
2176            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2177            false,
2178            true,
2179        );
2180        write_root_manifest(dir.path(), manifest);
2181        write_default_ddl_files(dir.path());
2182        write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
2183
2184        let storage = file_storage_for_dir(dir.path());
2185        let report = verify_snapshot(&storage).await.unwrap();
2186
2187        assert_eq!(report.error_count(), 0);
2188        assert_eq!(report.warning_count(), 1);
2189        assert!(
2190            report
2191                .problems
2192                .iter()
2193                .any(|problem| problem.message.contains("Missing schema index"))
2194        );
2195    }
2196
2197    #[tokio::test]
2198    async fn test_verify_snapshot_rejects_schema_only_snapshot_with_chunks() {
2199        let dir = tempdir().unwrap();
2200        let mut manifest = test_manifest(
2201            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2202            true,
2203            true,
2204        );
2205        let mut chunk = ChunkMeta::new(1, TimeRange::unbounded());
2206        chunk.mark_completed(vec!["data/public/1/file.parquet".to_string()], None);
2207        manifest.chunks.push(chunk);
2208        write_root_manifest(dir.path(), manifest);
2209        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2210        write_default_ddl_files(dir.path());
2211
2212        let storage = file_storage_for_dir(dir.path());
2213        let report = verify_snapshot(&storage).await.unwrap();
2214
2215        assert_eq!(report.error_count(), 1);
2216        assert_eq!(report.data_files_total, 0);
2217        assert!(
2218            report
2219                .problems
2220                .iter()
2221                .any(|problem| problem.message.contains("should not contain data chunks"))
2222        );
2223    }
2224
2225    #[tokio::test]
2226    async fn test_verify_snapshot_rejects_schema_only_snapshot_with_data_files() {
2227        let dir = tempdir().unwrap();
2228        let manifest = test_manifest(
2229            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2230            true,
2231            true,
2232        );
2233        write_root_manifest(dir.path(), manifest);
2234        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2235        write_default_ddl_files(dir.path());
2236        write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
2237
2238        let storage = file_storage_for_dir(dir.path());
2239        let report = verify_snapshot(&storage).await.unwrap();
2240
2241        assert_eq!(report.error_count(), 1);
2242        assert_eq!(report.data_files_total, 0);
2243        assert!(
2244            report
2245                .problems
2246                .iter()
2247                .any(|problem| problem.message.contains("should not contain data files"))
2248        );
2249    }
2250
2251    #[tokio::test]
2252    async fn test_verify_snapshot_rejects_full_snapshot_without_chunks() {
2253        let dir = tempdir().unwrap();
2254        let mut manifest = test_manifest(
2255            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2256            false,
2257            true,
2258        );
2259        manifest.chunks.clear();
2260        write_root_manifest(dir.path(), manifest);
2261        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2262        write_default_ddl_files(dir.path());
2263
2264        let storage = file_storage_for_dir(dir.path());
2265        let report = verify_snapshot(&storage).await.unwrap();
2266
2267        assert_eq!(report.error_count(), 1);
2268        assert_eq!(report.data_files_total, 0);
2269        assert!(
2270            report
2271                .problems
2272                .iter()
2273                .any(|problem| problem.message.contains("at least one data chunk"))
2274        );
2275    }
2276
2277    #[tokio::test]
2278    async fn test_verify_snapshot_rejects_skipped_chunk_data_files() {
2279        let dir = tempdir().unwrap();
2280        let manifest = test_manifest(
2281            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2282            false,
2283            true,
2284        );
2285        write_root_manifest(dir.path(), manifest);
2286        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2287        write_default_ddl_files(dir.path());
2288        write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
2289        write_snapshot_file(dir.path(), "data/public/2/file.parquet", b"data");
2290
2291        let storage = file_storage_for_dir(dir.path());
2292        let report = verify_snapshot(&storage).await.unwrap();
2293
2294        assert_eq!(report.error_count(), 1);
2295        assert!(
2296            report
2297                .problems
2298                .iter()
2299                .any(|problem| { problem.message.contains("Unexpected data file") })
2300        );
2301    }
2302
2303    #[tokio::test]
2304    async fn test_verify_snapshot_rejects_duplicate_chunk_ids() {
2305        let dir = tempdir().unwrap();
2306        let mut manifest = test_manifest(
2307            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2308            false,
2309            true,
2310        );
2311        let mut duplicate = ChunkMeta::new(1, TimeRange::unbounded());
2312        duplicate.mark_completed(vec!["data/public/1/file.parquet".to_string()], None);
2313        manifest.chunks.push(duplicate);
2314        write_root_manifest(dir.path(), manifest);
2315        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2316        write_default_ddl_files(dir.path());
2317        write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
2318
2319        let storage = file_storage_for_dir(dir.path());
2320        let report = verify_snapshot(&storage).await.unwrap();
2321
2322        assert_eq!(report.error_count(), 1);
2323        assert!(
2324            report
2325                .problems
2326                .iter()
2327                .any(|problem| problem.message.contains("duplicate chunk id"))
2328        );
2329    }
2330
2331    #[tokio::test]
2332    async fn test_verify_snapshot_requires_all_schema_ddl() {
2333        let dir = tempdir().unwrap();
2334        let manifest = test_manifest(
2335            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2336            true,
2337            true,
2338        );
2339        write_root_manifest(dir.path(), manifest);
2340        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2341        write_snapshot_file(
2342            dir.path(),
2343            "schema/ddl/public.sql",
2344            b"CREATE DATABASE public;",
2345        );
2346
2347        let storage = file_storage_for_dir(dir.path());
2348        let report = verify_snapshot(&storage).await.unwrap();
2349
2350        assert_eq!(report.error_count(), 1);
2351        assert!(
2352            report
2353                .problems
2354                .iter()
2355                .any(|problem| problem.message.contains("analytics"))
2356        );
2357    }
2358
2359    #[tokio::test]
2360    async fn test_verify_snapshot_reports_missing_ddl_dir() {
2361        let dir = tempdir().unwrap();
2362        let manifest = test_manifest(
2363            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2364            false,
2365            true,
2366        );
2367        write_root_manifest(dir.path(), manifest);
2368        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2369        write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
2370
2371        let storage = file_storage_for_dir(dir.path());
2372        let report = verify_snapshot(&storage).await.unwrap();
2373
2374        assert_eq!(report.error_count(), 2);
2375        assert!(
2376            report
2377                .problems
2378                .iter()
2379                .any(|problem| problem.message.contains("schema/ddl/public.sql"))
2380        );
2381        assert!(
2382            report
2383                .problems
2384                .iter()
2385                .any(|problem| problem.message.contains("schema/ddl/analytics.sql"))
2386        );
2387    }
2388
2389    #[tokio::test]
2390    async fn test_verify_snapshot_reports_manifest_version_mismatch() {
2391        let dir = tempdir().unwrap();
2392        let mut manifest = test_manifest(
2393            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2394            false,
2395            true,
2396        );
2397        manifest.version = MANIFEST_VERSION + 1;
2398        write_root_manifest(dir.path(), manifest);
2399        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2400        write_default_ddl_files(dir.path());
2401        write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
2402
2403        let storage = file_storage_for_dir(dir.path());
2404        let report = verify_snapshot(&storage).await.unwrap();
2405
2406        assert_eq!(report.error_count(), 1);
2407        assert!(
2408            report
2409                .problems
2410                .iter()
2411                .any(|problem| problem.message.contains("Manifest version mismatch"))
2412        );
2413    }
2414
2415    #[tokio::test]
2416    async fn test_verify_snapshot_rejects_invalid_data_file_paths() {
2417        let dir = tempdir().unwrap();
2418        let mut manifest = test_manifest(
2419            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2420            false,
2421            true,
2422        );
2423        manifest.chunks[0].files = vec!["data/public/1/../file.parquet".to_string()];
2424        write_root_manifest(dir.path(), manifest);
2425        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2426        write_default_ddl_files(dir.path());
2427
2428        let storage = file_storage_for_dir(dir.path());
2429        let report = verify_snapshot(&storage).await.unwrap();
2430
2431        assert_eq!(report.error_count(), 1);
2432        assert!(
2433            report
2434                .problems
2435                .iter()
2436                .any(|problem| problem.message.contains("invalid data file path"))
2437        );
2438        assert_eq!(report.data_files_verified, 0);
2439    }
2440
2441    #[tokio::test]
2442    async fn test_verify_snapshot_accepts_leading_slash_manifest_data_paths() {
2443        let dir = tempdir().unwrap();
2444        let mut manifest = test_manifest(
2445            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2446            false,
2447            true,
2448        );
2449        manifest.chunks[0].files = vec!["/data/public/1/file.parquet".to_string()];
2450        write_root_manifest(dir.path(), manifest);
2451        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2452        write_default_ddl_files(dir.path());
2453        write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
2454
2455        let storage = file_storage_for_dir(dir.path());
2456        let report = verify_snapshot(&storage).await.unwrap();
2457
2458        assert_eq!(report.error_count(), 0);
2459        assert_eq!(report.data_files_verified, 1);
2460    }
2461
2462    #[tokio::test]
2463    async fn test_verify_snapshot_rejects_unlisted_files_under_completed_chunk_prefix() {
2464        let dir = tempdir().unwrap();
2465        let manifest = test_manifest(
2466            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2467            false,
2468            true,
2469        );
2470        write_root_manifest(dir.path(), manifest);
2471        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2472        write_default_ddl_files(dir.path());
2473        write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
2474        write_snapshot_file(dir.path(), "data/public/1/extra.parquet", b"data");
2475
2476        let storage = file_storage_for_dir(dir.path());
2477        let report = verify_snapshot(&storage).await.unwrap();
2478
2479        assert_eq!(report.error_count(), 1);
2480        assert!(
2481            report
2482                .problems
2483                .iter()
2484                .any(|problem| problem.message.contains("Unexpected data file"))
2485        );
2486        assert_eq!(report.data_files_verified, 1);
2487    }
2488
2489    #[tokio::test]
2490    async fn test_verify_snapshot_rejects_orphan_data_files_outside_known_chunk_prefixes() {
2491        let dir = tempdir().unwrap();
2492        let manifest = test_manifest(
2493            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2494            false,
2495            true,
2496        );
2497        write_root_manifest(dir.path(), manifest);
2498        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2499        write_default_ddl_files(dir.path());
2500        write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
2501        write_snapshot_file(dir.path(), "data/public/99/file.parquet", b"data");
2502
2503        let storage = file_storage_for_dir(dir.path());
2504        let report = verify_snapshot(&storage).await.unwrap();
2505
2506        assert_eq!(report.error_count(), 1);
2507        assert!(
2508            report
2509                .problems
2510                .iter()
2511                .any(|problem| problem.message.contains("Unexpected data file"))
2512        );
2513        assert_eq!(report.data_files_verified, 1);
2514    }
2515
2516    #[tokio::test]
2517    async fn test_verify_snapshot_rejects_data_files_under_wrong_chunk_or_schema() {
2518        let dir = tempdir().unwrap();
2519        let mut manifest = test_manifest(
2520            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2521            false,
2522            true,
2523        );
2524        manifest.chunks[0].files = vec![
2525            "data/public/99/file.parquet".to_string(),
2526            "data/metrics/1/file.parquet".to_string(),
2527        ];
2528        write_root_manifest(dir.path(), manifest);
2529        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2530        write_default_ddl_files(dir.path());
2531        write_snapshot_file(dir.path(), "data/public/99/file.parquet", b"data");
2532        write_snapshot_file(dir.path(), "data/metrics/1/file.parquet", b"data");
2533
2534        let storage = file_storage_for_dir(dir.path());
2535        let report = verify_snapshot(&storage).await.unwrap();
2536
2537        assert_eq!(report.error_count(), 2);
2538        assert_eq!(report.data_files_verified, 0);
2539        assert!(
2540            report
2541                .problems
2542                .iter()
2543                .all(|problem| problem.message.contains("invalid data file path"))
2544        );
2545    }
2546
2547    #[test]
2548    fn test_build_verify_plan_classifies_chunks_without_io() {
2549        let mut manifest = test_manifest(
2550            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2551            false,
2552            true,
2553        );
2554        // test_manifest(complete) gives: chunk 1 completed (1 file), chunk 2 skipped.
2555        let mut failed = ChunkMeta::new(3, TimeRange::unbounded());
2556        failed.mark_failed("boom".to_string());
2557        manifest.chunks.push(failed);
2558        manifest
2559            .chunks
2560            .push(ChunkMeta::new(4, TimeRange::unbounded()));
2561
2562        let plan = build_verify_plan(&manifest);
2563
2564        assert_eq!(plan.files_to_check.len(), 1);
2565        assert_eq!(plan.files_to_check[0].chunk_id, 1);
2566        assert_eq!(plan.files_to_check[0].path, "data/public/1/file.parquet");
2567        assert_eq!(plan.data_files_total, 1);
2568        assert!(
2569            plan.claimed_data_files
2570                .contains("data/public/1/file.parquet")
2571        );
2572        assert_eq!(plan.problems.len(), 2);
2573        assert!(
2574            plan.problems
2575                .iter()
2576                .any(|problem| problem.message.contains("status is 'failed'"))
2577        );
2578        assert!(
2579            plan.problems
2580                .iter()
2581                .any(|problem| problem.message.contains("status is 'pending'"))
2582        );
2583    }
2584
2585    #[tokio::test]
2586    async fn test_verify_snapshot_produces_deterministic_problem_output() {
2587        let dir = tempdir().unwrap();
2588        let manifest = test_manifest(
2589            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
2590            false,
2591            true,
2592        );
2593        write_root_manifest(dir.path(), manifest);
2594        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
2595        write_default_ddl_files(dir.path());
2596        write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
2597        // Many orphan files under a known chunk prefix to stress ordering.
2598        for i in 0..50 {
2599            write_snapshot_file(
2600                dir.path(),
2601                &format!("data/public/1/orphan_{:02}.parquet", i),
2602                b"x",
2603            );
2604        }
2605
2606        let storage = file_storage_for_dir(dir.path());
2607        let messages = |report: &VerifyReport| {
2608            report
2609                .problems
2610                .iter()
2611                .map(|problem| problem.message.clone())
2612                .collect::<Vec<_>>()
2613        };
2614        let first = messages(&verify_snapshot(&storage).await.unwrap());
2615        let second = messages(&verify_snapshot(&storage).await.unwrap());
2616
2617        // Output is identical across runs despite HashSet-based scanning.
2618        assert_eq!(first, second);
2619
2620        let orphans = first
2621            .iter()
2622            .filter(|message| message.contains("Unexpected data file"))
2623            .cloned()
2624            .collect::<Vec<_>>();
2625        assert_eq!(orphans.len(), 50);
2626        let mut sorted = orphans.clone();
2627        sorted.sort();
2628        assert_eq!(orphans, sorted);
2629    }
2630
2631    fn write_test_manifest(root: &std::path::Path, dir: &str, manifest: Manifest) {
2632        let snapshot_dir = root.join(dir);
2633        std::fs::create_dir_all(&snapshot_dir).unwrap();
2634        std::fs::write(
2635            snapshot_dir.join(MANIFEST_FILE),
2636            serde_json::to_vec_pretty(&manifest).unwrap(),
2637        )
2638        .unwrap();
2639    }
2640
2641    fn write_root_manifest(root: &std::path::Path, manifest: Manifest) {
2642        std::fs::write(
2643            root.join(MANIFEST_FILE),
2644            serde_json::to_vec_pretty(&manifest).unwrap(),
2645        )
2646        .unwrap();
2647    }
2648
/// Writes `content` to the file named by `relative_path` beneath `root`.
///
/// `relative_path` is split on `/` and appended to `root` one segment at a
/// time with `PathBuf::push`. Missing parent directories are created before
/// the file is written.
///
/// Panics if the resulting path has no parent or if any filesystem operation
/// fails.
fn write_snapshot_file(root: &std::path::Path, relative_path: &str, content: &[u8]) {
    let target = relative_path
        .split('/')
        .fold(root.to_path_buf(), |mut acc, segment| {
            acc.push(segment);
            acc
        });

    let parent = target.parent().unwrap();
    std::fs::create_dir_all(parent).unwrap();
    std::fs::write(&target, content).unwrap();
}
2657
2658    fn write_default_ddl_files(root: &std::path::Path) {
2659        write_snapshot_file(root, "schema/ddl/public.sql", b"CREATE DATABASE public;");
2660        write_snapshot_file(
2661            root,
2662            "schema/ddl/analytics.sql",
2663            b"CREATE DATABASE analytics;",
2664        );
2665    }
2666
2667    fn file_storage_for_dir(root: &std::path::Path) -> OpenDalStorage {
2668        let uri = Url::from_directory_path(root).unwrap().to_string();
2669        OpenDalStorage::from_file_uri(&uri).unwrap()
2670    }
2671
2672    fn test_manifest(
2673        created_at: chrono::DateTime<chrono::Utc>,
2674        schema_only: bool,
2675        complete: bool,
2676    ) -> Manifest {
2677        let mut manifest = Manifest::new_for_export(
2678            "greptime".to_string(),
2679            vec!["public".to_string(), "analytics".to_string()],
2680            schema_only,
2681            TimeRange::unbounded(),
2682            DataFormat::Parquet,
2683            None,
2684        )
2685        .unwrap();
2686        manifest.created_at = created_at;
2687        manifest.updated_at = created_at;
2688
2689        if !schema_only {
2690            manifest.chunks.clear();
2691            let mut first = ChunkMeta::new(1, TimeRange::unbounded());
2692            first.mark_completed(vec!["data/public/1/file.parquet".to_string()], None);
2693            manifest.chunks.push(first);
2694
2695            if complete {
2696                manifest
2697                    .chunks
2698                    .push(ChunkMeta::skipped(2, TimeRange::unbounded()));
2699            } else {
2700                manifest
2701                    .chunks
2702                    .push(ChunkMeta::new(2, TimeRange::unbounded()));
2703            }
2704        }
2705
2706        manifest
2707    }
2708}