cli/data/export.rs

// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::HashSet;
use std::sync::Arc;
use std::time::Duration;

use async_trait::async_trait;
use clap::{Parser, ValueEnum};
use common_error::ext::BoxedError;
use common_telemetry::{debug, error, info};
use object_store::ObjectStore;
use serde_json::Value;
use snafu::{OptionExt, ResultExt};
use tokio::sync::Semaphore;
use tokio::time::Instant;

use crate::common::{ObjectStoreConfig, new_fs_object_store};
use crate::data::storage_export::{
    AzblobBackend, FsBackend, GcsBackend, OssBackend, S3Backend, StorageExport, StorageType,
};
use crate::data::{COPY_PATH_PLACEHOLDER, default_database};
use crate::database::{DatabaseClient, parse_proxy_opts};
use crate::error::{
    EmptyResultSnafu, Error, OpenDalSnafu, OutputDirNotSetSnafu, Result, SchemaNotFoundSnafu,
};
use crate::{Tool, database};

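/// A fully qualified table name as `(catalog, schema, table_name)`, matching the
/// column order returned by the `information_schema` queries below.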
type TableReference = (String, String, String);

#[derive(Debug, Default, Clone, ValueEnum)]
enum ExportTarget {
    /// Export all table schemas, corresponding to `SHOW CREATE TABLE`.
    Schema,
    /// Export all table data, corresponding to `COPY DATABASE TO`.
    Data,
    /// Export all table schemas and data at once.
    #[default]
    All,
}

/// Command for exporting data from GreptimeDB.
#[derive(Debug, Default, Parser)]
pub struct ExportCommand {
    /// Server address to connect to
    #[clap(long)]
    addr: String,

    /// Directory to put the exported data. E.g.: /tmp/greptimedb-export
    /// for local export.
    #[clap(long)]
    output_dir: Option<String>,

    /// The name of the catalog to export.
    #[clap(long, default_value_t = default_database())]
    database: String,

    /// The number of databases exported in parallel.
    /// For example, if there are 20 databases and `db_parallelism` is 4,
    /// 4 databases will be exported concurrently.
    #[clap(long, short = 'j', default_value = "1", alias = "export-jobs")]
    db_parallelism: usize,

    /// The number of tables exported in parallel within a single database.
    /// For example, if a database has 30 tables and `table_parallelism` is 8,
    /// 8 tables will be exported concurrently.
    #[clap(long, default_value = "4")]
    table_parallelism: usize,

    /// The maximum number of retries for each job.
    #[clap(long, default_value = "3")]
    max_retry: usize,

    /// Things to export
    #[clap(long, short = 't', value_enum, default_value = "all")]
    target: ExportTarget,

    /// A half-open time range: [start_time, end_time).
    /// The start of the time range (time-index column) for data export.
    #[clap(long)]
    start_time: Option<String>,

    /// A half-open time range: [start_time, end_time).
    /// The end of the time range (time-index column) for data export.
    #[clap(long)]
    end_time: Option<String>,

    /// The basic authentication for connecting to the server
    #[clap(long)]
    auth_basic: Option<String>,

    /// The timeout of invoking the database.
    ///
    /// It is used to override the server-side timeout setting.
    /// The default behavior disables the server-side default timeout (i.e. `0s`).
    #[clap(long, value_parser = humantime::parse_duration)]
    timeout: Option<Duration>,

    /// The proxy server address to connect to; if set, it will override the system proxy.
    ///
    /// The default behavior is to use the system proxy if neither `proxy` nor `no_proxy` is set.
    #[clap(long)]
    proxy: Option<String>,

    /// Disable the proxy server; if set, no proxy will be used.
    #[clap(long)]
    no_proxy: bool,

    /// If both `ddl_local_dir` and remote storage are set, `ddl_local_dir` will only be used for
    /// the exported SQL files, and the data will be exported to remote storage.
    ///
    /// Note that `ddl_local_dir` exports SQL files to the **LOCAL** file system; this is useful if the
    /// export client doesn't have direct access to remote storage.
    ///
    /// If remote storage is set but `ddl_local_dir` is not set, both SQL files and data will be
    /// exported to remote storage.
    #[clap(long)]
    ddl_local_dir: Option<String>,

    #[clap(flatten)]
    storage: ObjectStoreConfig,
}
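
// An illustrative invocation of this command (the program/subcommand prefix is an
// assumption; the flags come from `ExportCommand`'s fields and all values are
// placeholders):
//
//   <program> export --addr 127.0.0.1:4000 --output-dir /tmp/greptimedb-export \
//       --target all --db-parallelism 2 --table-parallelism 8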

impl ExportCommand {
    pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
        // Determine storage type
        let (storage_type, operator) = if self.storage.enable_s3 {
            (
                StorageType::S3(S3Backend::new(self.storage.s3.clone())?),
                self.storage.build_s3()?,
            )
        } else if self.storage.enable_oss {
            (
                StorageType::Oss(OssBackend::new(self.storage.oss.clone())?),
                self.storage.build_oss()?,
            )
        } else if self.storage.enable_gcs {
            (
                StorageType::Gcs(GcsBackend::new(self.storage.gcs.clone())?),
                self.storage.build_gcs()?,
            )
        } else if self.storage.enable_azblob {
            (
                StorageType::Azblob(AzblobBackend::new(self.storage.azblob.clone())?),
                self.storage.build_azblob()?,
            )
        } else if let Some(output_dir) = &self.output_dir {
            (
                StorageType::Fs(FsBackend::new(output_dir.clone())),
                new_fs_object_store(output_dir)?,
            )
        } else {
            return Err(BoxedError::new(OutputDirNotSetSnafu {}.build()));
        };

        let (catalog, schema) =
            database::split_database(&self.database).map_err(BoxedError::new)?;
        let proxy = parse_proxy_opts(self.proxy.clone(), self.no_proxy)?;
        let database_client = DatabaseClient::new(
            self.addr.clone(),
            catalog.clone(),
            self.auth_basic.clone(),
            // Treats `None` as `0s` to disable server-side default timeout.
            self.timeout.unwrap_or_default(),
            proxy,
        );

        Ok(Box::new(Export {
            catalog,
            schema,
            database_client,
            export_jobs: self.db_parallelism,
            target: self.target.clone(),
            start_time: self.start_time.clone(),
            end_time: self.end_time.clone(),
            parallelism: self.table_parallelism,
            storage_type,
            ddl_local_dir: self.ddl_local_dir.clone(),
            operator,
        }))
    }
}

#[derive(Clone)]
pub struct Export {
    catalog: String,
    schema: Option<String>,
    database_client: DatabaseClient,
    export_jobs: usize,
    target: ExportTarget,
    start_time: Option<String>,
    end_time: Option<String>,
    parallelism: usize,
    storage_type: StorageType,
    ddl_local_dir: Option<String>,
    operator: ObjectStore,
}

impl Export {
    async fn get_db_names(&self) -> Result<Vec<String>> {
        let db_names = self.all_db_names().await?;
        let Some(schema) = &self.schema else {
            return Ok(db_names);
        };

        // Check if the schema exists
        db_names
            .into_iter()
            .find(|db_name| db_name.to_lowercase() == schema.to_lowercase())
            .map(|name| vec![name])
            .context(SchemaNotFoundSnafu {
                catalog: &self.catalog,
                schema,
            })
    }

    /// Iterate over all db names.
    async fn all_db_names(&self) -> Result<Vec<String>> {
        let records = self
            .database_client
            .sql_in_public("SHOW DATABASES")
            .await?
            .context(EmptyResultSnafu)?;
        let mut result = Vec::with_capacity(records.len());
        for value in records {
            let Value::String(schema) = &value[0] else {
                unreachable!()
            };
            if schema == common_catalog::consts::INFORMATION_SCHEMA_NAME {
                continue;
            }
            if schema == common_catalog::consts::PG_CATALOG_NAME {
                continue;
            }
            result.push(schema.clone());
        }
        Ok(result)
    }

    /// Return a list of [`TableReference`] to be exported.
    /// Includes all tables under the given `catalog` and `schema`.
    async fn get_table_list(
        &self,
        catalog: &str,
        schema: &str,
    ) -> Result<(
        Vec<TableReference>,
        Vec<TableReference>,
        Vec<TableReference>,
    )> {
        // Puts all metric physical tables first
        let sql = format!(
            "SELECT table_catalog, table_schema, table_name \
            FROM information_schema.columns \
            WHERE column_name = '__tsid' \
                and table_catalog = \'{catalog}\' \
                and table_schema = \'{schema}\'"
        );
        let records = self
            .database_client
            .sql_in_public(&sql)
            .await?
            .context(EmptyResultSnafu)?;
        let mut metric_physical_tables = HashSet::with_capacity(records.len());
        for value in records {
            let mut t = Vec::with_capacity(3);
            for v in &value {
                let Value::String(value) = v else {
                    unreachable!()
                };
                t.push(value);
            }
            metric_physical_tables.insert((t[0].clone(), t[1].clone(), t[2].clone()));
        }

        let sql = format!(
            "SELECT table_catalog, table_schema, table_name, table_type \
            FROM information_schema.tables \
            WHERE (table_type = \'BASE TABLE\' OR table_type = \'VIEW\') \
                and table_catalog = \'{catalog}\' \
                and table_schema = \'{schema}\'",
        );
        let records = self
            .database_client
            .sql_in_public(&sql)
            .await?
            .context(EmptyResultSnafu)?;

        debug!("Fetched table/view list: {:?}", records);

        if records.is_empty() {
            return Ok((vec![], vec![], vec![]));
        }

        let mut remaining_tables = Vec::with_capacity(records.len());
        let mut views = Vec::new();
        for value in records {
            let mut t = Vec::with_capacity(4);
            for v in &value {
                let Value::String(value) = v else {
                    unreachable!()
                };
                t.push(value);
            }
            let table = (t[0].clone(), t[1].clone(), t[2].clone());
            let table_type = t[3].as_str();
            // Ignores the physical table
            if !metric_physical_tables.contains(&table) {
                if table_type == "VIEW" {
                    views.push(table);
                } else {
                    remaining_tables.push(table);
                }
            }
        }

        Ok((
            metric_physical_tables.into_iter().collect(),
            remaining_tables,
            views,
        ))
    }

    async fn show_create(
        &self,
        show_type: &str,
        catalog: &str,
        schema: &str,
        table: Option<&str>,
    ) -> Result<String> {
        let sql = match table {
            Some(table) => format!(
                r#"SHOW CREATE {} "{}"."{}"."{}""#,
                show_type, catalog, schema, table
            ),
            None => format!(r#"SHOW CREATE {} "{}"."{}""#, show_type, catalog, schema),
        };
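        // For example (illustrative identifiers), this renders to
        // `SHOW CREATE TABLE "greptime"."public"."my_table"`, or to
        // `SHOW CREATE DATABASE "greptime"."public"` when `table` is `None`.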
        let records = self
            .database_client
            .sql_in_public(&sql)
            .await?
            .context(EmptyResultSnafu)?;
        let Value::String(create) = &records[0][1] else {
            unreachable!()
        };

        Ok(format!("{};\n", create))
    }

    async fn export_create_database(&self) -> Result<()> {
        let timer = Instant::now();
        let db_names = self.get_db_names().await?;
        let db_count = db_names.len();
        let operator = self.build_prefer_fs_operator().await?;

        for schema in db_names {
            let create_database = self
                .show_create("DATABASE", &self.catalog, &schema, None)
                .await?;

            let file_path = self.get_file_path(&schema, "create_database.sql");
            self.write_to_storage(&operator, &file_path, create_database.into_bytes())
                .await?;

            info!(
                "Exported {}.{} database creation SQL to {}",
                self.catalog,
                schema,
                self.storage_type.format_output_path(&file_path)
            );
        }

        let elapsed = timer.elapsed();
        info!("Success {db_count} jobs, cost: {elapsed:?}");

        Ok(())
    }

    async fn export_create_table(&self) -> Result<()> {
        let timer = Instant::now();
        let semaphore = Arc::new(Semaphore::new(self.export_jobs));
        let db_names = self.get_db_names().await?;
        let db_count = db_names.len();
        let operator = Arc::new(self.build_prefer_fs_operator().await?);
        let mut tasks = Vec::with_capacity(db_names.len());

        for schema in db_names {
            let semaphore_moved = semaphore.clone();
            let export_self = self.clone();
            let operator = operator.clone();
            tasks.push(async move {
                let _permit = semaphore_moved.acquire().await.unwrap();
                let (metric_physical_tables, remaining_tables, views) = export_self
                    .get_table_list(&export_self.catalog, &schema)
                    .await?;

                // Create directory if needed for file system storage
                if !export_self.storage_type.is_remote_storage() {
                    let db_dir = format!("{}/{}/", export_self.catalog, schema);
                    operator.create_dir(&db_dir).await.context(OpenDalSnafu)?;
                }

                let file_path = export_self.get_file_path(&schema, "create_tables.sql");
                let mut content = Vec::new();

                // Add table creation SQL
                for (c, s, t) in metric_physical_tables.iter().chain(&remaining_tables) {
                    let create_table = export_self.show_create("TABLE", c, s, Some(t)).await?;
                    content.extend_from_slice(create_table.as_bytes());
                }

                // Add view creation SQL
                for (c, s, v) in &views {
                    let create_view = export_self.show_create("VIEW", c, s, Some(v)).await?;
                    content.extend_from_slice(create_view.as_bytes());
                }

                // Write to storage
                export_self
                    .write_to_storage(&operator, &file_path, content)
                    .await?;

                info!(
                    "Finished exporting {}.{schema} with {} table schemas to path: {}",
                    export_self.catalog,
                    metric_physical_tables.len() + remaining_tables.len() + views.len(),
                    export_self.storage_type.format_output_path(&file_path)
                );

                Ok::<(), Error>(())
            });
        }

        let success = self.execute_tasks(tasks).await;
        let elapsed = timer.elapsed();
        info!("Success {success}/{db_count} jobs, cost: {elapsed:?}");

        Ok(())
    }

    async fn build_operator(&self) -> Result<ObjectStore> {
        Ok(self.operator.clone())
    }

    /// Builds an operator, preferring the local file system when `ddl_local_dir` is set.
    async fn build_prefer_fs_operator(&self) -> Result<ObjectStore> {
        if self.storage_type.is_remote_storage() && self.ddl_local_dir.is_some() {
            let root = self.ddl_local_dir.as_ref().unwrap().clone();
            let op = new_fs_object_store(&root).map_err(|e| Error::Other {
                source: e,
                location: snafu::location!(),
            })?;
            Ok(op)
        } else {
            Ok(self.operator.clone())
        }
    }

    async fn export_database_data(&self) -> Result<()> {
        let timer = Instant::now();
        let semaphore = Arc::new(Semaphore::new(self.export_jobs));
        let db_names = self.get_db_names().await?;
        let db_count = db_names.len();
        let mut tasks = Vec::with_capacity(db_count);
        let operator = Arc::new(self.build_operator().await?);
        let fs_first_operator = Arc::new(self.build_prefer_fs_operator().await?);
        let with_options = build_with_options(&self.start_time, &self.end_time, self.parallelism);

        for schema in db_names {
            let semaphore_moved = semaphore.clone();
            let export_self = self.clone();
            let with_options_clone = with_options.clone();
            let operator = operator.clone();
            let fs_first_operator = fs_first_operator.clone();

            tasks.push(async move {
                let _permit = semaphore_moved.acquire().await.unwrap();

                // Create directory if not using remote storage
                if !export_self.storage_type.is_remote_storage() {
                    let db_dir = format!("{}/{}/", export_self.catalog, schema);
                    operator.create_dir(&db_dir).await.context(OpenDalSnafu)?;
                }

                let (path, connection_part) = export_self
                    .storage_type
                    .get_storage_path(&export_self.catalog, &schema);

                // Execute COPY DATABASE TO command
                let sql = format!(
                    r#"COPY DATABASE "{}"."{}" TO '{}' WITH ({}){};"#,
                    export_self.catalog, schema, path, with_options_clone, connection_part
                );
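
                // With the local-fs backend and default options, the statement looks
                // roughly like (illustrative values; the exact path comes from
                // `get_storage_path`, and remote backends append a connection clause):
                //   COPY DATABASE "greptime"."public" TO '<path>' WITH (format = 'parquet', parallelism = 4);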

                // Log SQL command but mask sensitive information
                let safe_sql = export_self.storage_type.mask_sensitive_info(&sql);
                info!("Executing sql: {}", safe_sql);

                export_self.database_client.sql_in_public(&sql).await?;
                info!(
                    "Finished exporting {}.{} data to {}",
                    export_self.catalog, schema, path
                );

                // Create copy_from.sql file
                let copy_database_from_sql = {
                    let command_without_connection = format!(
                        r#"COPY DATABASE "{}"."{}" FROM '{}' WITH ({});"#,
                        export_self.catalog, schema, COPY_PATH_PLACEHOLDER, with_options_clone
                    );

                    if connection_part.is_empty() {
                        command_without_connection
                    } else {
                        let command_with_connection = format!(
                            r#"COPY DATABASE "{}"."{}" FROM '{}' WITH ({}){};"#,
                            export_self.catalog, schema, path, with_options_clone, connection_part
                        );

                        format!(
                            "-- {}\n{}",
                            command_with_connection, command_without_connection
                        )
                    }
                };

                let copy_from_path = export_self.get_file_path(&schema, "copy_from.sql");
                export_self
                    .write_to_storage(
                        &fs_first_operator,
                        &copy_from_path,
                        copy_database_from_sql.into_bytes(),
                    )
                    .await?;

                info!(
                    "Finished exporting {}.{} copy_from.sql to {}",
                    export_self.catalog,
                    schema,
                    export_self.storage_type.format_output_path(&copy_from_path)
                );

                Ok::<(), Error>(())
            });
        }

        let success = self.execute_tasks(tasks).await;
        let elapsed = timer.elapsed();
        info!("Success {success}/{db_count} jobs, costs: {elapsed:?}");

        Ok(())
    }

    fn get_file_path(&self, schema: &str, file_name: &str) -> String {
        format!("{}/{}/{}", self.catalog, schema, file_name)
    }

    async fn write_to_storage(
        &self,
        op: &ObjectStore,
        file_path: &str,
        content: Vec<u8>,
    ) -> Result<()> {
        op.write(file_path, content)
            .await
            .context(OpenDalSnafu)
            .map(|_| ())
    }

    async fn execute_tasks(
        &self,
        tasks: Vec<impl std::future::Future<Output = Result<()>>>,
    ) -> usize {
        futures::future::join_all(tasks)
            .await
            .into_iter()
            .filter(|r| match r {
                Ok(_) => true,
                Err(e) => {
                    error!(e; "export job failed");
                    false
                }
            })
            .count()
    }
}

#[async_trait]
impl Tool for Export {
    async fn do_work(&self) -> std::result::Result<(), BoxedError> {
        match self.target {
            ExportTarget::Schema => {
                self.export_create_database()
                    .await
                    .map_err(BoxedError::new)?;
                self.export_create_table().await.map_err(BoxedError::new)
            }
            ExportTarget::Data => self.export_database_data().await.map_err(BoxedError::new),
            ExportTarget::All => {
                self.export_create_database()
                    .await
                    .map_err(BoxedError::new)?;
                self.export_create_table().await.map_err(BoxedError::new)?;
                self.export_database_data().await.map_err(BoxedError::new)
            }
        }
    }
}

/// Builds the WITH options string for SQL commands, assuming consistent syntax across S3 and local exports.
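/// For example (illustrative values), passing `start_time = Some("2022-01-01 00:00:00")`,
/// no `end_time`, and `parallelism = 8` yields:
/// `format = 'parquet', start_time = '2022-01-01 00:00:00', parallelism = 8`.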
fn build_with_options(
    start_time: &Option<String>,
    end_time: &Option<String>,
    parallelism: usize,
) -> String {
    let mut options = vec!["format = 'parquet'".to_string()];
    if let Some(start) = start_time {
        options.push(format!("start_time = '{}'", start));
    }
    if let Some(end) = end_time {
        options.push(format!("end_time = '{}'", end));
    }
    options.push(format!("parallelism = {}", parallelism));
    options.join(", ")
}

#[cfg(test)]
mod tests {
    use clap::Parser;
    use common_test_util::temp_dir::create_temp_dir;

    use super::*;

    // ==================== Basic Success Cases ====================

    #[tokio::test]
    async fn test_export_command_build_with_local_fs() {
        let temp_dir = create_temp_dir("test_export_local_fs");
        let output_dir = temp_dir.path().to_str().unwrap();

        let cmd = ExportCommand::parse_from([
            "export",
            "--addr",
            "127.0.0.1:4000",
            "--output-dir",
            output_dir,
        ]);

        let result = cmd.build().await;
        assert!(result.is_ok());
    }

    #[tokio::test]
    async fn test_export_command_build_with_s3_success() {
        let cmd = ExportCommand::parse_from([
            "export",
            "--addr",
            "127.0.0.1:4000",
            "--s3",
            "--s3-bucket",
            "test-bucket",
            "--s3-root",
            "test-root",
            "--s3-access-key-id",
            "test-key",
            "--s3-secret-access-key",
            "test-secret",
            // Optional fields
            "--s3-region",
            "us-west-2",
            "--s3-endpoint",
            "https://s3.amazonaws.com",
        ]);

        let result = cmd.build().await;
        assert!(result.is_ok());
    }

    #[tokio::test]
    async fn test_export_command_build_with_oss_success() {
        let cmd = ExportCommand::parse_from([
            "export",
            "--addr",
            "127.0.0.1:4000",
            "--oss",
            "--oss-bucket",
            "test-bucket",
            "--oss-root",
            "test-root",
            "--oss-access-key-id",
            "test-key-id",
            "--oss-access-key-secret",
            "test-secret",
            "--oss-endpoint",
            "https://oss.example.com",
        ]);

        let result = cmd.build().await;
        assert!(result.is_ok());
    }

    #[tokio::test]
    async fn test_export_command_build_with_gcs_success() {
        let cmd = ExportCommand::parse_from([
            "export",
            "--addr",
            "127.0.0.1:4000",
            "--gcs",
            "--gcs-bucket",
            "test-bucket",
            "--gcs-root",
            "test-root",
            "--gcs-scope",
            "test-scope",
            "--gcs-credential-path",
            "/path/to/credential",
            "--gcs-credential",
            "test-credential-content",
            "--gcs-endpoint",
            "https://storage.googleapis.com",
        ]);

        let result = cmd.build().await;
        assert!(result.is_ok());
    }

    #[tokio::test]
    async fn test_export_command_build_with_gcs_adc_success() {
        // Test GCS with Application Default Credentials (no explicit credentials provided)
        let cmd = ExportCommand::parse_from([
            "export",
            "--addr",
            "127.0.0.1:4000",
            "--gcs",
            "--gcs-bucket",
            "test-bucket",
            "--gcs-root",
            "test-root",
            "--gcs-scope",
            "test-scope",
            // No credential_path or credential
            // No endpoint (optional)
        ]);

        let result = cmd.build().await;
        assert!(result.is_ok());
    }

    #[tokio::test]
    async fn test_export_command_build_with_azblob_success() {
        let cmd = ExportCommand::parse_from([
            "export",
            "--addr",
            "127.0.0.1:4000",
            "--azblob",
            "--azblob-container",
            "test-container",
            "--azblob-root",
            "test-root",
            "--azblob-account-name",
            "test-account",
            "--azblob-account-key",
            "test-key",
            "--azblob-endpoint",
            "https://account.blob.core.windows.net",
        ]);

        let result = cmd.build().await;
        assert!(result.is_ok());
    }

    #[tokio::test]
    async fn test_export_command_build_with_azblob_with_sas_token() {
        // Test Azure Blob with SAS token
        let cmd = ExportCommand::parse_from([
            "export",
            "--addr",
            "127.0.0.1:4000",
            "--azblob",
            "--azblob-container",
            "test-container",
            "--azblob-root",
            "test-root",
            "--azblob-account-name",
            "test-account",
            "--azblob-account-key",
            "test-key",
            "--azblob-endpoint",
            "https://account.blob.core.windows.net",
            "--azblob-sas-token",
            "test-sas-token",
        ]);

        let result = cmd.build().await;
        assert!(result.is_ok());
    }

    // ==================== Gap 1: Parse-time dependency checks ====================

    #[test]
    fn test_export_command_build_with_conflict() {
        // Try to enable both S3 and OSS
        let result =
            ExportCommand::try_parse_from(["export", "--addr", "127.0.0.1:4000", "--s3", "--oss"]);

        assert!(result.is_err());
        let err = result.unwrap_err();
        // clap error for conflicting arguments
        assert!(err.kind() == clap::error::ErrorKind::ArgumentConflict);
    }

    #[tokio::test]
    async fn test_export_command_build_with_s3_no_enable_flag() {
        // Test that providing S3 config without --s3 flag fails
        let result = ExportCommand::try_parse_from([
            "export",
            "--addr",
            "127.0.0.1:4000",
            // Note: no --s3 flag
            "--s3-bucket",
            "test-bucket",
            "--s3-access-key-id",
            "test-key",
            "--output-dir",
            "/tmp/test",
        ]);

        assert!(result.is_err());
        let err = result.unwrap_err();
        assert_eq!(err.kind(), clap::error::ErrorKind::MissingRequiredArgument);
        assert!(err.to_string().contains("--s3"));
    }

    #[tokio::test]
    async fn test_export_command_build_with_oss_no_enable_flag() {
        // Test that providing OSS config without --oss flag fails at parse time
        let result = ExportCommand::try_parse_from([
            "export",
            "--addr",
            "127.0.0.1:4000",
            "--oss-bucket",
            "test-bucket",
            "--output-dir",
            "/tmp/test",
        ]);

        assert!(result.is_err());
        let err = result.unwrap_err();
        assert_eq!(err.kind(), clap::error::ErrorKind::MissingRequiredArgument);
        assert!(err.to_string().contains("--oss"));
    }

    #[tokio::test]
    async fn test_export_command_build_with_gcs_no_enable_flag() {
        // Test that providing GCS config without --gcs flag fails at parse time
        let result = ExportCommand::try_parse_from([
            "export",
            "--addr",
            "127.0.0.1:4000",
            "--gcs-bucket",
            "test-bucket",
            "--output-dir",
            "/tmp/test",
        ]);

        assert!(result.is_err());
        let err = result.unwrap_err();
        assert_eq!(err.kind(), clap::error::ErrorKind::MissingRequiredArgument);
        assert!(err.to_string().contains("--gcs"));
    }

    #[tokio::test]
    async fn test_export_command_build_with_azblob_no_enable_flag() {
        // Test that providing Azure Blob config without --azblob flag fails at parse time
        let result = ExportCommand::try_parse_from([
            "export",
            "--addr",
            "127.0.0.1:4000",
            "--azblob-container",
            "test-container",
            "--output-dir",
            "/tmp/test",
        ]);

        assert!(result.is_err());
        let err = result.unwrap_err();
        assert_eq!(err.kind(), clap::error::ErrorKind::MissingRequiredArgument);
        assert!(err.to_string().contains("--azblob"));
    }

    // ==================== Gap 2: Empty string vs missing tests ====================

    #[tokio::test]
    async fn test_export_command_build_with_s3_empty_access_key() {
        // Test S3 with empty access key ID (empty string, not missing)
        let cmd = ExportCommand::parse_from([
            "export",
            "--addr",
            "127.0.0.1:4000",
            "--s3",
            "--s3-bucket",
            "test-bucket",
            "--s3-root",
            "test-root",
            "--s3-access-key-id",
            "", // Empty string
            "--s3-secret-access-key",
            "test-secret",
            "--s3-region",
            "us-west-2",
        ]);

        let result = cmd.build().await;
        assert!(result.is_err());
        if let Err(err) = result {
            assert!(
                err.to_string().contains("S3 access key ID must be set"),
                "Actual error: {}",
                err
            );
        }
    }

    #[tokio::test]
    async fn test_export_command_build_with_s3_missing_secret_key() {
        // Test S3 with a missing secret access key
        let cmd = ExportCommand::parse_from([
            "export",
            "--addr",
            "127.0.0.1:4000",
            "--s3",
            "--s3-bucket",
            "test-bucket",
            "--s3-root",
            "test-root",
            "--s3-access-key-id",
            "test-key",
            // Missing --s3-secret-access-key
            "--s3-region",
            "us-west-2",
        ]);

        let result = cmd.build().await;
        assert!(result.is_err());
        if let Err(err) = result {
            assert!(
                err.to_string().contains("S3 secret access key must be set"),
                "Actual error: {}",
                err
            );
        }
    }

    #[tokio::test]
    async fn test_export_command_build_with_s3_empty_root() {
        // Empty root should be allowed (it's an optional path component)
        let cmd = ExportCommand::parse_from([
            "export",
            "--addr",
            "127.0.0.1:4000",
            "--s3",
            "--s3-bucket",
            "test-bucket",
            "--s3-root",
            "", // Empty root is OK
            "--s3-access-key-id",
            "test-key",
            "--s3-secret-access-key",
            "test-secret",
            "--s3-region",
            "us-west-2",
        ]);

        let result = cmd.build().await;
        // Should succeed because root is not a required field
        assert!(
            result.is_ok(),
            "Expected success but got: {:?}",
            result.err()
        );
    }

    #[tokio::test]
    async fn test_export_command_build_with_oss_empty_access_key_id() {
        // Test OSS with empty access_key_id (empty string, not missing)
        let cmd = ExportCommand::parse_from([
            "export",
            "--addr",
            "127.0.0.1:4000",
            "--oss",
            "--oss-bucket",
            "test-bucket",
            "--oss-access-key-id",
            "", // Empty string
            "--oss-access-key-secret",
            "test-secret",
            "--oss-endpoint",
            "https://oss.example.com",
        ]);

        let result = cmd.build().await;
        assert!(result.is_err());
        if let Err(err) = result {
            assert!(
                err.to_string().contains("OSS access key ID must be set"),
                "Actual error: {}",
                err
            );
        }
    }

    #[tokio::test]
    async fn test_export_command_build_with_oss_missing_endpoint() {
        // Missing endpoint
        let cmd = ExportCommand::parse_from([
            "export",
            "--addr",
            "127.0.0.1:4000",
            "--oss",
            "--oss-bucket",
            "test-bucket",
            "--oss-root",
            "test-root",
            "--oss-access-key-id",
            "test-key-id",
            "--oss-access-key-secret",
            "test-secret",
        ]);

        let result = cmd.build().await;
        assert!(result.is_err());
        if let Err(err) = result {
            assert!(
                err.to_string().contains("OSS endpoint must be set"),
                "Actual error: {}",
                err
            );
        }
    }

    #[tokio::test]
    async fn test_export_command_build_with_oss_multiple_missing_fields() {
        // Test OSS with multiple missing required fields
        let cmd = ExportCommand::parse_from([
            "export",
            "--addr",
            "127.0.0.1:4000",
            "--oss",
            "--oss-bucket",
            "test-bucket",
            // Missing: root, access_key_id, access_key_secret, endpoint
        ]);

        let result = cmd.build().await;
        assert!(result.is_err());
        if let Err(err) = result {
            let err_str = err.to_string();
            // Should mention multiple missing fields
            assert!(
                err_str.contains("OSS"),
                "Error should mention OSS: {}",
                err_str
            );
            assert!(
                err_str.contains("must be set"),
                "Error should mention required fields: {}",
                err_str
            );
        }
    }

    #[tokio::test]
    async fn test_export_command_build_with_gcs_empty_bucket() {
        // Test GCS with empty bucket
        let cmd = ExportCommand::parse_from([
            "export",
            "--addr",
            "127.0.0.1:4000",
            "--gcs",
            "--gcs-bucket",
            "", // Empty bucket
            "--gcs-root",
            "test-root",
            "--gcs-scope",
            "test-scope",
        ]);

        let result = cmd.build().await;
        assert!(result.is_err());
        if let Err(err) = result {
            assert!(
                err.to_string().contains("GCS bucket must be set"),
                "Actual error: {}",
                err
            );
        }
    }

    #[tokio::test]
    async fn test_export_command_build_with_gcs_empty_root() {
        // Test GCS with an empty root (should fail as it's required)
        let cmd = ExportCommand::parse_from([
            "export",
            "--addr",
            "127.0.0.1:4000",
            "--gcs",
            "--gcs-bucket",
            "test-bucket",
            "--gcs-root",
            "", // Empty root
            "--gcs-scope",
            "test-scope",
            "--gcs-credential-path",
            "/path/to/credential",
            "--gcs-credential",
            "test-credential",
            "--gcs-endpoint",
            "https://storage.googleapis.com",
        ]);

        let result = cmd.build().await;
        assert!(result.is_err());
        if let Err(err) = result {
            assert!(
                err.to_string().contains("GCS root must be set"),
                "Actual error: {}",
                err
            );
        }
    }

    #[tokio::test]
    async fn test_export_command_build_with_azblob_empty_account_name() {
        // Test Azure Blob with empty account_name
        let cmd = ExportCommand::parse_from([
            "export",
            "--addr",
            "127.0.0.1:4000",
            "--azblob",
            "--azblob-container",
            "test-container",
            "--azblob-root",
            "test-root",
            "--azblob-account-name",
            "", // Empty account name
            "--azblob-account-key",
            "test-key",
            "--azblob-endpoint",
            "https://account.blob.core.windows.net",
        ]);

        let result = cmd.build().await;
        assert!(result.is_err());
        if let Err(err) = result {
            assert!(
                err.to_string().contains("AzBlob account name must be set"),
                "Actual error: {}",
                err
            );
        }
    }

    #[tokio::test]
    async fn test_export_command_build_with_azblob_missing_account_key() {
        // Missing account key
        let cmd = ExportCommand::parse_from([
            "export",
            "--addr",
            "127.0.0.1:4000",
            "--azblob",
            "--azblob-container",
            "test-container",
            "--azblob-root",
            "test-root",
            "--azblob-account-name",
            "test-account",
            "--azblob-endpoint",
            "https://account.blob.core.windows.net",
        ]);

        let result = cmd.build().await;
        assert!(result.is_err());
        if let Err(err) = result {
            assert!(
                err.to_string()
                    .contains("AzBlob account key (when sas_token is not provided) must be set"),
                "Actual error: {}",
                err
            );
        }
    }

    // ==================== Gap 3: Boundary cases ====================

    #[tokio::test]
    async fn test_export_command_build_with_no_storage() {
        // No output-dir and no backend - should fail
        let cmd = ExportCommand::parse_from(["export", "--addr", "127.0.0.1:4000"]);

        let result = cmd.build().await;
        assert!(result.is_err());
        if let Err(err) = result {
            assert!(
                err.to_string().contains("Output directory not set"),
                "Actual error: {}",
                err
            );
        }
    }

    #[tokio::test]
    async fn test_export_command_build_with_s3_minimal_config() {
        // S3 with only required fields (no optional fields)
        let cmd = ExportCommand::parse_from([
            "export",
            "--addr",
            "127.0.0.1:4000",
            "--s3",
            "--s3-bucket",
            "test-bucket",
            "--s3-access-key-id",
            "test-key",
            "--s3-secret-access-key",
            "test-secret",
            "--s3-region",
            "us-west-2",
            // No root, endpoint, or enable_virtual_host_style
        ]);

        let result = cmd.build().await;
        assert!(result.is_ok(), "Minimal S3 config should succeed");
    }

    #[tokio::test]
    async fn test_export_command_build_with_oss_minimal_config() {
        // OSS with only required fields
        let cmd = ExportCommand::parse_from([
            "export",
            "--addr",
            "127.0.0.1:4000",
            "--oss",
            "--oss-bucket",
            "test-bucket",
            "--oss-access-key-id",
            "test-key-id",
            "--oss-access-key-secret",
            "test-secret",
            "--oss-endpoint",
            "https://oss.example.com",
            // No root
        ]);

        let result = cmd.build().await;
        assert!(result.is_ok(), "Minimal OSS config should succeed");
    }

    #[tokio::test]
    async fn test_export_command_build_with_gcs_minimal_config() {
        // GCS with only required fields (using ADC)
        let cmd = ExportCommand::parse_from([
            "export",
            "--addr",
            "127.0.0.1:4000",
            "--gcs",
            "--gcs-bucket",
            "test-bucket",
            "--gcs-root",
            "test-root",
            "--gcs-scope",
            "test-scope",
            // No credential_path, credential, or endpoint
        ]);

        let result = cmd.build().await;
        assert!(result.is_ok(), "Minimal GCS config should succeed");
    }

    #[tokio::test]
    async fn test_export_command_build_with_azblob_minimal_config() {
        // Azure Blob with only required fields
        let cmd = ExportCommand::parse_from([
            "export",
            "--addr",
            "127.0.0.1:4000",
            "--azblob",
            "--azblob-container",
            "test-container",
            "--azblob-root",
            "test-root",
            "--azblob-account-name",
            "test-account",
            "--azblob-account-key",
            "test-key",
            "--azblob-endpoint",
            "https://account.blob.core.windows.net",
            // No sas_token
        ]);

        let result = cmd.build().await;
        assert!(result.is_ok(), "Minimal AzBlob config should succeed");
    }

    #[tokio::test]
    async fn test_export_command_build_with_local_and_s3() {
        // Both output-dir and S3 - S3 should take precedence
        let temp_dir = create_temp_dir("test_export_local_and_s3");
        let output_dir = temp_dir.path().to_str().unwrap();

        let cmd = ExportCommand::parse_from([
            "export",
            "--addr",
            "127.0.0.1:4000",
            "--output-dir",
            output_dir,
            "--s3",
            "--s3-bucket",
            "test-bucket",
            "--s3-access-key-id",
            "test-key",
            "--s3-secret-access-key",
            "test-secret",
            "--s3-region",
            "us-west-2",
        ]);

        let result = cmd.build().await;
        assert!(
            result.is_ok(),
            "S3 should be selected when both are provided"
        );
    }

    // ==================== Gap 4: Custom validation (Azure Blob) ====================

    #[tokio::test]
    async fn test_export_command_build_with_azblob_only_sas_token() {
        // Azure Blob with sas_token but no account_key - should succeed
        let cmd = ExportCommand::parse_from([
            "export",
            "--addr",
            "127.0.0.1:4000",
            "--azblob",
            "--azblob-container",
            "test-container",
            "--azblob-root",
            "test-root",
            "--azblob-account-name",
            "test-account",
            "--azblob-endpoint",
            "https://account.blob.core.windows.net",
            "--azblob-sas-token",
            "test-sas-token",
            // No account_key
        ]);

        let result = cmd.build().await;
        assert!(
            result.is_ok(),
            "AzBlob with only sas_token should succeed: {:?}",
            result.err()
        );
    }

    #[tokio::test]
    async fn test_export_command_build_with_azblob_empty_account_key_with_sas() {
        // Azure Blob with empty account_key but valid sas_token - should succeed
        let cmd = ExportCommand::parse_from([
            "export",
            "--addr",
            "127.0.0.1:4000",
            "--azblob",
            "--azblob-container",
            "test-container",
            "--azblob-root",
            "test-root",
            "--azblob-account-name",
            "test-account",
            "--azblob-account-key",
            "", // Empty account_key is OK if sas_token is provided
            "--azblob-endpoint",
            "https://account.blob.core.windows.net",
            "--azblob-sas-token",
            "test-sas-token",
        ]);

        let result = cmd.build().await;
        assert!(
            result.is_ok(),
            "AzBlob with empty account_key but sas_token should succeed: {:?}",
            result.err()
        );
    }
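
    // ==================== Added: WITH-options formatting ====================

    // A small sanity-check sketch for the pure `build_with_options` helper; it makes
    // no network calls and only asserts the string layout derived from the code above.
    #[test]
    fn test_build_with_options_formatting() {
        // Without a time range, only the format and parallelism options are emitted.
        assert_eq!(
            build_with_options(&None, &None, 4),
            "format = 'parquet', parallelism = 4"
        );

        // With a start time, the lower bound of the half-open range is included.
        assert_eq!(
            build_with_options(&Some("2022-01-01 00:00:00".to_string()), &None, 8),
            "format = 'parquet', start_time = '2022-01-01 00:00:00', parallelism = 8"
        );
    }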
}