Skip to main content

cli/data/import_v2/
command.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Import V2 CLI command.
16
17use std::collections::HashSet;
18use std::path::{Path, PathBuf};
19use std::time::Duration;
20
21use async_trait::async_trait;
22use clap::Parser;
23use common_error::ext::BoxedError;
24use common_telemetry::info;
25use snafu::{OptionExt, ResultExt};
26
27use crate::Tool;
28use crate::common::ObjectStoreConfig;
29use crate::data::export_v2::data::{build_copy_source, execute_copy_database_from};
30use crate::data::export_v2::manifest::{ChunkMeta, ChunkStatus, DataFormat, MANIFEST_VERSION};
31use crate::data::import_v2::coordinator::{
32    ImportResumeConfig, ImportTaskExecutor, build_import_tasks, chunk_has_schema_files,
33    import_with_resume_session_with_progress, prepare_import_resume,
34};
35use crate::data::import_v2::error::{
36    ChunkImportFailedSnafu, EmptyChunkManifestSnafu, ImportStatePathUnavailableSnafu,
37    IncompleteSnapshotSnafu, ManifestVersionMismatchSnafu, MissingChunkDataSnafu, Result,
38    SchemaNotInSnapshotSnafu, SnapshotStorageSnafu,
39};
40use crate::data::import_v2::executor::{DdlExecutor, DdlStatement};
41use crate::data::import_v2::state::{ImportTaskKey, default_state_path};
42use crate::data::path::{data_dir_for_schema_chunk, ddl_path_for_schema};
43use crate::data::progress::{ProgressMode, build_progress_reporter};
44use crate::data::snapshot_storage::{OpenDalStorage, SnapshotStorage, validate_uri};
45use crate::database::{DatabaseClient, parse_proxy_opts};
46
// NOTE: this struct derives `clap::Parser`, so the `///` doc comments on the
// struct and on each field are what clap shows as `--help` text. Treat them
// as user-facing strings; maintainer-only notes below use plain `//` comments.
/// Import from a snapshot.
#[derive(Debug, Parser)]
pub struct ImportV2Command {
    /// Server address to connect (e.g., 127.0.0.1:4000).
    #[clap(long)]
    addr: String,

    /// Source snapshot location (e.g., s3://bucket/path, file:///tmp/backup).
    #[clap(long)]
    from: String,

    /// Target catalog name.
    #[clap(long, default_value = "greptime")]
    catalog: String,

    // An empty list is turned into `None` (no filter) by `build`.
    /// Schema list to import (default: all in snapshot).
    /// Can be specified multiple times or comma-separated.
    #[clap(long, value_delimiter = ',')]
    schemas: Vec<String>,

    /// Verify without importing (dry-run).
    #[clap(long)]
    dry_run: bool,

    /// Progress reporting mode.
    #[clap(long, value_enum, default_value_t = ProgressMode::Auto)]
    progress: ProgressMode,

    // The 1..=64 range is enforced by `parse_task_parallelism`.
    /// Number of import data tasks to run concurrently on the client (1..=64).
    #[clap(long, default_value = "1", value_parser = parse_task_parallelism)]
    task_parallelism: usize,

    /// Override the import resume state file path.
    ///
    /// Defaults to a stable path under `~/.greptime/import_state`.
    #[clap(long)]
    state_path: Option<PathBuf>,

    /// Basic authentication (user:password).
    #[clap(long)]
    auth_basic: Option<String>,

    // When unset, `build` falls back to a 60 second timeout.
    /// Request timeout.
    #[clap(long, value_parser = humantime::parse_duration)]
    timeout: Option<Duration>,

    /// Proxy server address.
    ///
    /// If set, it overrides the system proxy unless `--no-proxy` is specified.
    /// If neither `--proxy` nor `--no-proxy` is set, system proxy (env) may be used.
    #[clap(long)]
    proxy: Option<String>,

    /// Disable all proxy usage (ignores `--proxy` and system proxy).
    ///
    /// When set and `--proxy` is not provided, this explicitly disables system proxy.
    #[clap(long)]
    no_proxy: bool,

    /// Object store configuration for remote storage backends.
    #[clap(flatten)]
    storage: ObjectStoreConfig,
}
110
111impl ImportV2Command {
112    pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
113        // Validate URI format
114        validate_uri(&self.from)
115            .context(SnapshotStorageSnafu)
116            .map_err(BoxedError::new)?;
117
118        // Parse schemas (empty vec means all schemas)
119        let schemas = if self.schemas.is_empty() {
120            None
121        } else {
122            Some(self.schemas.clone())
123        };
124
125        // Build storage
126        let storage = OpenDalStorage::from_uri(&self.from, &self.storage)
127            .context(SnapshotStorageSnafu)
128            .map_err(BoxedError::new)?;
129
130        // Build database client
131        let proxy = parse_proxy_opts(self.proxy.clone(), self.no_proxy)?;
132        let database_client = DatabaseClient::new(
133            self.addr.clone(),
134            self.catalog.clone(),
135            self.auth_basic.clone(),
136            self.timeout.unwrap_or(Duration::from_secs(60)),
137            proxy,
138            self.no_proxy,
139        );
140
141        Ok(Box::new(Import {
142            catalog: self.catalog.clone(),
143            schemas,
144            dry_run: self.dry_run,
145            progress: self.progress,
146            task_parallelism: self.task_parallelism,
147            state_path: self.state_path.clone(),
148            snapshot_uri: self.from.clone(),
149            storage_config: self.storage.clone(),
150            storage: Box::new(storage),
151            database_client,
152        }))
153    }
154}
155
156/// Resolves the import resume state file path. When `override_path` is set it is
157/// used verbatim; otherwise the stable default under `~/.greptime/import_state`
158/// is derived from the import identity.
159fn resolve_state_path(
160    override_path: Option<&Path>,
161    snapshot_id: &str,
162    target_addr: &str,
163    catalog: &str,
164    schemas: &[String],
165) -> Result<PathBuf> {
166    if let Some(path) = override_path {
167        return Ok(path.to_path_buf());
168    }
169    default_state_path(snapshot_id, target_addr, catalog, schemas).context(
170        ImportStatePathUnavailableSnafu {
171            snapshot_id: snapshot_id.to_string(),
172        },
173    )
174}
175
/// clap value parser for `--task-parallelism`.
///
/// Accepts an unsigned integer in `1..=64`. Input that does not parse as a
/// `usize` and input that parses but falls outside the range are rejected
/// with two distinct messages.
fn parse_task_parallelism(value: &str) -> std::result::Result<usize, String> {
    match value.parse::<usize>() {
        Ok(parallelism) if (1..=64).contains(&parallelism) => Ok(parallelism),
        Ok(_) => Err("task parallelism must be between 1 and 64".to_string()),
        Err(_) => Err("task parallelism must be an integer between 1 and 64".to_string()),
    }
}
186
/// Import tool implementation.
///
/// Built by `ImportV2Command::build` and driven through `Tool::do_work`.
pub struct Import {
    /// Target catalog; used for the database client, the resume state
    /// identity and each data copy.
    catalog: String,
    /// Optional schema filter. `None` imports every schema listed in the
    /// snapshot manifest.
    schemas: Option<Vec<String>>,
    /// When set, `run` prints the DDL and the data import plan, then returns
    /// without executing anything.
    dry_run: bool,
    /// Progress reporting mode handed to `build_progress_reporter`.
    progress: ProgressMode,
    /// Task parallelism forwarded to the resume configuration.
    task_parallelism: usize,
    /// Explicit resume state file path; `None` selects the default path.
    state_path: Option<PathBuf>,
    /// Snapshot URI, used to build the copy source of each data task.
    snapshot_uri: String,
    /// Object store settings, used together with `snapshot_uri` when building
    /// the copy source.
    storage_config: ObjectStoreConfig,
    /// Snapshot storage used to read the manifest, the DDL files and the data
    /// file listing.
    storage: Box<dyn SnapshotStorage>,
    /// Client used to execute DDL and data copies against the target server.
    database_client: DatabaseClient,
}
200
201#[async_trait]
202impl Tool for Import {
203    async fn do_work(&self) -> std::result::Result<(), BoxedError> {
204        self.run().await.map_err(BoxedError::new)
205    }
206}
207
208impl Import {
209    async fn run(&self) -> Result<()> {
210        // 1. Read manifest
211        let manifest = self
212            .storage
213            .read_manifest()
214            .await
215            .context(SnapshotStorageSnafu)?;
216
217        info!(
218            "Loading snapshot: {} (version: {}, schema_only: {})",
219            manifest.snapshot_id, manifest.version, manifest.schema_only
220        );
221
222        // Check version compatibility
223        if manifest.version != MANIFEST_VERSION {
224            return ManifestVersionMismatchSnafu {
225                expected: MANIFEST_VERSION,
226                found: manifest.version,
227            }
228            .fail();
229        }
230
231        info!("Snapshot contains {} schema(s)", manifest.schemas.len());
232
233        // 2. Determine schemas to import
234        let schemas_to_import = match &self.schemas {
235            Some(filter) => canonicalize_schema_filter(filter, &manifest.schemas)?,
236            None => manifest.schemas.clone(),
237        };
238
239        info!("Importing schemas: {:?}", schemas_to_import);
240
241        // 3. Read DDL statements
242        let ddl_statements = self.read_ddl_statements(&schemas_to_import).await?;
243
244        info!("Generated {} DDL statements", ddl_statements.len());
245
246        let data_tasks = if !manifest.schema_only && !manifest.chunks.is_empty() {
247            validate_data_snapshot(self.storage.as_ref(), &manifest.chunks, &schemas_to_import)
248                .await?;
249            build_import_tasks(&manifest.chunks, &schemas_to_import)
250        } else {
251            Vec::new()
252        };
253
254        // 4. Dry-run mode: print DDL and exit
255        if self.dry_run {
256            info!("Dry-run mode - DDL statements to execute:");
257            println!();
258            for (i, stmt) in ddl_statements.iter().enumerate() {
259                println!("-- Statement {}", i + 1);
260                println!("{};", stmt.sql);
261                println!();
262            }
263            if !manifest.schema_only && !manifest.chunks.is_empty() {
264                for line in format_data_import_plan(&manifest.chunks, &schemas_to_import) {
265                    println!("{line}");
266                }
267                println!();
268            }
269            return Ok(());
270        }
271
272        let mut resume_session = if !data_tasks.is_empty() {
273            let state_path = resolve_state_path(
274                self.state_path.as_deref(),
275                &manifest.snapshot_id.to_string(),
276                self.database_client.addr(),
277                &self.catalog,
278                &schemas_to_import,
279            )?;
280            Some(
281                prepare_import_resume(ImportResumeConfig {
282                    snapshot_id: manifest.snapshot_id.to_string(),
283                    target_addr: self.database_client.addr().to_string(),
284                    catalog: self.catalog.clone(),
285                    schemas: schemas_to_import.clone(),
286                    state_path,
287                    tasks: data_tasks,
288                    task_parallelism: self.task_parallelism,
289                })
290                .await?,
291            )
292        } else {
293            None
294        };
295
296        let skip_ddl = resume_session
297            .as_ref()
298            .map(|session| session.should_skip_ddl())
299            .unwrap_or(false);
300
301        // 5. Execute DDL unless a previous run already completed it.
302        let ddl_executed = if skip_ddl {
303            info!(
304                "Existing import state has DDL marked completed; skipping DDL execution and resuming data import"
305            );
306            false
307        } else {
308            let executor = DdlExecutor::new(&self.database_client);
309            executor.execute_strict(&ddl_statements).await?;
310            if let Some(session) = resume_session.as_mut() {
311                session.mark_ddl_completed().await?;
312            }
313            true
314        };
315
316        if let Some(resume_session) = resume_session {
317            let executor = CopyDatabaseImportTaskExecutor {
318                import: self,
319                format: manifest.format,
320            };
321            let progress = build_progress_reporter(self.progress);
322            import_with_resume_session_with_progress(resume_session, &executor, progress.as_ref())
323                .await?;
324        }
325
326        if ddl_executed {
327            info!(
328                "Import completed: {} DDL statements executed",
329                ddl_statements.len()
330            );
331        } else {
332            info!("Import completed: DDL execution skipped");
333        }
334
335        Ok(())
336    }
337
338    async fn read_ddl_statements(&self, schemas: &[String]) -> Result<Vec<DdlStatement>> {
339        let mut statements = Vec::new();
340        for schema in schemas {
341            let path = ddl_path_for_schema(schema);
342            let content = self
343                .storage
344                .read_text(&path)
345                .await
346                .context(SnapshotStorageSnafu)?;
347            statements.extend(
348                parse_ddl_statements(&content)
349                    .into_iter()
350                    .map(|sql| ddl_statement_for_schema(schema, sql)),
351            );
352        }
353
354        Ok(statements)
355    }
356}
357
/// [`ImportTaskExecutor`] that imports one task by building a copy source for
/// its schema and chunk and running `execute_copy_database_from` against it.
struct CopyDatabaseImportTaskExecutor<'a> {
    /// The running import; supplies the snapshot URI, storage configuration,
    /// catalog and database client.
    import: &'a Import,
    /// Data format passed to every copy; `run` takes it from the manifest.
    format: DataFormat,
}
362
363#[async_trait]
364impl ImportTaskExecutor for CopyDatabaseImportTaskExecutor<'_> {
365    async fn import_task(&self, task: &ImportTaskKey) -> Result<()> {
366        let source = build_copy_source(
367            &self.import.snapshot_uri,
368            &self.import.storage_config,
369            &task.schema,
370            task.chunk_id,
371        )
372        .context(ChunkImportFailedSnafu {
373            chunk_id: task.chunk_id,
374            schema: task.schema.clone(),
375        })?;
376
377        execute_copy_database_from(
378            &self.import.database_client,
379            &self.import.catalog,
380            &task.schema,
381            &source,
382            self.format,
383        )
384        .await
385        .context(ChunkImportFailedSnafu {
386            chunk_id: task.chunk_id,
387            schema: task.schema.clone(),
388        })
389    }
390}
391
/// Splits the text of a DDL file into individual SQL statements.
///
/// Statements are separated by `;`. Each returned statement is trimmed, has no
/// trailing semicolon, and is never empty. The scanner understands:
///
/// - `-- line comments`, replaced by the newline that terminates them;
/// - `/* block comments */`, replaced by a single space so the tokens on
///   either side stay separated (`1/* c */AS` yields `1 AS`, not `1AS`);
/// - single- and double-quoted sections, copied verbatim. A doubled quote
///   (`''` or `""`) inside such a section is an escaped quote, and semicolons
///   or comment markers inside quotes have no special meaning.
///
/// An unterminated comment or quoted section runs to the end of the input.
/// Trailing text without a final `;` is returned as the last statement.
fn parse_ddl_statements(content: &str) -> Vec<String> {
    /// What the scanner is currently inside of.
    #[derive(Clone, Copy)]
    enum Scan {
        Code,
        LineComment,
        BlockComment,
        /// Inside a quoted section delimited by the given quote character.
        Quoted(char),
    }

    /// Moves the trimmed, non-empty content of `current` into `statements`
    /// and resets `current`.
    fn flush(current: &mut String, statements: &mut Vec<String>) {
        let statement = current.trim();
        if !statement.is_empty() {
            statements.push(statement.to_string());
        }
        current.clear();
    }

    let mut statements = Vec::new();
    let mut current = String::new();
    let mut state = Scan::Code;
    let mut chars = content.chars().peekable();

    while let Some(ch) = chars.next() {
        match state {
            Scan::LineComment => {
                if ch == '\n' {
                    // Keep the line break so the comment still separates tokens.
                    current.push('\n');
                    state = Scan::Code;
                }
            }
            Scan::BlockComment => {
                if ch == '*' && chars.next_if_eq(&'/').is_some() {
                    state = Scan::Code;
                }
            }
            Scan::Quoted(quote) => {
                current.push(ch);
                if ch == quote {
                    if chars.next_if_eq(&quote).is_some() {
                        // Doubled quote: an escaped quote, still inside.
                        current.push(quote);
                    } else {
                        state = Scan::Code;
                    }
                }
            }
            Scan::Code => match ch {
                '-' if chars.next_if_eq(&'-').is_some() => state = Scan::LineComment,
                '/' if chars.next_if_eq(&'*').is_some() => {
                    // A comment counts as whitespace; without this the tokens
                    // around it would be glued together.
                    current.push(' ');
                    state = Scan::BlockComment;
                }
                '\'' | '"' => {
                    current.push(ch);
                    state = Scan::Quoted(ch);
                }
                ';' => flush(&mut current, &mut statements),
                _ => current.push(ch),
            },
        }
    }

    // Whatever follows the last `;` is a statement too, if non-blank.
    flush(&mut current, &mut statements);

    statements
}
477
478fn ddl_statement_for_schema(schema: &str, sql: String) -> DdlStatement {
479    if is_schema_scoped_statement(&sql) {
480        DdlStatement::with_execution_schema(sql, schema.to_string())
481    } else {
482        DdlStatement::new(sql)
483    }
484}
485
486fn is_schema_scoped_statement(sql: &str) -> bool {
487    let trimmed = sql.trim_start();
488    if !starts_with_keyword(trimmed, "CREATE") {
489        return false;
490    }
491
492    let Some(rest) = trimmed.get("CREATE".len()..) else {
493        return false;
494    };
495    let mut rest = rest.trim_start();
496    if starts_with_keyword(rest, "OR") {
497        let Some(next) = rest.get("OR".len()..) else {
498            return false;
499        };
500        rest = next.trim_start();
501        if !starts_with_keyword(rest, "REPLACE") {
502            return false;
503        }
504        let Some(next) = rest.get("REPLACE".len()..) else {
505            return false;
506        };
507        rest = next.trim_start();
508    }
509
510    if starts_with_keyword(rest, "EXTERNAL") {
511        let Some(next) = rest.get("EXTERNAL".len()..) else {
512            return false;
513        };
514        rest = next.trim_start();
515    }
516
517    starts_with_keyword(rest, "TABLE") || starts_with_keyword(rest, "VIEW")
518}
519
/// Reports whether `input` begins with `keyword` as a whole word.
///
/// The comparison ignores ASCII case. The keyword must be followed either by
/// the end of the input or by a byte that is neither an ASCII letter or digit
/// nor `_`, so `TABLESPACE` does not count as `TABLE`.
fn starts_with_keyword(input: &str, keyword: &str) -> bool {
    let Some(head) = input.get(..keyword.len()) else {
        return false;
    };
    if !head.eq_ignore_ascii_case(keyword) {
        return false;
    }

    match input.as_bytes().get(keyword.len()) {
        None => true,
        Some(&next) => !(next.is_ascii_alphanumeric() || next == b'_'),
    }
}
531
532fn canonicalize_schema_filter(
533    filter: &[String],
534    manifest_schemas: &[String],
535) -> Result<Vec<String>> {
536    let mut canonicalized = Vec::new();
537    let mut seen = HashSet::new();
538
539    for schema in filter {
540        let canonical = manifest_schemas
541            .iter()
542            .find(|candidate| candidate.eq_ignore_ascii_case(schema))
543            .cloned()
544            .ok_or_else(|| {
545                SchemaNotInSnapshotSnafu {
546                    schema: schema.clone(),
547                }
548                .build()
549            })?;
550
551        if seen.insert(canonical.to_ascii_lowercase()) {
552            canonicalized.push(canonical);
553        }
554    }
555
556    Ok(canonicalized)
557}
558
559fn validate_chunk_statuses(chunks: &[ChunkMeta]) -> Result<()> {
560    let invalid_chunk = chunks
561        .iter()
562        .find(|chunk| !matches!(chunk.status, ChunkStatus::Completed | ChunkStatus::Skipped));
563
564    if let Some(chunk) = invalid_chunk {
565        return IncompleteSnapshotSnafu {
566            chunk_id: chunk.id,
567            status: chunk.status,
568        }
569        .fail();
570    }
571
572    Ok(())
573}
574
575fn format_data_import_plan(chunks: &[ChunkMeta], schemas: &[String]) -> Vec<String> {
576    let mut lines = vec!["-- Data import plan:".to_string()];
577    for chunk in chunks {
578        lines.push(format!("-- Chunk {}: {:?}", chunk.id, chunk.status));
579        for schema in schemas {
580            if chunk_has_schema_files(chunk, schema) {
581                lines.push(format!("--   {} -> COPY DATABASE FROM", schema));
582            }
583        }
584    }
585    lines
586}
587
588async fn validate_data_snapshot(
589    storage: &dyn SnapshotStorage,
590    chunks: &[ChunkMeta],
591    schemas: &[String],
592) -> Result<()> {
593    validate_chunk_statuses(chunks)?;
594    let actual_prefixes = collect_chunk_data_prefixes(storage).await?;
595
596    for chunk in chunks {
597        if chunk.status == ChunkStatus::Skipped {
598            continue;
599        }
600        if chunk.files.is_empty() {
601            return EmptyChunkManifestSnafu { chunk_id: chunk.id }.fail();
602        }
603        for schema in schemas {
604            validate_chunk_schema_files(chunk, schema, &actual_prefixes)?;
605        }
606    }
607
608    Ok(())
609}
610
611async fn collect_chunk_data_prefixes(storage: &dyn SnapshotStorage) -> Result<HashSet<String>> {
612    let files = storage
613        .list_files_recursive("data/")
614        .await
615        .context(SnapshotStorageSnafu)?;
616    let mut prefixes = HashSet::new();
617
618    for path in files {
619        let normalized = path.trim_start_matches('/');
620        let mut parts = normalized.splitn(4, '/');
621        let Some(root) = parts.next() else {
622            continue;
623        };
624        let Some(schema) = parts.next() else {
625            continue;
626        };
627        let Some(chunk_id) = parts.next() else {
628            continue;
629        };
630        if root != "data" {
631            continue;
632        }
633        prefixes.insert(format!("data/{schema}/{chunk_id}/"));
634    }
635
636    Ok(prefixes)
637}
638
639fn validate_chunk_schema_files(
640    chunk: &ChunkMeta,
641    schema: &str,
642    actual_prefixes: &HashSet<String>,
643) -> Result<bool> {
644    if !chunk_has_schema_files(chunk, schema) {
645        return Ok(false);
646    }
647
648    let prefix = data_dir_for_schema_chunk(schema, chunk.id);
649    if !actual_prefixes.contains(&prefix) {
650        return MissingChunkDataSnafu {
651            chunk_id: chunk.id,
652            schema: schema.to_string(),
653            path: prefix,
654        }
655        .fail();
656    }
657
658    Ok(true)
659}
660
661#[cfg(test)]
662mod tests {
663    use std::collections::{HashMap, HashSet};
664
665    use async_trait::async_trait;
666
667    use super::*;
668    use crate::data::export_v2::manifest::{ChunkMeta, ChunkStatus, Manifest, TimeRange};
669    use crate::data::export_v2::schema::SchemaSnapshot;
670    use crate::data::snapshot_storage::SnapshotStorage;
671
    /// In-memory [`SnapshotStorage`] stand-in for the tests in this module.
    struct StubStorage {
        /// Manifest handed out (cloned) by `read_manifest`.
        manifest: Manifest,
        /// Files returned by `list_files_recursive`: the files of every entry
        /// whose key starts with the requested prefix.
        files_by_prefix: HashMap<String, Vec<String>>,
    }
676
677    #[async_trait]
678    impl SnapshotStorage for StubStorage {
679        async fn exists(&self) -> crate::data::export_v2::error::Result<bool> {
680            Ok(true)
681        }
682
683        async fn read_manifest(&self) -> crate::data::export_v2::error::Result<Manifest> {
684            Ok(self.manifest.clone())
685        }
686
687        async fn write_manifest(
688            &self,
689            _manifest: &Manifest,
690        ) -> crate::data::export_v2::error::Result<()> {
691            unimplemented!("not needed in import_v2::command tests")
692        }
693
694        async fn read_text(&self, _path: &str) -> crate::data::export_v2::error::Result<String> {
695            unimplemented!("not needed in import_v2::command tests")
696        }
697
698        async fn write_text(
699            &self,
700            _path: &str,
701            _content: &str,
702        ) -> crate::data::export_v2::error::Result<()> {
703            unimplemented!("not needed in import_v2::command tests")
704        }
705
706        async fn write_schema(
707            &self,
708            _snapshot: &SchemaSnapshot,
709        ) -> crate::data::export_v2::error::Result<()> {
710            unimplemented!("not needed in import_v2::command tests")
711        }
712
713        async fn create_dir_all(&self, _path: &str) -> crate::data::export_v2::error::Result<()> {
714            unimplemented!("not needed in import_v2::command tests")
715        }
716
717        async fn list_files_recursive(
718            &self,
719            prefix: &str,
720        ) -> crate::data::export_v2::error::Result<Vec<String>> {
721            Ok(self
722                .files_by_prefix
723                .iter()
724                .filter(|(candidate, _)| candidate.starts_with(prefix))
725                .flat_map(|(_, files)| files.clone())
726                .collect())
727        }
728
729        async fn delete_snapshot(&self) -> crate::data::export_v2::error::Result<()> {
730            unimplemented!("not needed in import_v2::command tests")
731        }
732    }
733
734    fn parse_command(extra: &[&str]) -> ImportV2Command {
735        let mut args = vec![
736            "import-v2",
737            "--addr",
738            "127.0.0.1:4000",
739            "--from",
740            "file:///tmp/snapshot",
741        ];
742        args.extend_from_slice(extra);
743        ImportV2Command::try_parse_from(args).expect("command should parse")
744    }
745
746    #[test]
747    fn test_progress_mode_defaults_to_auto() {
748        assert_eq!(parse_command(&[]).progress, ProgressMode::Auto);
749    }
750
751    #[test]
752    fn test_progress_mode_parses_explicit_values() {
753        assert_eq!(
754            parse_command(&["--progress", "always"]).progress,
755            ProgressMode::Always
756        );
757        assert_eq!(
758            parse_command(&["--progress", "never"]).progress,
759            ProgressMode::Never
760        );
761        assert_eq!(
762            parse_command(&["--progress", "auto"]).progress,
763            ProgressMode::Auto
764        );
765    }
766
767    #[test]
768    fn test_progress_mode_rejects_unknown_value() {
769        assert!(
770            ImportV2Command::try_parse_from([
771                "import-v2",
772                "--addr",
773                "127.0.0.1:4000",
774                "--from",
775                "file:///tmp/snapshot",
776                "--progress",
777                "bogus",
778            ])
779            .is_err()
780        );
781    }
782
783    #[test]
784    fn test_task_parallelism_defaults_to_one() {
785        assert_eq!(parse_command(&[]).task_parallelism, 1);
786    }
787
788    #[test]
789    fn test_task_parallelism_parses_valid_values() {
790        assert_eq!(
791            parse_command(&["--task-parallelism", "2"]).task_parallelism,
792            2
793        );
794        assert_eq!(
795            parse_command(&["--task-parallelism", "64"]).task_parallelism,
796            64
797        );
798    }
799
800    #[test]
801    fn test_state_path_defaults_to_none() {
802        assert_eq!(parse_command(&[]).state_path, None);
803    }
804
805    #[test]
806    fn test_state_path_parses_explicit_value() {
807        assert_eq!(
808            parse_command(&["--state-path", "/tmp/import_state.json"]).state_path,
809            Some(PathBuf::from("/tmp/import_state.json"))
810        );
811    }
812
813    #[test]
814    fn test_resolve_state_path_prefers_override() {
815        let override_path = PathBuf::from("/tmp/custom_import_state.json");
816        let resolved = resolve_state_path(
817            Some(override_path.as_path()),
818            "snapshot-1",
819            "127.0.0.1:4000",
820            "greptime",
821            &["public".to_string()],
822        )
823        .unwrap();
824        assert_eq!(resolved, override_path);
825    }
826
827    #[test]
828    fn test_resolve_state_path_uses_default_when_absent() {
829        let resolved = resolve_state_path(
830            None,
831            "snapshot-1",
832            "127.0.0.1:4000",
833            "greptime",
834            &["public".to_string()],
835        )
836        .unwrap();
837        let expected = default_state_path(
838            "snapshot-1",
839            "127.0.0.1:4000",
840            "greptime",
841            &["public".to_string()],
842        )
843        .unwrap();
844        assert_eq!(resolved, expected);
845    }
846
847    #[test]
848    fn test_task_parallelism_rejects_invalid_values() {
849        for value in ["0", "65", "abc"] {
850            assert!(
851                ImportV2Command::try_parse_from([
852                    "import-v2",
853                    "--addr",
854                    "127.0.0.1:4000",
855                    "--from",
856                    "file:///tmp/snapshot",
857                    "--task-parallelism",
858                    value,
859                ])
860                .is_err(),
861                "value {value} should be rejected"
862            );
863        }
864    }
865
866    #[test]
867    fn test_parse_ddl_statements() {
868        let content = r#"
869-- Schema: public
870CREATE DATABASE public;
871CREATE TABLE t (ts TIMESTAMP TIME INDEX, host STRING, PRIMARY KEY (host)) ENGINE=mito;
872
873-- comment
874CREATE VIEW v AS SELECT * FROM t;
875"#;
876        let statements = parse_ddl_statements(content);
877        assert_eq!(statements.len(), 3);
878        assert!(statements[0].starts_with("CREATE DATABASE public"));
879        assert!(statements[1].starts_with("CREATE TABLE t"));
880        assert!(statements[2].starts_with("CREATE VIEW v"));
881    }
882
883    #[test]
884    fn test_parse_ddl_statements_preserves_semicolons_in_string_literals() {
885        let content = r#"
886CREATE TABLE t (
887    host STRING DEFAULT 'a;b'
888);
889CREATE VIEW v AS SELECT ';' AS marker;
890"#;
891
892        let statements = parse_ddl_statements(content);
893
894        assert_eq!(statements.len(), 2);
895        assert!(statements[0].contains("'a;b'"));
896        assert!(statements[1].contains("';' AS marker"));
897    }
898
899    #[test]
900    fn test_parse_ddl_statements_handles_comments_without_splitting() {
901        let content = r#"
902-- leading comment
903CREATE TABLE t (ts TIMESTAMP TIME INDEX); /* block; comment */
904CREATE VIEW v AS SELECT 1;
905"#;
906
907        let statements = parse_ddl_statements(content);
908
909        assert_eq!(statements.len(), 2);
910        assert!(statements[0].starts_with("CREATE TABLE t"));
911        assert!(statements[1].starts_with("CREATE VIEW v"));
912    }
913
914    #[test]
915    fn test_canonicalize_schema_filter_uses_manifest_casing() {
916        let filter = vec!["TEST_DB".to_string(), "PUBLIC".to_string()];
917        let manifest_schemas = vec!["test_db".to_string(), "public".to_string()];
918
919        let canonicalized = canonicalize_schema_filter(&filter, &manifest_schemas).unwrap();
920
921        assert_eq!(canonicalized, vec!["test_db", "public"]);
922    }
923
924    #[test]
925    fn test_canonicalize_schema_filter_dedupes_case_insensitive_matches() {
926        let filter = vec![
927            "TEST_DB".to_string(),
928            "test_db".to_string(),
929            "PUBLIC".to_string(),
930            "public".to_string(),
931        ];
932        let manifest_schemas = vec!["test_db".to_string(), "public".to_string()];
933
934        let canonicalized = canonicalize_schema_filter(&filter, &manifest_schemas).unwrap();
935
936        assert_eq!(canonicalized, vec!["test_db", "public"]);
937    }
938
939    #[test]
940    fn test_canonicalize_schema_filter_rejects_missing_schema() {
941        let filter = vec!["missing".to_string()];
942        let manifest_schemas = vec!["test_db".to_string()];
943
944        let error = canonicalize_schema_filter(&filter, &manifest_schemas)
945            .expect_err("missing schema should fail")
946            .to_string();
947
948        assert!(error.contains("missing"));
949    }
950
951    #[test]
952    fn test_ddl_statement_for_schema_create_table_uses_execution_schema() {
953        let stmt = ddl_statement_for_schema(
954            "test_db",
955            "CREATE TABLE metrics (ts TIMESTAMP TIME INDEX) ENGINE=mito".to_string(),
956        );
957        assert_eq!(stmt.execution_schema.as_deref(), Some("test_db"));
958    }
959
960    #[test]
961    fn test_ddl_statement_for_schema_create_view_uses_execution_schema() {
962        let stmt = ddl_statement_for_schema(
963            "test_db",
964            "CREATE VIEW metrics_view AS SELECT * FROM metrics".to_string(),
965        );
966        assert_eq!(stmt.execution_schema.as_deref(), Some("test_db"));
967    }
968
969    #[test]
970    fn test_ddl_statement_for_schema_create_or_replace_view_uses_execution_schema() {
971        let stmt = ddl_statement_for_schema(
972            "test_db",
973            "CREATE OR REPLACE VIEW metrics_view AS SELECT * FROM metrics".to_string(),
974        );
975        assert_eq!(stmt.execution_schema.as_deref(), Some("test_db"));
976    }
977
978    #[test]
979    fn test_ddl_statement_for_schema_create_external_table_uses_execution_schema() {
980        let stmt = ddl_statement_for_schema(
981            "test_db",
982            "CREATE EXTERNAL TABLE IF NOT EXISTS ext_metrics (ts TIMESTAMP TIME INDEX) ENGINE=file"
983                .to_string(),
984        );
985        assert_eq!(stmt.execution_schema.as_deref(), Some("test_db"));
986    }
987
988    #[test]
989    fn test_ddl_statement_for_schema_create_database_uses_public_context() {
990        let stmt = ddl_statement_for_schema("test_db", "CREATE DATABASE test_db".to_string());
991        assert_eq!(stmt.execution_schema, None);
992    }
993
994    #[test]
995    fn test_starts_with_keyword_requires_word_boundary() {
996        assert!(starts_with_keyword("CREATE TABLE t", "CREATE"));
997        assert!(!starts_with_keyword("CREATED TABLE t", "CREATE"));
998        assert!(!starts_with_keyword("TABLESPACE foo", "TABLE"));
999    }
1000
1001    #[test]
1002    fn test_validate_chunk_statuses_rejects_failed_chunk() {
1003        let mut failed = ChunkMeta::new(3, TimeRange::unbounded());
1004        failed.status = ChunkStatus::Failed;
1005
1006        let error = validate_chunk_statuses(&[failed]).expect_err("failed chunk should error");
1007        assert!(error.to_string().contains("Incomplete snapshot"));
1008    }
1009
1010    #[test]
1011    fn test_validate_chunk_statuses_accepts_completed_and_skipped_chunks() {
1012        let mut completed = ChunkMeta::new(1, TimeRange::unbounded());
1013        completed.status = ChunkStatus::Completed;
1014        let skipped = ChunkMeta::skipped(2, TimeRange::unbounded());
1015
1016        assert!(validate_chunk_statuses(&[completed, skipped]).is_ok());
1017    }
1018
1019    #[test]
1020    fn test_chunk_has_schema_files_matches_encoded_schema_prefix() {
1021        let mut chunk = ChunkMeta::new(7, TimeRange::unbounded());
1022        chunk.files = vec![
1023            "data/public/7/a.parquet".to_string(),
1024            "data/%E6%B5%8B%E8%AF%95/7/b.parquet".to_string(),
1025        ];
1026
1027        assert!(chunk_has_schema_files(&chunk, "public"));
1028        assert!(chunk_has_schema_files(&chunk, "测试"));
1029        assert!(!chunk_has_schema_files(&chunk, "metrics"));
1030    }
1031
1032    #[test]
1033    fn test_format_data_import_plan_includes_matching_schemas_only() {
1034        let mut completed = ChunkMeta::new(1, TimeRange::unbounded());
1035        completed.status = ChunkStatus::Completed;
1036        completed.files = vec![
1037            "data/public/1/a.parquet".to_string(),
1038            "data/%E6%B5%8B%E8%AF%95/1/b.parquet".to_string(),
1039        ];
1040        let skipped = ChunkMeta::skipped(2, TimeRange::unbounded());
1041
1042        let lines = format_data_import_plan(
1043            &[completed, skipped],
1044            &[
1045                "public".to_string(),
1046                "测试".to_string(),
1047                "metrics".to_string(),
1048            ],
1049        );
1050
1051        assert_eq!(lines[0], "-- Data import plan:");
1052        assert!(lines.contains(&"-- Chunk 1: Completed".to_string()));
1053        assert!(lines.contains(&"--   public -> COPY DATABASE FROM".to_string()));
1054        assert!(lines.contains(&"--   测试 -> COPY DATABASE FROM".to_string()));
1055        assert!(!lines.contains(&"--   metrics -> COPY DATABASE FROM".to_string()));
1056        assert!(lines.contains(&"-- Chunk 2: Skipped".to_string()));
1057    }
1058
1059    #[tokio::test]
1060    async fn test_collect_chunk_data_prefixes_indexes_present_prefixes() {
1061        let storage = StubStorage {
1062            manifest: Manifest::new_schema_only("greptime".to_string(), vec!["public".to_string()]),
1063            files_by_prefix: HashMap::from([
1064                (
1065                    "data/public/7/".to_string(),
1066                    vec!["data/public/7/a.parquet".to_string()],
1067                ),
1068                (
1069                    "data/%E6%B5%8B%E8%AF%95/9/".to_string(),
1070                    vec!["data/%E6%B5%8B%E8%AF%95/9/b.parquet".to_string()],
1071                ),
1072            ]),
1073        };
1074
1075        let prefixes = collect_chunk_data_prefixes(&storage).await.unwrap();
1076
1077        assert!(prefixes.contains("data/public/7/"));
1078        assert!(prefixes.contains("data/%E6%B5%8B%E8%AF%95/9/"));
1079    }
1080
1081    #[test]
1082    fn test_validate_chunk_schema_files_accepts_present_prefix() {
1083        let mut chunk = ChunkMeta::new(7, TimeRange::unbounded());
1084        chunk.files = vec!["data/public/7/a.parquet".to_string()];
1085        let actual_prefixes = HashSet::from(["data/public/7/".to_string()]);
1086
1087        assert!(validate_chunk_schema_files(&chunk, "public", &actual_prefixes).unwrap());
1088    }
1089
1090    #[test]
1091    fn test_validate_chunk_schema_files_rejects_missing_prefix() {
1092        let mut chunk = ChunkMeta::new(7, TimeRange::unbounded());
1093        chunk.files = vec!["data/public/7/a.parquet".to_string()];
1094
1095        let error = validate_chunk_schema_files(&chunk, "public", &HashSet::new())
1096            .expect_err("missing chunk prefix should fail")
1097            .to_string();
1098        assert!(error.contains("marked completed but no files were found"));
1099    }
1100
1101    #[test]
1102    fn test_validate_chunk_schema_files_skips_absent_schema() {
1103        let mut chunk = ChunkMeta::new(7, TimeRange::unbounded());
1104        chunk.files = vec!["data/public/7/a.parquet".to_string()];
1105
1106        assert!(!validate_chunk_schema_files(&chunk, "metrics", &HashSet::new()).unwrap());
1107    }
1108
1109    #[tokio::test]
1110    async fn test_validate_data_snapshot_rejects_failed_chunk_before_dry_run() {
1111        let mut failed = ChunkMeta::new(3, TimeRange::unbounded());
1112        failed.status = ChunkStatus::Failed;
1113
1114        let storage = StubStorage {
1115            manifest: Manifest::new_schema_only("greptime".to_string(), vec!["public".to_string()]),
1116            files_by_prefix: HashMap::new(),
1117        };
1118
1119        let error = validate_data_snapshot(&storage, &[failed], &["public".to_string()])
1120            .await
1121            .expect_err("failed chunk should reject dry-run validation")
1122            .to_string();
1123        assert!(error.contains("Incomplete snapshot"));
1124    }
1125
1126    #[tokio::test]
1127    async fn test_validate_data_snapshot_rejects_missing_chunk_prefix_before_dry_run() {
1128        let mut completed = ChunkMeta::new(7, TimeRange::unbounded());
1129        completed.status = ChunkStatus::Completed;
1130        completed.files = vec!["data/public/7/a.parquet".to_string()];
1131
1132        let storage = StubStorage {
1133            manifest: Manifest::new_schema_only("greptime".to_string(), vec!["public".to_string()]),
1134            files_by_prefix: HashMap::new(),
1135        };
1136
1137        let error = validate_data_snapshot(&storage, &[completed], &["public".to_string()])
1138            .await
1139            .expect_err("missing chunk prefix should reject dry-run validation")
1140            .to_string();
1141        assert!(error.contains("marked completed but no files were found"));
1142    }
1143
1144    #[tokio::test]
1145    async fn test_validate_data_snapshot_rejects_completed_chunk_with_empty_manifest() {
1146        let mut completed = ChunkMeta::new(7, TimeRange::unbounded());
1147        completed.status = ChunkStatus::Completed;
1148
1149        let storage = StubStorage {
1150            manifest: Manifest::new_schema_only("greptime".to_string(), vec!["public".to_string()]),
1151            files_by_prefix: HashMap::new(),
1152        };
1153
1154        let error = validate_data_snapshot(&storage, &[completed], &["public".to_string()])
1155            .await
1156            .expect_err("empty completed chunk should reject validation")
1157            .to_string();
1158        assert!(error.contains("file manifest is empty"));
1159    }
1160}