1use std::collections::HashSet;
18use std::path::{Path, PathBuf};
19use std::time::Duration;
20
21use async_trait::async_trait;
22use clap::Parser;
23use common_error::ext::BoxedError;
24use common_telemetry::info;
25use snafu::{OptionExt, ResultExt};
26
27use crate::Tool;
28use crate::common::ObjectStoreConfig;
29use crate::data::export_v2::data::{build_copy_source, execute_copy_database_from};
30use crate::data::export_v2::manifest::{ChunkMeta, ChunkStatus, DataFormat, MANIFEST_VERSION};
31use crate::data::import_v2::coordinator::{
32 ImportResumeConfig, ImportTaskExecutor, build_import_tasks, chunk_has_schema_files,
33 import_with_resume_session_with_progress, prepare_import_resume,
34};
35use crate::data::import_v2::error::{
36 ChunkImportFailedSnafu, EmptyChunkManifestSnafu, ImportStatePathUnavailableSnafu,
37 IncompleteSnapshotSnafu, ManifestVersionMismatchSnafu, MissingChunkDataSnafu, Result,
38 SchemaNotInSnapshotSnafu, SnapshotStorageSnafu,
39};
40use crate::data::import_v2::executor::{DdlExecutor, DdlStatement};
41use crate::data::import_v2::state::{ImportTaskKey, default_state_path};
42use crate::data::path::{data_dir_for_schema_chunk, ddl_path_for_schema};
43use crate::data::progress::{ProgressMode, build_progress_reporter};
44use crate::data::snapshot_storage::{OpenDalStorage, SnapshotStorage, validate_uri};
45use crate::database::{DatabaseClient, parse_proxy_opts};
46
/// Command-line arguments for importing a snapshot into a target database.
#[derive(Debug, Parser)]
pub struct ImportV2Command {
    /// Address of the target database server.
    #[clap(long)]
    addr: String,

    /// URI of the snapshot to import from.
    #[clap(long)]
    from: String,

    /// Catalog to import into.
    #[clap(long, default_value = "greptime")]
    catalog: String,

    /// Comma-separated list of schemas to import; when omitted, every schema
    /// listed in the snapshot manifest is imported.
    #[clap(long, value_delimiter = ',')]
    schemas: Vec<String>,

    /// Print the DDL statements and the data import plan without executing them.
    #[clap(long)]
    dry_run: bool,

    /// Progress reporting mode.
    #[clap(long, value_enum, default_value_t = ProgressMode::Auto)]
    progress: ProgressMode,

    /// Task parallelism for the data import; must be between 1 and 64.
    #[clap(long, default_value = "1", value_parser = parse_task_parallelism)]
    task_parallelism: usize,

    /// Path of the import state file; overrides the default location derived
    /// from the snapshot id, target address, catalog and schemas.
    #[clap(long)]
    state_path: Option<PathBuf>,

    /// Basic authentication credentials handed to the database client.
    #[clap(long)]
    auth_basic: Option<String>,

    /// Timeout handed to the database client, in humantime format;
    /// 60 seconds when omitted.
    #[clap(long, value_parser = humantime::parse_duration)]
    timeout: Option<Duration>,

    /// Proxy setting used when building the database client.
    #[clap(long)]
    proxy: Option<String>,

    /// Proxy opt-out flag, evaluated together with `proxy` and also handed to
    /// the database client.
    #[clap(long)]
    no_proxy: bool,

    /// Object store settings used to open the snapshot storage and to build
    /// the per-chunk copy sources.
    #[clap(flatten)]
    storage: ObjectStoreConfig,
}
110
111impl ImportV2Command {
112 pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
113 validate_uri(&self.from)
115 .context(SnapshotStorageSnafu)
116 .map_err(BoxedError::new)?;
117
118 let schemas = if self.schemas.is_empty() {
120 None
121 } else {
122 Some(self.schemas.clone())
123 };
124
125 let storage = OpenDalStorage::from_uri(&self.from, &self.storage)
127 .context(SnapshotStorageSnafu)
128 .map_err(BoxedError::new)?;
129
130 let proxy = parse_proxy_opts(self.proxy.clone(), self.no_proxy)?;
132 let database_client = DatabaseClient::new(
133 self.addr.clone(),
134 self.catalog.clone(),
135 self.auth_basic.clone(),
136 self.timeout.unwrap_or(Duration::from_secs(60)),
137 proxy,
138 self.no_proxy,
139 );
140
141 Ok(Box::new(Import {
142 catalog: self.catalog.clone(),
143 schemas,
144 dry_run: self.dry_run,
145 progress: self.progress,
146 task_parallelism: self.task_parallelism,
147 state_path: self.state_path.clone(),
148 snapshot_uri: self.from.clone(),
149 storage_config: self.storage.clone(),
150 storage: Box::new(storage),
151 database_client,
152 }))
153 }
154}
155
156fn resolve_state_path(
160 override_path: Option<&Path>,
161 snapshot_id: &str,
162 target_addr: &str,
163 catalog: &str,
164 schemas: &[String],
165) -> Result<PathBuf> {
166 if let Some(path) = override_path {
167 return Ok(path.to_path_buf());
168 }
169 default_state_path(snapshot_id, target_addr, catalog, schemas).context(
170 ImportStatePathUnavailableSnafu {
171 snapshot_id: snapshot_id.to_string(),
172 },
173 )
174}
175
/// Parses the `--task-parallelism` argument.
///
/// Accepts only integers in `1..=64`.
///
/// # Errors
///
/// Returns a human-readable message when `value` is not an unsigned integer
/// or lies outside the accepted range.
fn parse_task_parallelism(value: &str) -> std::result::Result<usize, String> {
    match value.parse::<usize>() {
        Ok(parallelism) if (1..=64).contains(&parallelism) => Ok(parallelism),
        Ok(_) => Err("task parallelism must be between 1 and 64".to_string()),
        Err(_) => Err("task parallelism must be an integer between 1 and 64".to_string()),
    }
}
186
/// Import tool assembled by `ImportV2Command::build` and executed through
/// its `Tool` implementation.
pub struct Import {
    /// Catalog used for the state path, the resume configuration and each chunk copy.
    catalog: String,
    /// Optional schema filter; `None` imports every schema listed in the manifest.
    schemas: Option<Vec<String>>,
    /// When set, the plan is printed and nothing is executed.
    dry_run: bool,
    /// Mode handed to `build_progress_reporter` for the data import.
    progress: ProgressMode,
    /// Parallelism forwarded to the resume configuration.
    task_parallelism: usize,
    /// Explicit state file location; `None` selects the default path.
    state_path: Option<PathBuf>,
    /// Snapshot URI used to build per-chunk copy sources.
    snapshot_uri: String,
    /// Object store settings used to build per-chunk copy sources.
    storage_config: ObjectStoreConfig,
    /// Storage used to read the manifest, the DDL files and the data file listing.
    storage: Box<dyn SnapshotStorage>,
    /// Client used to run DDL and chunk copies against the target database.
    database_client: DatabaseClient,
}
200
201#[async_trait]
202impl Tool for Import {
203 async fn do_work(&self) -> std::result::Result<(), BoxedError> {
204 self.run().await.map_err(BoxedError::new)
205 }
206}
207
208impl Import {
209 async fn run(&self) -> Result<()> {
210 let manifest = self
212 .storage
213 .read_manifest()
214 .await
215 .context(SnapshotStorageSnafu)?;
216
217 info!(
218 "Loading snapshot: {} (version: {}, schema_only: {})",
219 manifest.snapshot_id, manifest.version, manifest.schema_only
220 );
221
222 if manifest.version != MANIFEST_VERSION {
224 return ManifestVersionMismatchSnafu {
225 expected: MANIFEST_VERSION,
226 found: manifest.version,
227 }
228 .fail();
229 }
230
231 info!("Snapshot contains {} schema(s)", manifest.schemas.len());
232
233 let schemas_to_import = match &self.schemas {
235 Some(filter) => canonicalize_schema_filter(filter, &manifest.schemas)?,
236 None => manifest.schemas.clone(),
237 };
238
239 info!("Importing schemas: {:?}", schemas_to_import);
240
241 let ddl_statements = self.read_ddl_statements(&schemas_to_import).await?;
243
244 info!("Generated {} DDL statements", ddl_statements.len());
245
246 let data_tasks = if !manifest.schema_only && !manifest.chunks.is_empty() {
247 validate_data_snapshot(self.storage.as_ref(), &manifest.chunks, &schemas_to_import)
248 .await?;
249 build_import_tasks(&manifest.chunks, &schemas_to_import)
250 } else {
251 Vec::new()
252 };
253
254 if self.dry_run {
256 info!("Dry-run mode - DDL statements to execute:");
257 println!();
258 for (i, stmt) in ddl_statements.iter().enumerate() {
259 println!("-- Statement {}", i + 1);
260 println!("{};", stmt.sql);
261 println!();
262 }
263 if !manifest.schema_only && !manifest.chunks.is_empty() {
264 for line in format_data_import_plan(&manifest.chunks, &schemas_to_import) {
265 println!("{line}");
266 }
267 println!();
268 }
269 return Ok(());
270 }
271
272 let mut resume_session = if !data_tasks.is_empty() {
273 let state_path = resolve_state_path(
274 self.state_path.as_deref(),
275 &manifest.snapshot_id.to_string(),
276 self.database_client.addr(),
277 &self.catalog,
278 &schemas_to_import,
279 )?;
280 Some(
281 prepare_import_resume(ImportResumeConfig {
282 snapshot_id: manifest.snapshot_id.to_string(),
283 target_addr: self.database_client.addr().to_string(),
284 catalog: self.catalog.clone(),
285 schemas: schemas_to_import.clone(),
286 state_path,
287 tasks: data_tasks,
288 task_parallelism: self.task_parallelism,
289 })
290 .await?,
291 )
292 } else {
293 None
294 };
295
296 let skip_ddl = resume_session
297 .as_ref()
298 .map(|session| session.should_skip_ddl())
299 .unwrap_or(false);
300
301 let ddl_executed = if skip_ddl {
303 info!(
304 "Existing import state has DDL marked completed; skipping DDL execution and resuming data import"
305 );
306 false
307 } else {
308 let executor = DdlExecutor::new(&self.database_client);
309 executor.execute_strict(&ddl_statements).await?;
310 if let Some(session) = resume_session.as_mut() {
311 session.mark_ddl_completed().await?;
312 }
313 true
314 };
315
316 if let Some(resume_session) = resume_session {
317 let executor = CopyDatabaseImportTaskExecutor {
318 import: self,
319 format: manifest.format,
320 };
321 let progress = build_progress_reporter(self.progress);
322 import_with_resume_session_with_progress(resume_session, &executor, progress.as_ref())
323 .await?;
324 }
325
326 if ddl_executed {
327 info!(
328 "Import completed: {} DDL statements executed",
329 ddl_statements.len()
330 );
331 } else {
332 info!("Import completed: DDL execution skipped");
333 }
334
335 Ok(())
336 }
337
338 async fn read_ddl_statements(&self, schemas: &[String]) -> Result<Vec<DdlStatement>> {
339 let mut statements = Vec::new();
340 for schema in schemas {
341 let path = ddl_path_for_schema(schema);
342 let content = self
343 .storage
344 .read_text(&path)
345 .await
346 .context(SnapshotStorageSnafu)?;
347 statements.extend(
348 parse_ddl_statements(&content)
349 .into_iter()
350 .map(|sql| ddl_statement_for_schema(schema, sql)),
351 );
352 }
353
354 Ok(statements)
355 }
356}
357
/// `ImportTaskExecutor` that imports one schema/chunk pair at a time through
/// `execute_copy_database_from`.
struct CopyDatabaseImportTaskExecutor<'a> {
    /// Import whose snapshot URI, storage config, catalog and database client are used.
    import: &'a Import,
    /// Data format taken from the snapshot manifest.
    format: DataFormat,
}
362
363#[async_trait]
364impl ImportTaskExecutor for CopyDatabaseImportTaskExecutor<'_> {
365 async fn import_task(&self, task: &ImportTaskKey) -> Result<()> {
366 let source = build_copy_source(
367 &self.import.snapshot_uri,
368 &self.import.storage_config,
369 &task.schema,
370 task.chunk_id,
371 )
372 .context(ChunkImportFailedSnafu {
373 chunk_id: task.chunk_id,
374 schema: task.schema.clone(),
375 })?;
376
377 execute_copy_database_from(
378 &self.import.database_client,
379 &self.import.catalog,
380 &task.schema,
381 &source,
382 self.format,
383 )
384 .await
385 .context(ChunkImportFailedSnafu {
386 chunk_id: task.chunk_id,
387 schema: task.schema.clone(),
388 })
389 }
390}
391
/// Splits the text of a DDL file into individual SQL statements.
///
/// Statements are separated by `;`. Semicolons inside single-quoted literals,
/// double-quoted identifiers and comments do not split. Inside a quoted
/// region a doubled quote (`''` or `""`) is an escaped quote and keeps the
/// region open.
///
/// Comments are removed from the output. A `--` line comment is dropped up to
/// (not including) its newline, and each closed `/* ... */` block comment is
/// replaced by a single space so the tokens around it stay separated
/// (`CREATE/* c */TABLE` yields `CREATE TABLE`, not `CREATETABLE`).
///
/// Returned statements are trimmed, never empty, and carry no trailing `;`.
/// Text after the last `;` is returned as a final statement when non-blank.
fn parse_ddl_statements(content: &str) -> Vec<String> {
    /// Lexical context of the scanner; exactly one is active at any time.
    enum State {
        /// Plain SQL text.
        Normal,
        /// Inside a region delimited by the given quote character.
        Quoted(char),
        /// Inside a `--` comment, until the next newline.
        LineComment,
        /// Inside a `/* ... */` comment.
        BlockComment,
    }

    /// Moves the trimmed pending text into `statements` (if non-empty) and
    /// resets the buffer.
    fn flush(current: &mut String, statements: &mut Vec<String>) {
        let statement = current.trim();
        if !statement.is_empty() {
            statements.push(statement.to_string());
        }
        current.clear();
    }

    let mut statements = Vec::new();
    let mut current = String::new();
    let mut chars = content.chars().peekable();
    let mut state = State::Normal;

    while let Some(ch) = chars.next() {
        match state {
            State::LineComment => {
                if ch == '\n' {
                    state = State::Normal;
                    // Keep the newline so the surrounding tokens stay separated.
                    current.push('\n');
                }
            }
            State::BlockComment => {
                if ch == '*' && chars.next_if_eq(&'/').is_some() {
                    state = State::Normal;
                    // A comment acts as whitespace; without this the tokens
                    // on either side would be fused together.
                    current.push(' ');
                }
            }
            State::Quoted(quote) => {
                current.push(ch);
                if ch == quote {
                    if chars.next_if_eq(&quote).is_some() {
                        // Doubled quote: escaped quote, still inside the region.
                        current.push(quote);
                    } else {
                        state = State::Normal;
                    }
                }
            }
            State::Normal => match ch {
                // The guards consume the second character of the comment
                // opener only when it is actually present.
                '-' if chars.next_if_eq(&'-').is_some() => state = State::LineComment,
                '/' if chars.next_if_eq(&'*').is_some() => state = State::BlockComment,
                '\'' | '"' => {
                    state = State::Quoted(ch);
                    current.push(ch);
                }
                ';' => flush(&mut current, &mut statements),
                _ => current.push(ch),
            },
        }
    }

    // Trailing statement without a terminating semicolon.
    flush(&mut current, &mut statements);

    statements
}
477
478fn ddl_statement_for_schema(schema: &str, sql: String) -> DdlStatement {
479 if is_schema_scoped_statement(&sql) {
480 DdlStatement::with_execution_schema(sql, schema.to_string())
481 } else {
482 DdlStatement::new(sql)
483 }
484}
485
486fn is_schema_scoped_statement(sql: &str) -> bool {
487 let trimmed = sql.trim_start();
488 if !starts_with_keyword(trimmed, "CREATE") {
489 return false;
490 }
491
492 let Some(rest) = trimmed.get("CREATE".len()..) else {
493 return false;
494 };
495 let mut rest = rest.trim_start();
496 if starts_with_keyword(rest, "OR") {
497 let Some(next) = rest.get("OR".len()..) else {
498 return false;
499 };
500 rest = next.trim_start();
501 if !starts_with_keyword(rest, "REPLACE") {
502 return false;
503 }
504 let Some(next) = rest.get("REPLACE".len()..) else {
505 return false;
506 };
507 rest = next.trim_start();
508 }
509
510 if starts_with_keyword(rest, "EXTERNAL") {
511 let Some(next) = rest.get("EXTERNAL".len()..) else {
512 return false;
513 };
514 rest = next.trim_start();
515 }
516
517 starts_with_keyword(rest, "TABLE") || starts_with_keyword(rest, "VIEW")
518}
519
/// Checks that `input` begins with `keyword` (ASCII case-insensitive) and
/// that the keyword ends at a word boundary.
///
/// The byte following the keyword must not be an ASCII letter, digit or `_`;
/// end of input counts as a boundary. Returns `false` when `input` is shorter
/// than `keyword` or when the keyword length does not fall on a character
/// boundary of `input`.
fn starts_with_keyword(input: &str, keyword: &str) -> bool {
    let Some(head) = input.get(..keyword.len()) else {
        return false;
    };
    if !head.eq_ignore_ascii_case(keyword) {
        return false;
    }

    match input.as_bytes().get(keyword.len()) {
        Some(next) => !(next.is_ascii_alphanumeric() || *next == b'_'),
        None => true,
    }
}
531
532fn canonicalize_schema_filter(
533 filter: &[String],
534 manifest_schemas: &[String],
535) -> Result<Vec<String>> {
536 let mut canonicalized = Vec::new();
537 let mut seen = HashSet::new();
538
539 for schema in filter {
540 let canonical = manifest_schemas
541 .iter()
542 .find(|candidate| candidate.eq_ignore_ascii_case(schema))
543 .cloned()
544 .ok_or_else(|| {
545 SchemaNotInSnapshotSnafu {
546 schema: schema.clone(),
547 }
548 .build()
549 })?;
550
551 if seen.insert(canonical.to_ascii_lowercase()) {
552 canonicalized.push(canonical);
553 }
554 }
555
556 Ok(canonicalized)
557}
558
559fn validate_chunk_statuses(chunks: &[ChunkMeta]) -> Result<()> {
560 let invalid_chunk = chunks
561 .iter()
562 .find(|chunk| !matches!(chunk.status, ChunkStatus::Completed | ChunkStatus::Skipped));
563
564 if let Some(chunk) = invalid_chunk {
565 return IncompleteSnapshotSnafu {
566 chunk_id: chunk.id,
567 status: chunk.status,
568 }
569 .fail();
570 }
571
572 Ok(())
573}
574
575fn format_data_import_plan(chunks: &[ChunkMeta], schemas: &[String]) -> Vec<String> {
576 let mut lines = vec!["-- Data import plan:".to_string()];
577 for chunk in chunks {
578 lines.push(format!("-- Chunk {}: {:?}", chunk.id, chunk.status));
579 for schema in schemas {
580 if chunk_has_schema_files(chunk, schema) {
581 lines.push(format!("-- {} -> COPY DATABASE FROM", schema));
582 }
583 }
584 }
585 lines
586}
587
588async fn validate_data_snapshot(
589 storage: &dyn SnapshotStorage,
590 chunks: &[ChunkMeta],
591 schemas: &[String],
592) -> Result<()> {
593 validate_chunk_statuses(chunks)?;
594 let actual_prefixes = collect_chunk_data_prefixes(storage).await?;
595
596 for chunk in chunks {
597 if chunk.status == ChunkStatus::Skipped {
598 continue;
599 }
600 if chunk.files.is_empty() {
601 return EmptyChunkManifestSnafu { chunk_id: chunk.id }.fail();
602 }
603 for schema in schemas {
604 validate_chunk_schema_files(chunk, schema, &actual_prefixes)?;
605 }
606 }
607
608 Ok(())
609}
610
611async fn collect_chunk_data_prefixes(storage: &dyn SnapshotStorage) -> Result<HashSet<String>> {
612 let files = storage
613 .list_files_recursive("data/")
614 .await
615 .context(SnapshotStorageSnafu)?;
616 let mut prefixes = HashSet::new();
617
618 for path in files {
619 let normalized = path.trim_start_matches('/');
620 let mut parts = normalized.splitn(4, '/');
621 let Some(root) = parts.next() else {
622 continue;
623 };
624 let Some(schema) = parts.next() else {
625 continue;
626 };
627 let Some(chunk_id) = parts.next() else {
628 continue;
629 };
630 if root != "data" {
631 continue;
632 }
633 prefixes.insert(format!("data/{schema}/{chunk_id}/"));
634 }
635
636 Ok(prefixes)
637}
638
639fn validate_chunk_schema_files(
640 chunk: &ChunkMeta,
641 schema: &str,
642 actual_prefixes: &HashSet<String>,
643) -> Result<bool> {
644 if !chunk_has_schema_files(chunk, schema) {
645 return Ok(false);
646 }
647
648 let prefix = data_dir_for_schema_chunk(schema, chunk.id);
649 if !actual_prefixes.contains(&prefix) {
650 return MissingChunkDataSnafu {
651 chunk_id: chunk.id,
652 schema: schema.to_string(),
653 path: prefix,
654 }
655 .fail();
656 }
657
658 Ok(true)
659}
660
#[cfg(test)]
mod tests {
    use std::collections::{HashMap, HashSet};

    use async_trait::async_trait;

    use super::*;
    use crate::data::export_v2::manifest::{ChunkMeta, ChunkStatus, Manifest, TimeRange};
    use crate::data::export_v2::schema::SchemaSnapshot;
    use crate::data::snapshot_storage::SnapshotStorage;

    /// Result type used by the `SnapshotStorage` trait methods.
    type StorageResult<T> = crate::data::export_v2::error::Result<T>;

    /// Storage double that serves a fixed manifest and file listing; every
    /// other operation is unimplemented.
    struct StubStorage {
        manifest: Manifest,
        files_by_prefix: HashMap<String, Vec<String>>,
    }

    #[async_trait]
    impl SnapshotStorage for StubStorage {
        async fn exists(&self) -> StorageResult<bool> {
            Ok(true)
        }

        async fn read_manifest(&self) -> StorageResult<Manifest> {
            Ok(self.manifest.clone())
        }

        async fn write_manifest(&self, _manifest: &Manifest) -> StorageResult<()> {
            unimplemented!("not needed in import_v2::command tests")
        }

        async fn read_text(&self, _path: &str) -> StorageResult<String> {
            unimplemented!("not needed in import_v2::command tests")
        }

        async fn write_text(&self, _path: &str, _content: &str) -> StorageResult<()> {
            unimplemented!("not needed in import_v2::command tests")
        }

        async fn write_schema(&self, _snapshot: &SchemaSnapshot) -> StorageResult<()> {
            unimplemented!("not needed in import_v2::command tests")
        }

        async fn create_dir_all(&self, _path: &str) -> StorageResult<()> {
            unimplemented!("not needed in import_v2::command tests")
        }

        async fn list_files_recursive(&self, prefix: &str) -> StorageResult<Vec<String>> {
            let mut listed = Vec::new();
            for (candidate, files) in &self.files_by_prefix {
                if candidate.starts_with(prefix) {
                    listed.extend(files.iter().cloned());
                }
            }
            Ok(listed)
        }

        async fn delete_snapshot(&self) -> StorageResult<()> {
            unimplemented!("not needed in import_v2::command tests")
        }
    }

    /// Builds a stub storage with a schema-only manifest and the given listing.
    fn storage_with_files(files_by_prefix: HashMap<String, Vec<String>>) -> StubStorage {
        StubStorage {
            manifest: Manifest::new_schema_only("greptime".to_string(), vec!["public".to_string()]),
            files_by_prefix,
        }
    }

    /// Stub storage that lists no files at all.
    fn empty_storage() -> StubStorage {
        storage_with_files(HashMap::new())
    }

    /// Converts string literals into owned strings.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    /// Chunk 7 listing a single file for the `public` schema.
    fn public_chunk_seven() -> ChunkMeta {
        let mut chunk = ChunkMeta::new(7, TimeRange::unbounded());
        chunk.files = owned(&["data/public/7/a.parquet"]);
        chunk
    }

    /// Mandatory arguments followed by `extra`.
    fn command_args<'a>(extra: &[&'a str]) -> Vec<&'a str> {
        let mut args = vec![
            "import-v2",
            "--addr",
            "127.0.0.1:4000",
            "--from",
            "file:///tmp/snapshot",
        ];
        args.extend_from_slice(extra);
        args
    }

    fn parse_command(extra: &[&str]) -> ImportV2Command {
        ImportV2Command::try_parse_from(command_args(extra)).expect("command should parse")
    }

    /// Whether parsing the mandatory arguments plus `extra` is rejected.
    fn parse_fails(extra: &[&str]) -> bool {
        ImportV2Command::try_parse_from(command_args(extra)).is_err()
    }

    /// Builds the statement for schema `test_db` from `sql`.
    fn statement_for_test_db(sql: &str) -> DdlStatement {
        ddl_statement_for_schema("test_db", sql.to_string())
    }

    #[test]
    fn test_progress_mode_defaults_to_auto() {
        let command = parse_command(&[]);
        assert_eq!(command.progress, ProgressMode::Auto);
    }

    #[test]
    fn test_progress_mode_parses_explicit_values() {
        let cases = [
            ("always", ProgressMode::Always),
            ("never", ProgressMode::Never),
            ("auto", ProgressMode::Auto),
        ];
        for (value, expected) in cases {
            assert_eq!(parse_command(&["--progress", value]).progress, expected);
        }
    }

    #[test]
    fn test_progress_mode_rejects_unknown_value() {
        assert!(parse_fails(&["--progress", "bogus"]));
    }

    #[test]
    fn test_task_parallelism_defaults_to_one() {
        let command = parse_command(&[]);
        assert_eq!(command.task_parallelism, 1);
    }

    #[test]
    fn test_task_parallelism_parses_valid_values() {
        for (value, expected) in [("2", 2), ("64", 64)] {
            assert_eq!(
                parse_command(&["--task-parallelism", value]).task_parallelism,
                expected
            );
        }
    }

    #[test]
    fn test_state_path_defaults_to_none() {
        assert!(parse_command(&[]).state_path.is_none());
    }

    #[test]
    fn test_state_path_parses_explicit_value() {
        let command = parse_command(&["--state-path", "/tmp/import_state.json"]);
        assert_eq!(
            command.state_path,
            Some(PathBuf::from("/tmp/import_state.json"))
        );
    }

    #[test]
    fn test_resolve_state_path_prefers_override() {
        let override_path = PathBuf::from("/tmp/custom_import_state.json");
        let schemas = owned(&["public"]);

        let resolved = resolve_state_path(
            Some(override_path.as_path()),
            "snapshot-1",
            "127.0.0.1:4000",
            "greptime",
            &schemas,
        )
        .unwrap();

        assert_eq!(resolved, override_path);
    }

    #[test]
    fn test_resolve_state_path_uses_default_when_absent() {
        let schemas = owned(&["public"]);

        let resolved =
            resolve_state_path(None, "snapshot-1", "127.0.0.1:4000", "greptime", &schemas)
                .unwrap();
        let expected =
            default_state_path("snapshot-1", "127.0.0.1:4000", "greptime", &schemas).unwrap();

        assert_eq!(resolved, expected);
    }

    #[test]
    fn test_task_parallelism_rejects_invalid_values() {
        for value in ["0", "65", "abc"] {
            assert!(
                parse_fails(&["--task-parallelism", value]),
                "value {value} should be rejected"
            );
        }
    }

    #[test]
    fn test_parse_ddl_statements() {
        let content = r#"
-- Schema: public
CREATE DATABASE public;
CREATE TABLE t (ts TIMESTAMP TIME INDEX, host STRING, PRIMARY KEY (host)) ENGINE=mito;

-- comment
CREATE VIEW v AS SELECT * FROM t;
"#;
        let statements = parse_ddl_statements(content);

        let expected_prefixes = ["CREATE DATABASE public", "CREATE TABLE t", "CREATE VIEW v"];
        assert_eq!(statements.len(), 3);
        for (statement, prefix) in statements.iter().zip(expected_prefixes) {
            assert!(statement.starts_with(prefix));
        }
    }

    #[test]
    fn test_parse_ddl_statements_preserves_semicolons_in_string_literals() {
        let content = r#"
CREATE TABLE t (
    host STRING DEFAULT 'a;b'
);
CREATE VIEW v AS SELECT ';' AS marker;
"#;

        let statements = parse_ddl_statements(content);

        assert_eq!(statements.len(), 2);
        assert!(statements[0].contains("'a;b'"));
        assert!(statements[1].contains("';' AS marker"));
    }

    #[test]
    fn test_parse_ddl_statements_handles_comments_without_splitting() {
        let content = r#"
-- leading comment
CREATE TABLE t (ts TIMESTAMP TIME INDEX); /* block; comment */
CREATE VIEW v AS SELECT 1;
"#;

        let statements = parse_ddl_statements(content);

        assert_eq!(statements.len(), 2);
        assert!(statements[0].starts_with("CREATE TABLE t"));
        assert!(statements[1].starts_with("CREATE VIEW v"));
    }

    #[test]
    fn test_canonicalize_schema_filter_uses_manifest_casing() {
        let filter = owned(&["TEST_DB", "PUBLIC"]);
        let manifest_schemas = owned(&["test_db", "public"]);

        let canonicalized = canonicalize_schema_filter(&filter, &manifest_schemas).unwrap();

        assert_eq!(canonicalized, vec!["test_db", "public"]);
    }

    #[test]
    fn test_canonicalize_schema_filter_dedupes_case_insensitive_matches() {
        let filter = owned(&["TEST_DB", "test_db", "PUBLIC", "public"]);
        let manifest_schemas = owned(&["test_db", "public"]);

        let canonicalized = canonicalize_schema_filter(&filter, &manifest_schemas).unwrap();

        assert_eq!(canonicalized, vec!["test_db", "public"]);
    }

    #[test]
    fn test_canonicalize_schema_filter_rejects_missing_schema() {
        let filter = owned(&["missing"]);
        let manifest_schemas = owned(&["test_db"]);

        let error = canonicalize_schema_filter(&filter, &manifest_schemas)
            .expect_err("missing schema should fail")
            .to_string();

        assert!(error.contains("missing"));
    }

    #[test]
    fn test_ddl_statement_for_schema_create_table_uses_execution_schema() {
        let stmt = statement_for_test_db("CREATE TABLE metrics (ts TIMESTAMP TIME INDEX) ENGINE=mito");
        assert_eq!(stmt.execution_schema.as_deref(), Some("test_db"));
    }

    #[test]
    fn test_ddl_statement_for_schema_create_view_uses_execution_schema() {
        let stmt = statement_for_test_db("CREATE VIEW metrics_view AS SELECT * FROM metrics");
        assert_eq!(stmt.execution_schema.as_deref(), Some("test_db"));
    }

    #[test]
    fn test_ddl_statement_for_schema_create_or_replace_view_uses_execution_schema() {
        let stmt =
            statement_for_test_db("CREATE OR REPLACE VIEW metrics_view AS SELECT * FROM metrics");
        assert_eq!(stmt.execution_schema.as_deref(), Some("test_db"));
    }

    #[test]
    fn test_ddl_statement_for_schema_create_external_table_uses_execution_schema() {
        let stmt = statement_for_test_db(
            "CREATE EXTERNAL TABLE IF NOT EXISTS ext_metrics (ts TIMESTAMP TIME INDEX) ENGINE=file",
        );
        assert_eq!(stmt.execution_schema.as_deref(), Some("test_db"));
    }

    #[test]
    fn test_ddl_statement_for_schema_create_database_uses_public_context() {
        let stmt = statement_for_test_db("CREATE DATABASE test_db");
        assert_eq!(stmt.execution_schema, None);
    }

    #[test]
    fn test_starts_with_keyword_requires_word_boundary() {
        assert!(starts_with_keyword("CREATE TABLE t", "CREATE"));
        assert!(!starts_with_keyword("CREATED TABLE t", "CREATE"));
        assert!(!starts_with_keyword("TABLESPACE foo", "TABLE"));
    }

    #[test]
    fn test_validate_chunk_statuses_rejects_failed_chunk() {
        let mut failed = ChunkMeta::new(3, TimeRange::unbounded());
        failed.status = ChunkStatus::Failed;

        let error = validate_chunk_statuses(&[failed]).expect_err("failed chunk should error");

        assert!(error.to_string().contains("Incomplete snapshot"));
    }

    #[test]
    fn test_validate_chunk_statuses_accepts_completed_and_skipped_chunks() {
        let mut completed = ChunkMeta::new(1, TimeRange::unbounded());
        completed.status = ChunkStatus::Completed;
        let skipped = ChunkMeta::skipped(2, TimeRange::unbounded());

        let outcome = validate_chunk_statuses(&[completed, skipped]);

        assert!(outcome.is_ok());
    }

    #[test]
    fn test_chunk_has_schema_files_matches_encoded_schema_prefix() {
        let mut chunk = ChunkMeta::new(7, TimeRange::unbounded());
        chunk.files = owned(&[
            "data/public/7/a.parquet",
            "data/%E6%B5%8B%E8%AF%95/7/b.parquet",
        ]);

        assert!(chunk_has_schema_files(&chunk, "public"));
        assert!(chunk_has_schema_files(&chunk, "测试"));
        assert!(!chunk_has_schema_files(&chunk, "metrics"));
    }

    #[test]
    fn test_format_data_import_plan_includes_matching_schemas_only() {
        let mut completed = ChunkMeta::new(1, TimeRange::unbounded());
        completed.status = ChunkStatus::Completed;
        completed.files = owned(&[
            "data/public/1/a.parquet",
            "data/%E6%B5%8B%E8%AF%95/1/b.parquet",
        ]);
        let skipped = ChunkMeta::skipped(2, TimeRange::unbounded());

        let lines = format_data_import_plan(
            &[completed, skipped],
            &owned(&["public", "测试", "metrics"]),
        );
        let has_line = |expected: &str| lines.iter().any(|line| line == expected);

        assert_eq!(lines[0], "-- Data import plan:");
        assert!(has_line("-- Chunk 1: Completed"));
        assert!(has_line("-- public -> COPY DATABASE FROM"));
        assert!(has_line("-- 测试 -> COPY DATABASE FROM"));
        assert!(!has_line("-- metrics -> COPY DATABASE FROM"));
        assert!(has_line("-- Chunk 2: Skipped"));
    }

    #[tokio::test]
    async fn test_collect_chunk_data_prefixes_indexes_present_prefixes() {
        let storage = storage_with_files(HashMap::from([
            (
                "data/public/7/".to_string(),
                owned(&["data/public/7/a.parquet"]),
            ),
            (
                "data/%E6%B5%8B%E8%AF%95/9/".to_string(),
                owned(&["data/%E6%B5%8B%E8%AF%95/9/b.parquet"]),
            ),
        ]));

        let prefixes = collect_chunk_data_prefixes(&storage).await.unwrap();

        assert!(prefixes.contains("data/public/7/"));
        assert!(prefixes.contains("data/%E6%B5%8B%E8%AF%95/9/"));
    }

    #[test]
    fn test_validate_chunk_schema_files_accepts_present_prefix() {
        let chunk = public_chunk_seven();
        let actual_prefixes = HashSet::from(["data/public/7/".to_string()]);

        assert!(validate_chunk_schema_files(&chunk, "public", &actual_prefixes).unwrap());
    }

    #[test]
    fn test_validate_chunk_schema_files_rejects_missing_prefix() {
        let chunk = public_chunk_seven();

        let error = validate_chunk_schema_files(&chunk, "public", &HashSet::new())
            .expect_err("missing chunk prefix should fail")
            .to_string();

        assert!(error.contains("marked completed but no files were found"));
    }

    #[test]
    fn test_validate_chunk_schema_files_skips_absent_schema() {
        let chunk = public_chunk_seven();

        assert!(!validate_chunk_schema_files(&chunk, "metrics", &HashSet::new()).unwrap());
    }

    #[tokio::test]
    async fn test_validate_data_snapshot_rejects_failed_chunk_before_dry_run() {
        let mut failed = ChunkMeta::new(3, TimeRange::unbounded());
        failed.status = ChunkStatus::Failed;
        let storage = empty_storage();

        let error = validate_data_snapshot(&storage, &[failed], &owned(&["public"]))
            .await
            .expect_err("failed chunk should reject dry-run validation")
            .to_string();

        assert!(error.contains("Incomplete snapshot"));
    }

    #[tokio::test]
    async fn test_validate_data_snapshot_rejects_missing_chunk_prefix_before_dry_run() {
        let mut completed = public_chunk_seven();
        completed.status = ChunkStatus::Completed;
        let storage = empty_storage();

        let error = validate_data_snapshot(&storage, &[completed], &owned(&["public"]))
            .await
            .expect_err("missing chunk prefix should reject dry-run validation")
            .to_string();

        assert!(error.contains("marked completed but no files were found"));
    }

    #[tokio::test]
    async fn test_validate_data_snapshot_rejects_completed_chunk_with_empty_manifest() {
        let mut completed = ChunkMeta::new(7, TimeRange::unbounded());
        completed.status = ChunkStatus::Completed;
        let storage = empty_storage();

        let error = validate_data_snapshot(&storage, &[completed], &owned(&["public"]))
            .await
            .expect_err("empty completed chunk should reject validation")
            .to_string();

        assert!(error.contains("file manifest is empty"));
    }
}