1use std::collections::HashSet;
18use std::time::Duration;
19
20use async_trait::async_trait;
21use clap::Parser;
22use common_error::ext::BoxedError;
23use common_telemetry::info;
24use snafu::ResultExt;
25
26use crate::Tool;
27use crate::common::ObjectStoreConfig;
28use crate::data::export_v2::manifest::MANIFEST_VERSION;
29use crate::data::import_v2::error::{
30 ManifestVersionMismatchSnafu, Result, SchemaNotInSnapshotSnafu, SnapshotStorageSnafu,
31};
32use crate::data::import_v2::executor::{DdlExecutor, DdlStatement};
33use crate::data::path::ddl_path_for_schema;
34use crate::data::snapshot_storage::{OpenDalStorage, SnapshotStorage, validate_uri};
35use crate::database::{DatabaseClient, parse_proxy_opts};
36
/// Imports the schema (DDL statements) stored in a snapshot into a database.
#[derive(Debug, Parser)]
pub struct ImportV2Command {
    /// Address handed to the database client that executes the imported DDL.
    #[clap(long)]
    addr: String,

    /// URI of the snapshot to import from; it is validated and then opened as snapshot storage.
    #[clap(long)]
    from: String,

    /// Catalog handed to the database client.
    #[clap(long, default_value = "greptime")]
    catalog: String,

    /// Comma-separated list of schemas to import; when empty, every schema listed in the
    /// snapshot manifest is imported.
    #[clap(long, value_delimiter = ',')]
    schemas: Vec<String>,

    /// Print the DDL statements instead of executing them.
    #[clap(long)]
    dry_run: bool,

    /// Parallelism setting; it is stored on the importer but not read anywhere in this file.
    #[clap(long, default_value = "1")]
    parallelism: usize,

    /// Optional basic-authentication value forwarded to the database client.
    #[clap(long)]
    auth_basic: Option<String>,

    /// Timeout handed to the database client, written in humantime syntax; 60 seconds when
    /// omitted.
    #[clap(long, value_parser = humantime::parse_duration)]
    timeout: Option<Duration>,

    /// Proxy setting, interpreted together with `no_proxy` when the client is built.
    #[clap(long)]
    proxy: Option<String>,

    /// No-proxy flag; forwarded both to the proxy-option parser and to the database client.
    #[clap(long)]
    no_proxy: bool,

    /// Object-store configuration used when opening the snapshot storage at `from`.
    #[clap(flatten)]
    storage: ObjectStoreConfig,
}
90
91impl ImportV2Command {
92 pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
93 validate_uri(&self.from)
95 .context(SnapshotStorageSnafu)
96 .map_err(BoxedError::new)?;
97
98 let schemas = if self.schemas.is_empty() {
100 None
101 } else {
102 Some(self.schemas.clone())
103 };
104
105 let storage = OpenDalStorage::from_uri(&self.from, &self.storage)
107 .context(SnapshotStorageSnafu)
108 .map_err(BoxedError::new)?;
109
110 let proxy = parse_proxy_opts(self.proxy.clone(), self.no_proxy)?;
112 let database_client = DatabaseClient::new(
113 self.addr.clone(),
114 self.catalog.clone(),
115 self.auth_basic.clone(),
116 self.timeout.unwrap_or(Duration::from_secs(60)),
117 proxy,
118 self.no_proxy,
119 );
120
121 Ok(Box::new(Import {
122 schemas,
123 dry_run: self.dry_run,
124 _parallelism: self.parallelism,
125 storage: Box::new(storage),
126 database_client,
127 }))
128 }
129}
130
/// Import tool state assembled by `ImportV2Command::build`.
pub struct Import {
    /// Schema filter; `None` means every schema listed in the snapshot manifest is imported.
    schemas: Option<Vec<String>>,
    /// When set, the DDL statements are printed and nothing is executed.
    dry_run: bool,
    /// Copied from the command line; not read anywhere in this file.
    _parallelism: usize,
    /// Snapshot storage the manifest and per-schema DDL text are read from.
    storage: Box<dyn SnapshotStorage>,
    /// Client the DDL executor is created on.
    database_client: DatabaseClient,
}
139
140#[async_trait]
141impl Tool for Import {
142 async fn do_work(&self) -> std::result::Result<(), BoxedError> {
143 self.run().await.map_err(BoxedError::new)
144 }
145}
146
147impl Import {
148 async fn run(&self) -> Result<()> {
149 let manifest = self
151 .storage
152 .read_manifest()
153 .await
154 .context(SnapshotStorageSnafu)?;
155
156 info!(
157 "Loading snapshot: {} (version: {}, schema_only: {})",
158 manifest.snapshot_id, manifest.version, manifest.schema_only
159 );
160
161 if manifest.version != MANIFEST_VERSION {
163 return ManifestVersionMismatchSnafu {
164 expected: MANIFEST_VERSION,
165 found: manifest.version,
166 }
167 .fail();
168 }
169
170 info!("Snapshot contains {} schema(s)", manifest.schemas.len());
171
172 let schemas_to_import = match &self.schemas {
174 Some(filter) => canonicalize_schema_filter(filter, &manifest.schemas)?,
175 None => manifest.schemas.clone(),
176 };
177
178 info!("Importing schemas: {:?}", schemas_to_import);
179
180 let ddl_statements = self.read_ddl_statements(&schemas_to_import).await?;
182
183 info!("Generated {} DDL statements", ddl_statements.len());
184
185 if self.dry_run {
187 info!("Dry-run mode - DDL statements to execute:");
188 println!();
189 for (i, stmt) in ddl_statements.iter().enumerate() {
190 println!("-- Statement {}", i + 1);
191 println!("{};", stmt.sql);
192 println!();
193 }
194 return Ok(());
195 }
196
197 let executor = DdlExecutor::new(&self.database_client);
199 executor.execute_strict(&ddl_statements).await?;
200
201 info!(
202 "Import completed: {} DDL statements executed",
203 ddl_statements.len()
204 );
205
206 if !manifest.schema_only && !manifest.chunks.is_empty() {
208 info!(
209 "Data import not yet implemented (M3). {} chunks pending.",
210 manifest.chunks.len()
211 );
212 }
213
214 Ok(())
215 }
216
217 async fn read_ddl_statements(&self, schemas: &[String]) -> Result<Vec<DdlStatement>> {
218 let mut statements = Vec::new();
219 for schema in schemas {
220 let path = ddl_path_for_schema(schema);
221 let content = self
222 .storage
223 .read_text(&path)
224 .await
225 .context(SnapshotStorageSnafu)?;
226 statements.extend(
227 parse_ddl_statements(&content)
228 .into_iter()
229 .map(|sql| ddl_statement_for_schema(schema, sql)),
230 );
231 }
232
233 Ok(statements)
234 }
235}
236
/// Splits a DDL script into individual statements at every top-level `;`.
///
/// Rules applied while scanning:
/// - `-- ...` line comments are dropped; the newline that ends them is kept.
/// - `/* ... */` block comments are dropped and replaced by a single space, so the tokens on
///   either side of the comment stay separate (`CREATE/* c */TABLE` becomes `CREATE TABLE`).
///   An unterminated block comment swallows the rest of the input.
/// - Text inside `'...'` or `"..."` is copied verbatim, so `;`, `--` and `/*` inside quotes do
///   not split or start comments. A doubled quote character inside a quoted run is treated as
///   an escaped quote.
///
/// Each returned statement is trimmed and excludes the terminating `;`; statements that are
/// empty after trimming are skipped. Trailing text without a final `;` is returned as the
/// last statement.
fn parse_ddl_statements(content: &str) -> Vec<String> {
    /// Lexical context of the character currently being scanned.
    #[derive(Clone, Copy)]
    enum Scan {
        /// Plain SQL text outside of comments and quotes.
        Code,
        /// Inside a `--` comment, up to the next newline.
        LineComment,
        /// Inside a `/* ... */` comment.
        BlockComment,
        /// Inside a quoted run delimited by the given quote character.
        Quoted(char),
    }

    /// Moves the trimmed, non-empty contents of `current` into `statements` and resets it.
    fn flush(current: &mut String, statements: &mut Vec<String>) {
        let statement = current.trim();
        if !statement.is_empty() {
            statements.push(statement.to_string());
        }
        current.clear();
    }

    let mut statements = Vec::new();
    let mut current = String::new();
    let mut chars = content.chars().peekable();
    let mut state = Scan::Code;

    while let Some(ch) = chars.next() {
        match state {
            Scan::LineComment => {
                if ch == '\n' {
                    state = Scan::Code;
                    current.push('\n');
                }
            }
            Scan::BlockComment => {
                if ch == '*' && chars.peek() == Some(&'/') {
                    chars.next();
                    state = Scan::Code;
                    // A comment counts as whitespace in SQL: keep neighbouring tokens apart.
                    current.push(' ');
                }
            }
            Scan::Quoted(quote) => {
                current.push(ch);
                if ch == quote {
                    if chars.peek() == Some(&quote) {
                        // Doubled quote: an escaped quote character, still inside the run.
                        chars.next();
                        current.push(quote);
                    } else {
                        state = Scan::Code;
                    }
                }
            }
            Scan::Code => match ch {
                '-' if chars.peek() == Some(&'-') => {
                    chars.next();
                    state = Scan::LineComment;
                }
                '/' if chars.peek() == Some(&'*') => {
                    chars.next();
                    state = Scan::BlockComment;
                }
                '\'' | '"' => {
                    state = Scan::Quoted(ch);
                    current.push(ch);
                }
                ';' => flush(&mut current, &mut statements),
                _ => current.push(ch),
            },
        }
    }

    // Keep a final statement that was not terminated by `;`.
    flush(&mut current, &mut statements);

    statements
}
322
323fn ddl_statement_for_schema(schema: &str, sql: String) -> DdlStatement {
324 if is_schema_scoped_statement(&sql) {
325 DdlStatement::with_execution_schema(sql, schema.to_string())
326 } else {
327 DdlStatement::new(sql)
328 }
329}
330
331fn is_schema_scoped_statement(sql: &str) -> bool {
332 let trimmed = sql.trim_start();
333 if !starts_with_keyword(trimmed, "CREATE") {
334 return false;
335 }
336
337 let Some(rest) = trimmed.get("CREATE".len()..) else {
338 return false;
339 };
340 let mut rest = rest.trim_start();
341 if starts_with_keyword(rest, "OR") {
342 let Some(next) = rest.get("OR".len()..) else {
343 return false;
344 };
345 rest = next.trim_start();
346 if !starts_with_keyword(rest, "REPLACE") {
347 return false;
348 }
349 let Some(next) = rest.get("REPLACE".len()..) else {
350 return false;
351 };
352 rest = next.trim_start();
353 }
354
355 if starts_with_keyword(rest, "EXTERNAL") {
356 let Some(next) = rest.get("EXTERNAL".len()..) else {
357 return false;
358 };
359 rest = next.trim_start();
360 }
361
362 starts_with_keyword(rest, "TABLE") || starts_with_keyword(rest, "VIEW")
363}
364
/// Returns `true` when `input` begins with `keyword` (compared ASCII case-insensitively) and
/// the keyword ends on a word boundary.
///
/// The boundary holds when the keyword reaches the end of `input` or the next byte is neither
/// an ASCII letter or digit nor `_`. An `input` that is too short, or whose first
/// `keyword.len()` bytes do not end on a character boundary, never matches.
fn starts_with_keyword(input: &str, keyword: &str) -> bool {
    let keyword_len = keyword.len();

    let Some(head) = input.get(..keyword_len) else {
        return false;
    };
    if !head.eq_ignore_ascii_case(keyword) {
        return false;
    }

    match input.as_bytes().get(keyword_len) {
        // The keyword is the whole remaining input.
        None => true,
        // An identifier character directly after it means a longer word, not the keyword.
        Some(&next) => !(next.is_ascii_alphanumeric() || next == b'_'),
    }
}
376
377fn canonicalize_schema_filter(
378 filter: &[String],
379 manifest_schemas: &[String],
380) -> Result<Vec<String>> {
381 let mut canonicalized = Vec::new();
382 let mut seen = HashSet::new();
383
384 for schema in filter {
385 let canonical = manifest_schemas
386 .iter()
387 .find(|candidate| candidate.eq_ignore_ascii_case(schema))
388 .cloned()
389 .ok_or_else(|| {
390 SchemaNotInSnapshotSnafu {
391 schema: schema.clone(),
392 }
393 .build()
394 })?;
395
396 if seen.insert(canonical.to_ascii_lowercase()) {
397 canonicalized.push(canonical);
398 }
399 }
400
401 Ok(canonicalized)
402}
403
#[cfg(test)]
mod tests {
    use super::*;

    /// Converts string literals into the owned `String`s the functions under test take.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    /// Asserts that `statements` has one entry per prefix and each entry starts with its prefix.
    fn assert_statement_prefixes(statements: &[String], prefixes: &[&str]) {
        assert_eq!(statements.len(), prefixes.len());
        for (statement, prefix) in statements.iter().zip(prefixes) {
            assert!(statement.starts_with(*prefix));
        }
    }

    /// Asserts the execution schema given to `sql` when it is imported for schema `test_db`.
    fn assert_execution_schema(sql: &str, expected: Option<&str>) {
        let stmt = ddl_statement_for_schema("test_db", sql.to_string());
        assert_eq!(stmt.execution_schema.as_deref(), expected);
    }

    #[test]
    fn test_parse_ddl_statements() {
        let script = r#"
-- Schema: public
CREATE DATABASE public;
CREATE TABLE t (ts TIMESTAMP TIME INDEX, host STRING, PRIMARY KEY (host)) ENGINE=mito;

-- comment
CREATE VIEW v AS SELECT * FROM t;
"#;

        assert_statement_prefixes(
            &parse_ddl_statements(script),
            &["CREATE DATABASE public", "CREATE TABLE t", "CREATE VIEW v"],
        );
    }

    #[test]
    fn test_parse_ddl_statements_preserves_semicolons_in_string_literals() {
        let script = r#"
CREATE TABLE t (
 host STRING DEFAULT 'a;b'
);
CREATE VIEW v AS SELECT ';' AS marker;
"#;

        let parsed = parse_ddl_statements(script);
        let expected_fragments = ["'a;b'", "';' AS marker"];

        assert_eq!(parsed.len(), expected_fragments.len());
        for (statement, fragment) in parsed.iter().zip(expected_fragments) {
            assert!(statement.contains(fragment));
        }
    }

    #[test]
    fn test_parse_ddl_statements_handles_comments_without_splitting() {
        let script = r#"
-- leading comment
CREATE TABLE t (ts TIMESTAMP TIME INDEX); /* block; comment */
CREATE VIEW v AS SELECT 1;
"#;

        assert_statement_prefixes(
            &parse_ddl_statements(script),
            &["CREATE TABLE t", "CREATE VIEW v"],
        );
    }

    #[test]
    fn test_canonicalize_schema_filter_uses_manifest_casing() {
        let requested = owned(&["TEST_DB", "PUBLIC"]);
        let in_manifest = owned(&["test_db", "public"]);

        let resolved = canonicalize_schema_filter(&requested, &in_manifest).unwrap();

        assert_eq!(resolved, vec!["test_db", "public"]);
    }

    #[test]
    fn test_canonicalize_schema_filter_dedupes_case_insensitive_matches() {
        let requested = owned(&["TEST_DB", "test_db", "PUBLIC", "public"]);
        let in_manifest = owned(&["test_db", "public"]);

        let resolved = canonicalize_schema_filter(&requested, &in_manifest).unwrap();

        assert_eq!(resolved, vec!["test_db", "public"]);
    }

    #[test]
    fn test_canonicalize_schema_filter_rejects_missing_schema() {
        let requested = owned(&["missing"]);
        let in_manifest = owned(&["test_db"]);

        let message = canonicalize_schema_filter(&requested, &in_manifest)
            .expect_err("missing schema should fail")
            .to_string();

        assert!(message.contains("missing"));
    }

    #[test]
    fn test_ddl_statement_for_schema_create_table_uses_execution_schema() {
        assert_execution_schema(
            "CREATE TABLE metrics (ts TIMESTAMP TIME INDEX) ENGINE=mito",
            Some("test_db"),
        );
    }

    #[test]
    fn test_ddl_statement_for_schema_create_view_uses_execution_schema() {
        assert_execution_schema(
            "CREATE VIEW metrics_view AS SELECT * FROM metrics",
            Some("test_db"),
        );
    }

    #[test]
    fn test_ddl_statement_for_schema_create_or_replace_view_uses_execution_schema() {
        assert_execution_schema(
            "CREATE OR REPLACE VIEW metrics_view AS SELECT * FROM metrics",
            Some("test_db"),
        );
    }

    #[test]
    fn test_ddl_statement_for_schema_create_external_table_uses_execution_schema() {
        assert_execution_schema(
            "CREATE EXTERNAL TABLE IF NOT EXISTS ext_metrics (ts TIMESTAMP TIME INDEX) ENGINE=file",
            Some("test_db"),
        );
    }

    #[test]
    fn test_ddl_statement_for_schema_create_database_uses_public_context() {
        // `CREATE DATABASE` is not schema-scoped, so no execution schema is attached.
        assert_execution_schema("CREATE DATABASE test_db", None);
    }

    #[test]
    fn test_starts_with_keyword_requires_word_boundary() {
        let cases = [
            ("CREATE TABLE t", "CREATE", true),
            ("CREATED TABLE t", "CREATE", false),
            ("TABLESPACE foo", "TABLE", false),
        ];

        for (input, keyword, expected) in cases {
            assert_eq!(starts_with_keyword(input, keyword), expected);
        }
    }
}