1use async_trait::async_trait;
21use object_store::services::{Azblob, Fs, Gcs, Oss, S3};
22use object_store::util::{with_instrument_layers, with_retry_layers};
23use object_store::{AzblobConnection, GcsConnection, ObjectStore, OssConnection, S3Connection};
24use snafu::ResultExt;
25use url::Url;
26
27use crate::common::ObjectStoreConfig;
28use crate::data::export_v2::error::{
29 BuildObjectStoreSnafu, InvalidUriSnafu, ManifestParseSnafu, ManifestSerializeSnafu, Result,
30 SnapshotNotFoundSnafu, StorageOperationSnafu, TextDecodeSnafu, UnsupportedSchemeSnafu,
31 UrlParseSnafu,
32};
33use crate::data::export_v2::manifest::{MANIFEST_FILE, Manifest};
34#[cfg(test)]
35use crate::data::export_v2::schema::SchemaDefinition;
36use crate::data::export_v2::schema::{SCHEMA_DIR, SCHEMAS_FILE, SchemaSnapshot};
37
/// Bucket/container name and root prefix parsed out of a remote snapshot URI.
struct RemoteLocation {
    /// Host component of the URI; assigned to the backend's bucket (S3/OSS/GCS) or
    /// container (Azblob) setting.
    bucket_or_container: String,
    /// URI path with its leading `/` characters removed; assigned to the backend's root.
    root: String,
}
42
/// Storage backend selected by the scheme of a snapshot URI.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StorageScheme {
    /// Selected by `s3://` URIs.
    S3,
    /// Selected by `oss://` URIs.
    Oss,
    /// Selected by `gs://` and `gcs://` URIs.
    Gcs,
    /// Selected by `azblob://` URIs.
    Azblob,
    /// Selected by `file://` URIs.
    File,
}
57
58impl StorageScheme {
59 pub fn from_uri(uri: &str) -> Result<Self> {
61 let url = Url::parse(uri).context(UrlParseSnafu)?;
62
63 match url.scheme() {
64 "s3" => Ok(Self::S3),
65 "oss" => Ok(Self::Oss),
66 "gs" | "gcs" => Ok(Self::Gcs),
67 "azblob" => Ok(Self::Azblob),
68 "file" => Ok(Self::File),
69 scheme => UnsupportedSchemeSnafu { scheme }.fail(),
70 }
71 }
72}
73
74fn extract_remote_location(uri: &str) -> Result<RemoteLocation> {
76 let url = Url::parse(uri).context(UrlParseSnafu)?;
77 let bucket_or_container = url.host_str().unwrap_or("").to_string();
78 if bucket_or_container.is_empty() {
79 return InvalidUriSnafu {
80 uri,
81 reason: "URI must include bucket/container in host",
82 }
83 .fail();
84 }
85
86 let root = url.path().trim_start_matches('/').to_string();
87 if root.is_empty() {
88 return InvalidUriSnafu {
89 uri,
90 reason: "snapshot URI must include a non-empty path after the bucket/container",
91 }
92 .fail();
93 }
94
95 Ok(RemoteLocation {
96 bucket_or_container,
97 root,
98 })
99}
100
101pub fn validate_uri(uri: &str) -> Result<StorageScheme> {
114 if !uri.contains("://") {
116 return InvalidUriSnafu {
117 uri,
118 reason: "URI must have a scheme (e.g., s3://, file://). Bare paths are not supported.",
119 }
120 .fail();
121 }
122
123 StorageScheme::from_uri(uri)
124}
125
126fn schema_index_path() -> String {
127 format!("{}/{}", SCHEMA_DIR, SCHEMAS_FILE)
128}
129
130fn extract_file_path_from_uri(uri: &str) -> Result<String> {
132 let url = Url::parse(uri).context(UrlParseSnafu)?;
133
134 match url.host_str() {
135 Some(host) if !host.is_empty() && host != "localhost" => InvalidUriSnafu {
136 uri,
137 reason: "file:// URI must use an absolute path like file:///tmp/backup",
138 }
139 .fail(),
140 _ => url
141 .to_file_path()
142 .map(|path| path.to_string_lossy().into_owned())
143 .map_err(|_| {
144 InvalidUriSnafu {
145 uri,
146 reason: "file:// URI must use a valid absolute filesystem path",
147 }
148 .build()
149 }),
150 }
151}
152
153async fn ensure_snapshot_exists(storage: &OpenDalStorage) -> Result<()> {
154 if storage.exists().await? {
155 Ok(())
156 } else {
157 SnapshotNotFoundSnafu {
158 uri: storage.target_uri.as_str(),
159 }
160 .fail()
161 }
162}
163
/// Storage operations used to read, write and delete a snapshot.
#[async_trait]
pub trait SnapshotStorage: Send + Sync {
    /// Reports whether a snapshot is present in this storage.
    ///
    /// The `OpenDalStorage` implementation in this file answers by checking for the
    /// manifest file.
    async fn exists(&self) -> Result<bool>;

    /// Loads the snapshot manifest.
    ///
    /// The `OpenDalStorage` implementation fails with a snapshot-not-found error when no
    /// snapshot exists.
    async fn read_manifest(&self) -> Result<Manifest>;

    /// Stores `manifest` as the snapshot manifest.
    async fn write_manifest(&self, manifest: &Manifest) -> Result<()>;

    /// Stores the schema definitions held by `schema`.
    async fn write_schema(&self, schema: &SchemaSnapshot) -> Result<()>;

    /// Writes `content` to the file at `path`.
    async fn write_text(&self, path: &str, content: &str) -> Result<()>;

    /// Reads the file at `path` and returns its content as text.
    ///
    /// The `OpenDalStorage` implementation fails when the bytes are not valid UTF-8.
    async fn read_text(&self, path: &str) -> Result<String>;

    /// Deletes the snapshot's stored contents.
    async fn delete_snapshot(&self) -> Result<()>;
}
190
/// [`SnapshotStorage`] implementation backed by an [`ObjectStore`].
pub struct OpenDalStorage {
    /// Store through which every read, write, stat and delete is issued. The constructors
    /// in this file configure its root from the snapshot URI.
    object_store: ObjectStore,
    /// URI this storage was created from; reported in the snapshot-not-found error.
    target_uri: String,
}
196
197impl OpenDalStorage {
198 fn new_operator_rooted(object_store: ObjectStore, target_uri: &str) -> Self {
199 Self {
200 object_store,
201 target_uri: target_uri.to_string(),
202 }
203 }
204
205 fn finish_local_store(object_store: ObjectStore) -> ObjectStore {
206 with_instrument_layers(object_store, false)
207 }
208
209 fn finish_remote_store(object_store: ObjectStore) -> ObjectStore {
210 with_instrument_layers(with_retry_layers(object_store), false)
211 }
212
213 fn ensure_backend_enabled(uri: &str, enabled: bool, reason: &'static str) -> Result<()> {
214 if enabled {
215 Ok(())
216 } else {
217 InvalidUriSnafu { uri, reason }.fail()
218 }
219 }
220
221 fn validate_remote_config<E: std::fmt::Display>(
222 uri: &str,
223 backend: &str,
224 result: std::result::Result<(), E>,
225 ) -> Result<()> {
226 result.map_err(|error| {
227 InvalidUriSnafu {
228 uri,
229 reason: format!("invalid {} config: {}", backend, error),
230 }
231 .build()
232 })
233 }
234
235 pub fn from_file_uri(uri: &str) -> Result<Self> {
237 let path = extract_file_path_from_uri(uri)?;
238
239 let builder = Fs::default().root(&path);
240 let object_store = ObjectStore::new(builder)
241 .context(BuildObjectStoreSnafu)?
242 .finish();
243 Ok(Self::new_operator_rooted(
244 Self::finish_local_store(object_store),
245 uri,
246 ))
247 }
248
249 fn from_file_uri_with_config(uri: &str, storage: &ObjectStoreConfig) -> Result<Self> {
250 if storage.enable_s3 || storage.enable_oss || storage.enable_gcs || storage.enable_azblob {
251 return InvalidUriSnafu {
252 uri,
253 reason: "file:// cannot be used with remote storage flags",
254 }
255 .fail();
256 }
257
258 Self::from_file_uri(uri)
259 }
260
261 fn from_s3_uri(uri: &str, storage: &ObjectStoreConfig) -> Result<Self> {
262 Self::ensure_backend_enabled(
263 uri,
264 storage.enable_s3,
265 "s3:// requires --s3 and related options",
266 )?;
267
268 let location = extract_remote_location(uri)?;
269 let mut config = storage.s3.clone();
270 config.s3_bucket = location.bucket_or_container;
271 config.s3_root = location.root;
272 Self::validate_remote_config(uri, "s3", config.validate())?;
273
274 let conn: S3Connection = config.into();
275 let object_store = ObjectStore::new(S3::from(&conn))
276 .context(BuildObjectStoreSnafu)?
277 .finish();
278 Ok(Self::new_operator_rooted(
279 Self::finish_remote_store(object_store),
280 uri,
281 ))
282 }
283
284 fn from_oss_uri(uri: &str, storage: &ObjectStoreConfig) -> Result<Self> {
285 Self::ensure_backend_enabled(
286 uri,
287 storage.enable_oss,
288 "oss:// requires --oss and related options",
289 )?;
290
291 let location = extract_remote_location(uri)?;
292 let mut config = storage.oss.clone();
293 config.oss_bucket = location.bucket_or_container;
294 config.oss_root = location.root;
295 Self::validate_remote_config(uri, "oss", config.validate())?;
296
297 let conn: OssConnection = config.into();
298 let object_store = ObjectStore::new(Oss::from(&conn))
299 .context(BuildObjectStoreSnafu)?
300 .finish();
301 Ok(Self::new_operator_rooted(
302 Self::finish_remote_store(object_store),
303 uri,
304 ))
305 }
306
307 fn from_gcs_uri(uri: &str, storage: &ObjectStoreConfig) -> Result<Self> {
308 Self::ensure_backend_enabled(
309 uri,
310 storage.enable_gcs,
311 "gs:// or gcs:// requires --gcs and related options",
312 )?;
313
314 let location = extract_remote_location(uri)?;
315 let mut config = storage.gcs.clone();
316 config.gcs_bucket = location.bucket_or_container;
317 config.gcs_root = location.root;
318 Self::validate_remote_config(uri, "gcs", config.validate())?;
319
320 let conn: GcsConnection = config.into();
321 let object_store = ObjectStore::new(Gcs::from(&conn))
322 .context(BuildObjectStoreSnafu)?
323 .finish();
324 Ok(Self::new_operator_rooted(
325 Self::finish_remote_store(object_store),
326 uri,
327 ))
328 }
329
330 fn from_azblob_uri(uri: &str, storage: &ObjectStoreConfig) -> Result<Self> {
331 Self::ensure_backend_enabled(
332 uri,
333 storage.enable_azblob,
334 "azblob:// requires --azblob and related options",
335 )?;
336
337 let location = extract_remote_location(uri)?;
338 let mut config = storage.azblob.clone();
339 config.azblob_container = location.bucket_or_container;
340 config.azblob_root = location.root;
341 Self::validate_remote_config(uri, "azblob", config.validate())?;
342
343 let conn: AzblobConnection = config.into();
344 let object_store = ObjectStore::new(Azblob::from(&conn))
345 .context(BuildObjectStoreSnafu)?
346 .finish();
347 Ok(Self::new_operator_rooted(
348 Self::finish_remote_store(object_store),
349 uri,
350 ))
351 }
352
353 pub fn from_uri(uri: &str, storage: &ObjectStoreConfig) -> Result<Self> {
355 match StorageScheme::from_uri(uri)? {
356 StorageScheme::File => Self::from_file_uri_with_config(uri, storage),
357 StorageScheme::S3 => Self::from_s3_uri(uri, storage),
358 StorageScheme::Oss => Self::from_oss_uri(uri, storage),
359 StorageScheme::Gcs => Self::from_gcs_uri(uri, storage),
360 StorageScheme::Azblob => Self::from_azblob_uri(uri, storage),
361 }
362 }
363
364 async fn read_file(&self, path: &str) -> Result<Vec<u8>> {
366 let data = self
367 .object_store
368 .read(path)
369 .await
370 .context(StorageOperationSnafu {
371 operation: format!("read {}", path),
372 })?;
373 Ok(data.to_vec())
374 }
375
376 async fn write_file(&self, path: &str, data: Vec<u8>) -> Result<()> {
378 self.object_store
379 .write(path, data)
380 .await
381 .map(|_| ())
382 .context(StorageOperationSnafu {
383 operation: format!("write {}", path),
384 })
385 }
386
387 async fn file_exists(&self, path: &str) -> Result<bool> {
389 match self.object_store.stat(path).await {
390 Ok(_) => Ok(true),
391 Err(e) if e.kind() == object_store::ErrorKind::NotFound => Ok(false),
392 Err(e) => Err(e).context(StorageOperationSnafu {
393 operation: format!("check exists {}", path),
394 }),
395 }
396 }
397
398 #[cfg(test)]
399 pub async fn read_schema(&self) -> Result<SchemaSnapshot> {
400 let schemas_path = schema_index_path();
401 let schemas: Vec<SchemaDefinition> = if self.file_exists(&schemas_path).await? {
402 let data = self.read_file(&schemas_path).await?;
403 serde_json::from_slice(&data).context(ManifestParseSnafu)?
404 } else {
405 vec![]
406 };
407
408 Ok(SchemaSnapshot { schemas })
409 }
410}
411
412#[async_trait]
413impl SnapshotStorage for OpenDalStorage {
414 async fn exists(&self) -> Result<bool> {
415 self.file_exists(MANIFEST_FILE).await
416 }
417
418 async fn read_manifest(&self) -> Result<Manifest> {
419 ensure_snapshot_exists(self).await?;
420
421 let data = self.read_file(MANIFEST_FILE).await?;
422 serde_json::from_slice(&data).context(ManifestParseSnafu)
423 }
424
425 async fn write_manifest(&self, manifest: &Manifest) -> Result<()> {
426 let data = serde_json::to_vec_pretty(manifest).context(ManifestSerializeSnafu)?;
427 self.write_file(MANIFEST_FILE, data).await
428 }
429
430 async fn write_schema(&self, schema: &SchemaSnapshot) -> Result<()> {
431 let schemas_path = schema_index_path();
432 let schemas_data =
433 serde_json::to_vec_pretty(&schema.schemas).context(ManifestSerializeSnafu)?;
434 self.write_file(&schemas_path, schemas_data).await
435 }
436
437 async fn write_text(&self, path: &str, content: &str) -> Result<()> {
438 self.write_file(path, content.as_bytes().to_vec()).await
439 }
440
441 async fn read_text(&self, path: &str) -> Result<String> {
442 let data = self.read_file(path).await?;
443 String::from_utf8(data).context(TextDecodeSnafu)
444 }
445
446 async fn delete_snapshot(&self) -> Result<()> {
447 self.object_store
448 .remove_all("/")
449 .await
450 .context(StorageOperationSnafu {
451 operation: "delete snapshot",
452 })
453 }
454}
455
#[cfg(test)]
mod tests {
    use std::collections::HashMap;
    use std::path::Path;

    use object_store::ObjectStore;
    use object_store::services::Fs;
    use tempfile::tempdir;
    use url::Url;

    use super::*;
    use crate::data::export_v2::manifest::{DataFormat, TimeRange};
    use crate::data::export_v2::schema::SchemaDefinition;

    /// Builds a storage over a local `Fs` store rooted at `dir`, without going through
    /// `file://` URI parsing.
    fn make_storage_with_rooted_fs(dir: &Path) -> OpenDalStorage {
        let root = dir.to_str().unwrap();
        let store = ObjectStore::new(Fs::default().root(root))
            .unwrap()
            .finish();
        let target = Url::from_directory_path(dir).unwrap();

        OpenDalStorage::new_operator_rooted(
            OpenDalStorage::finish_local_store(store),
            target.as_str(),
        )
    }

    #[test]
    fn test_validate_uri_valid() {
        let cases = [
            ("s3://bucket/path", StorageScheme::S3),
            ("oss://bucket/path", StorageScheme::Oss),
            ("gs://bucket/path", StorageScheme::Gcs),
            ("gcs://bucket/path", StorageScheme::Gcs),
            ("azblob://container/path", StorageScheme::Azblob),
            ("file:///tmp/backup", StorageScheme::File),
        ];

        for (uri, expected) in cases {
            assert_eq!(validate_uri(uri).unwrap(), expected);
        }
    }

    #[test]
    fn test_validate_uri_invalid() {
        // Bare paths carry no scheme.
        for bare_path in ["/tmp/backup", "./backup", "backup"] {
            assert!(validate_uri(bare_path).is_err());
        }

        // A scheme is present but not a supported one.
        assert!(validate_uri("ftp://server/path").is_err());
    }

    #[test]
    fn test_extract_remote_location_requires_non_empty_root() {
        let rootless = [
            "s3://bucket",
            "s3://bucket/",
            "oss://bucket",
            "gs://bucket",
            "azblob://container",
        ];

        for uri in rootless {
            assert!(extract_remote_location(uri).is_err());
        }
    }

    #[cfg(not(windows))]
    #[test]
    fn test_extract_path_from_uri_unix_examples() {
        for uri in ["file:///tmp/backup", "file://localhost/tmp/backup"] {
            assert_eq!(extract_file_path_from_uri(uri).unwrap(), "/tmp/backup");
        }
    }

    #[test]
    fn test_extract_file_path_from_uri_rejects_file_host() {
        // `tmp` is parsed as the host here, not as the first path segment.
        let result = extract_file_path_from_uri("file://tmp/backup");
        assert!(result.is_err());
    }

    #[test]
    fn test_extract_file_path_from_uri_round_trips_directory_url() {
        let temp = tempdir().unwrap();
        let uri = Url::from_directory_path(temp.path()).unwrap().to_string();

        let extracted = extract_file_path_from_uri(&uri).unwrap();

        assert_eq!(Path::new(&extracted), temp.path());
    }

    #[tokio::test]
    async fn test_read_manifest_reports_requested_uri() {
        let temp = tempdir().unwrap();
        let uri = Url::from_directory_path(temp.path()).unwrap().to_string();
        let storage = OpenDalStorage::from_file_uri(&uri).unwrap();

        // Nothing was written, so reading the manifest must fail and name the URI.
        let message = storage.read_manifest().await.unwrap_err().to_string();

        assert!(message.contains(uri.as_str()));
    }

    #[tokio::test]
    async fn test_manifest_round_trip() {
        let temp = tempdir().unwrap();
        let storage = make_storage_with_rooted_fs(temp.path());

        let original = Manifest::new_full(
            "greptime".to_string(),
            vec!["public".to_string()],
            TimeRange::unbounded(),
            DataFormat::Parquet,
        );

        storage.write_manifest(&original).await.unwrap();
        let reloaded = storage.read_manifest().await.unwrap();

        assert_eq!(reloaded.catalog, original.catalog);
        assert_eq!(reloaded.schemas, original.schemas);
        assert_eq!(reloaded.schema_only, original.schema_only);
        assert_eq!(reloaded.format, original.format);
        assert_eq!(reloaded.snapshot_id, original.snapshot_id);
    }

    #[tokio::test]
    async fn test_schema_round_trip() {
        let temp = tempdir().unwrap();
        let storage = make_storage_with_rooted_fs(temp.path());

        let mut options = HashMap::new();
        options.insert("ttl".to_string(), "7d".to_string());

        let mut original = SchemaSnapshot::new();
        original.add_schema(SchemaDefinition {
            catalog: "greptime".to_string(),
            name: "test_db".to_string(),
            options,
        });

        storage.write_schema(&original).await.unwrap();
        let reloaded = storage.read_schema().await.unwrap();

        assert_eq!(reloaded, original);
    }

    #[tokio::test]
    async fn test_text_round_trip() {
        let temp = tempdir().unwrap();
        let storage = make_storage_with_rooted_fs(temp.path());
        let ddl_path = "schema/ddl/public.sql";
        let ddl = "CREATE TABLE metrics (ts TIMESTAMP TIME INDEX);";

        storage.write_text(ddl_path, ddl).await.unwrap();
        let reloaded = storage.read_text(ddl_path).await.unwrap();

        assert_eq!(reloaded, ddl);
    }

    #[tokio::test]
    async fn test_read_text_rejects_invalid_utf8() {
        let temp = tempdir().unwrap();
        let storage = make_storage_with_rooted_fs(temp.path());
        let ddl_path = "schema/ddl/public.sql";

        // These bytes are not a valid UTF-8 sequence.
        storage
            .write_file(ddl_path, vec![0xff, 0xfe, 0xfd])
            .await
            .unwrap();

        let failure = storage.read_text(ddl_path).await.unwrap_err();
        assert!(failure.to_string().contains("UTF-8"));
    }

    #[tokio::test]
    async fn test_exists_follows_manifest_presence() {
        let temp = tempdir().unwrap();
        let storage = make_storage_with_rooted_fs(temp.path());

        let before = storage.exists().await.unwrap();
        assert!(!before);

        let manifest =
            Manifest::new_schema_only("greptime".to_string(), vec!["public".to_string()]);
        storage.write_manifest(&manifest).await.unwrap();

        let after = storage.exists().await.unwrap();
        assert!(after);
    }

    #[tokio::test]
    async fn test_delete_snapshot_only_removes_rooted_contents() {
        let workspace = tempdir().unwrap();
        let snapshot_dir = workspace.path().join("snapshot");
        let sibling_dir = workspace.path().join("sibling");
        for dir in [&snapshot_dir, &sibling_dir] {
            std::fs::create_dir_all(dir).unwrap();
        }

        let manifest_file = snapshot_dir.join("manifest.json");
        let kept_file = sibling_dir.join("keep.txt");
        std::fs::write(&manifest_file, b"{}").unwrap();
        std::fs::write(&kept_file, b"keep").unwrap();

        let storage = make_storage_with_rooted_fs(&snapshot_dir);
        storage.delete_snapshot().await.unwrap();

        // Only files under the storage root may disappear.
        assert!(!manifest_file.exists());
        assert!(kept_file.exists());
    }
}