1use std::collections::BTreeSet;
21use std::path::Component;
22
23use async_trait::async_trait;
24use futures::TryStreamExt;
25use object_store::services::{Azblob, Fs, Gcs, Oss, S3};
26use object_store::util::{with_instrument_layers, with_retry_layers};
27use object_store::{
28 AzblobConnection, ErrorKind, GcsConnection, ObjectStore, OssConnection, S3Connection,
29};
30use snafu::ResultExt;
31use url::Url;
32
33use crate::common::ObjectStoreConfig;
34use crate::data::export_v2::error::{
35 BuildObjectStoreSnafu, InvalidUriSnafu, ManifestParseSnafu, ManifestSerializeSnafu, Result,
36 SnapshotNotFoundSnafu, StorageOperationSnafu, TextDecodeSnafu, UnsupportedSchemeSnafu,
37 UrlParseSnafu,
38};
39use crate::data::export_v2::manifest::{MANIFEST_FILE, Manifest};
40#[cfg(test)]
41use crate::data::export_v2::schema::SchemaDefinition;
42use crate::data::export_v2::schema::{SCHEMA_DIR, SCHEMAS_FILE, SchemaSnapshot};
43
/// Bucket (or container) and in-bucket root parsed from a remote snapshot URI.
struct RemoteLocation {
    /// Host part of the URI: the bucket for S3/OSS/GCS, the container for Azure Blob.
    bucket_or_container: String,
    /// URI path with its leading `/` characters removed; empty only when the
    /// caller explicitly allowed an empty root.
    root: String,
}
48
/// Storage backend selected by the scheme of a snapshot URI.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StorageScheme {
    /// `s3://` URIs.
    S3,
    /// `oss://` URIs.
    Oss,
    /// `gs://` and `gcs://` URIs.
    Gcs,
    /// `azblob://` URIs.
    Azblob,
    /// `file://` URIs.
    File,
}
63
64impl StorageScheme {
65 pub fn from_uri(uri: &str) -> Result<Self> {
67 let url = Url::parse(uri).context(UrlParseSnafu)?;
68
69 match url.scheme() {
70 "s3" => Ok(Self::S3),
71 "oss" => Ok(Self::Oss),
72 "gs" | "gcs" => Ok(Self::Gcs),
73 "azblob" => Ok(Self::Azblob),
74 "file" => Ok(Self::File),
75 scheme => UnsupportedSchemeSnafu { scheme }.fail(),
76 }
77 }
78}
79
80fn extract_remote_location_with_root_policy(
82 uri: &str,
83 allow_empty_root: bool,
84) -> Result<RemoteLocation> {
85 let url = Url::parse(uri).context(UrlParseSnafu)?;
86 let bucket_or_container = url.host_str().unwrap_or("").to_string();
87 if bucket_or_container.is_empty() {
88 return InvalidUriSnafu {
89 uri,
90 reason: "URI must include bucket/container in host",
91 }
92 .fail();
93 }
94
95 let root = url.path().trim_start_matches('/').to_string();
96 if root.is_empty() && !allow_empty_root {
97 return InvalidUriSnafu {
98 uri,
99 reason: "snapshot URI must include a non-empty path after the bucket/container",
100 }
101 .fail();
102 }
103
104 Ok(RemoteLocation {
105 bucket_or_container,
106 root,
107 })
108}
109
110pub fn validate_uri(uri: &str) -> Result<StorageScheme> {
123 if !uri.contains("://") {
125 return InvalidUriSnafu {
126 uri,
127 reason: "URI must have a scheme (e.g., s3://, file://). Bare paths are not supported.",
128 }
129 .fail();
130 }
131
132 StorageScheme::from_uri(uri)
133}
134
135pub fn validate_snapshot_uri(uri: &str) -> Result<StorageScheme> {
143 let scheme = validate_uri(uri)?;
144 reject_query_or_fragment(uri)?;
145 match scheme {
146 StorageScheme::File => validate_file_snapshot_uri(uri)?,
147 StorageScheme::S3 | StorageScheme::Oss | StorageScheme::Gcs | StorageScheme::Azblob => {
148 extract_remote_location_with_root_policy(uri, false)?;
149 }
150 }
151 Ok(scheme)
152}
153
154fn reject_query_or_fragment(uri: &str) -> Result<()> {
155 let url = Url::parse(uri).context(UrlParseSnafu)?;
156 if url.query().is_some() || url.fragment().is_some() {
157 return InvalidUriSnafu {
158 uri,
159 reason: "snapshot URI must not include query or fragment",
160 }
161 .fail();
162 }
163
164 Ok(())
165}
166
167fn validate_file_snapshot_uri(uri: &str) -> Result<()> {
168 if has_explicit_dot_segment(uri) {
169 return InvalidUriSnafu {
170 uri,
171 reason: "file snapshot URI must not contain '.' or '..' path segments",
172 }
173 .fail();
174 }
175
176 let path = extract_file_path_from_uri(uri)?;
177 let mut normal_component_count = 0;
178
179 for component in std::path::Path::new(&path).components() {
184 match component {
185 Component::Normal(_) => normal_component_count += 1,
186 Component::CurDir | Component::ParentDir => {
187 return InvalidUriSnafu {
188 uri,
189 reason: "file snapshot URI must not contain '.' or '..' path segments",
190 }
191 .fail();
192 }
193 Component::Prefix(_) | Component::RootDir => {}
194 }
195 }
196
197 if normal_component_count < 2 {
198 return InvalidUriSnafu {
199 uri,
200 reason: "file snapshot URI must point to a directory at least two levels deep",
201 }
202 .fail();
203 }
204
205 Ok(())
206}
207
/// Reports whether the raw `uri` text contains a `.` or `..` path segment.
///
/// The check runs on the unparsed string because URL parsing resolves dot
/// segments, after which they can no longer be detected. Anything after the
/// first `#` or `?` is ignored.
///
/// Besides literal dots, the spellings that the URL parser treats as dot
/// segments are recognized too: a percent-encoded dot (`%2e` / `%2E`, in any
/// mix with literal dots) and, because this is used for `file://` URIs where
/// the parser accepts `\` as a path separator, segments delimited by
/// backslashes.
fn has_explicit_dot_segment(uri: &str) -> bool {
    /// True when `segment` consists of exactly one or two dots, each written
    /// either literally or percent-encoded.
    fn is_dot_segment(segment: &str) -> bool {
        let mut rest = segment;
        let mut dots = 0;
        while !rest.is_empty() {
            let tail = rest
                .strip_prefix('.')
                .or_else(|| rest.strip_prefix("%2e"))
                .or_else(|| rest.strip_prefix("%2E"));
            match tail {
                Some(tail) if dots < 2 => {
                    rest = tail;
                    dots += 1;
                }
                // Either a non-dot character or a third dot: a regular name.
                _ => return false,
            }
        }
        dots > 0
    }

    let without_fragment = uri.split_once('#').map_or(uri, |(path, _)| path);
    let path = without_fragment
        .split_once('?')
        .map_or(without_fragment, |(path, _)| path);

    path.split(|c: char| c == '/' || c == '\\')
        .any(is_dot_segment)
}
220
221fn schema_index_path() -> String {
222 format!("{}/{}", SCHEMA_DIR, SCHEMAS_FILE)
223}
224
225fn extract_file_path_from_uri(uri: &str) -> Result<String> {
227 let url = Url::parse(uri).context(UrlParseSnafu)?;
228
229 match url.host_str() {
230 Some(host) if !host.is_empty() && host != "localhost" => InvalidUriSnafu {
231 uri,
232 reason: "file:// URI must use an absolute path like file:///tmp/backup",
233 }
234 .fail(),
235 _ => url
236 .to_file_path()
237 .map_err(|_| {
238 InvalidUriSnafu {
239 uri,
240 reason: "file:// URI must use an absolute path like file:///tmp/backup",
241 }
242 .build()
243 })
244 .map(|path| path.to_string_lossy().into_owned()),
245 }
246}
247
248async fn ensure_snapshot_exists(storage: &OpenDalStorage) -> Result<()> {
249 if storage.exists().await? {
250 Ok(())
251 } else {
252 SnapshotNotFoundSnafu {
253 uri: storage.target_uri.as_str(),
254 }
255 .fail()
256 }
257}
258
259#[async_trait]
263pub trait SnapshotStorage: Send + Sync {
264 async fn exists(&self) -> Result<bool>;
266
267 async fn read_manifest(&self) -> Result<Manifest>;
269
270 async fn write_manifest(&self, manifest: &Manifest) -> Result<()>;
272
273 async fn write_schema(&self, schema: &SchemaSnapshot) -> Result<()>;
275
276 async fn write_text(&self, path: &str, content: &str) -> Result<()>;
278
279 async fn read_text(&self, path: &str) -> Result<String>;
281
282 async fn create_dir_all(&self, path: &str) -> Result<()>;
284
285 async fn list_files_recursive(&self, prefix: &str) -> Result<Vec<String>>;
287
288 async fn delete_snapshot(&self) -> Result<()>;
290}
291
292pub struct OpenDalStorage {
294 object_store: ObjectStore,
295 target_uri: String,
296}
297
298impl OpenDalStorage {
299 fn new_operator_rooted(object_store: ObjectStore, target_uri: &str) -> Self {
300 Self {
301 object_store,
302 target_uri: target_uri.to_string(),
303 }
304 }
305
306 fn finish_local_store(object_store: ObjectStore) -> ObjectStore {
307 with_instrument_layers(object_store, false)
308 }
309
310 fn finish_remote_store(object_store: ObjectStore) -> ObjectStore {
311 with_instrument_layers(with_retry_layers(object_store), false)
312 }
313
314 fn ensure_backend_enabled(uri: &str, enabled: bool, reason: &'static str) -> Result<()> {
315 if enabled {
316 Ok(())
317 } else {
318 InvalidUriSnafu { uri, reason }.fail()
319 }
320 }
321
322 fn validate_remote_config<E: std::fmt::Display>(
323 uri: &str,
324 backend: &str,
325 result: std::result::Result<(), E>,
326 ) -> Result<()> {
327 result.map_err(|error| {
328 InvalidUriSnafu {
329 uri,
330 reason: format!("invalid {} config: {}", backend, error),
331 }
332 .build()
333 })
334 }
335
336 pub fn from_file_uri(uri: &str) -> Result<Self> {
338 let path = extract_file_path_from_uri(uri)?;
339
340 let builder = Fs::default().root(&path);
341 let object_store = ObjectStore::new(builder)
342 .context(BuildObjectStoreSnafu)?
343 .finish();
344 Ok(Self::new_operator_rooted(
345 Self::finish_local_store(object_store),
346 uri,
347 ))
348 }
349
350 fn from_file_uri_with_config(uri: &str, storage: &ObjectStoreConfig) -> Result<Self> {
351 if storage.enable_s3 || storage.enable_oss || storage.enable_gcs || storage.enable_azblob {
352 return InvalidUriSnafu {
353 uri,
354 reason: "file:// cannot be used with remote storage flags",
355 }
356 .fail();
357 }
358
359 Self::from_file_uri(uri)
360 }
361
362 fn from_s3_uri(uri: &str, storage: &ObjectStoreConfig) -> Result<Self> {
363 Self::from_s3_uri_with_root_policy(uri, storage, false)
364 }
365
366 fn from_s3_uri_with_root_policy(
367 uri: &str,
368 storage: &ObjectStoreConfig,
369 allow_empty_root: bool,
370 ) -> Result<Self> {
371 Self::ensure_backend_enabled(
372 uri,
373 storage.enable_s3,
374 "s3:// requires --s3 and related options",
375 )?;
376
377 let location = extract_remote_location_with_root_policy(uri, allow_empty_root)?;
378 let mut config = storage.s3.clone();
379 config.s3_bucket = location.bucket_or_container;
380 config.s3_root = location.root;
381 Self::validate_remote_config(uri, "s3", config.validate())?;
382
383 let conn: S3Connection = config.into();
384 let object_store = ObjectStore::new(S3::from(&conn))
385 .context(BuildObjectStoreSnafu)?
386 .finish();
387 Ok(Self::new_operator_rooted(
388 Self::finish_remote_store(object_store),
389 uri,
390 ))
391 }
392
393 fn from_oss_uri(uri: &str, storage: &ObjectStoreConfig) -> Result<Self> {
394 Self::from_oss_uri_with_root_policy(uri, storage, false)
395 }
396
397 fn from_oss_uri_with_root_policy(
398 uri: &str,
399 storage: &ObjectStoreConfig,
400 allow_empty_root: bool,
401 ) -> Result<Self> {
402 Self::ensure_backend_enabled(
403 uri,
404 storage.enable_oss,
405 "oss:// requires --oss and related options",
406 )?;
407
408 let location = extract_remote_location_with_root_policy(uri, allow_empty_root)?;
409 let mut config = storage.oss.clone();
410 config.oss_bucket = location.bucket_or_container;
411 config.oss_root = location.root;
412 Self::validate_remote_config(uri, "oss", config.validate())?;
413
414 let conn: OssConnection = config.into();
415 let object_store = ObjectStore::new(Oss::from(&conn))
416 .context(BuildObjectStoreSnafu)?
417 .finish();
418 Ok(Self::new_operator_rooted(
419 Self::finish_remote_store(object_store),
420 uri,
421 ))
422 }
423
424 fn from_gcs_uri(uri: &str, storage: &ObjectStoreConfig) -> Result<Self> {
425 Self::from_gcs_uri_with_root_policy(uri, storage, false)
426 }
427
428 fn from_gcs_uri_with_root_policy(
429 uri: &str,
430 storage: &ObjectStoreConfig,
431 allow_empty_root: bool,
432 ) -> Result<Self> {
433 Self::ensure_backend_enabled(
434 uri,
435 storage.enable_gcs,
436 "gs:// or gcs:// requires --gcs and related options",
437 )?;
438
439 let location = extract_remote_location_with_root_policy(uri, allow_empty_root)?;
440 let mut config = storage.gcs.clone();
441 config.gcs_bucket = location.bucket_or_container;
442 config.gcs_root = location.root;
443 if allow_empty_root && config.gcs_root.is_empty() {
445 Self::validate_gcs_parent_config(uri, &config)?;
446 } else {
447 Self::validate_remote_config(uri, "gcs", config.validate())?;
448 }
449
450 let conn: GcsConnection = config.into();
451 let object_store = ObjectStore::new(Gcs::from(&conn))
452 .context(BuildObjectStoreSnafu)?
453 .finish();
454 Ok(Self::new_operator_rooted(
455 Self::finish_remote_store(object_store),
456 uri,
457 ))
458 }
459
460 fn validate_gcs_parent_config(
461 uri: &str,
462 config: &crate::common::PrefixedGcsConnection,
463 ) -> Result<()> {
464 if config.gcs_bucket.is_empty() {
465 return InvalidUriSnafu {
466 uri,
467 reason: "invalid gcs config: GCS bucket must be set when --gcs is enabled.",
468 }
469 .fail();
470 }
471 if config.gcs_scope.is_empty() {
472 return InvalidUriSnafu {
473 uri,
474 reason: "invalid gcs config: GCS scope must be set when --gcs is enabled.",
475 }
476 .fail();
477 }
478 Ok(())
479 }
480
481 fn from_azblob_uri(uri: &str, storage: &ObjectStoreConfig) -> Result<Self> {
482 Self::from_azblob_uri_with_root_policy(uri, storage, false)
483 }
484
485 fn from_azblob_uri_with_root_policy(
486 uri: &str,
487 storage: &ObjectStoreConfig,
488 allow_empty_root: bool,
489 ) -> Result<Self> {
490 Self::ensure_backend_enabled(
491 uri,
492 storage.enable_azblob,
493 "azblob:// requires --azblob and related options",
494 )?;
495
496 let location = extract_remote_location_with_root_policy(uri, allow_empty_root)?;
497 let mut config = storage.azblob.clone();
498 config.azblob_container = location.bucket_or_container;
499 config.azblob_root = location.root;
500 Self::validate_remote_config(uri, "azblob", config.validate())?;
501
502 let conn: AzblobConnection = config.into();
503 let object_store = ObjectStore::new(Azblob::from(&conn))
504 .context(BuildObjectStoreSnafu)?
505 .finish();
506 Ok(Self::new_operator_rooted(
507 Self::finish_remote_store(object_store),
508 uri,
509 ))
510 }
511
512 pub fn from_uri(uri: &str, storage: &ObjectStoreConfig) -> Result<Self> {
514 match StorageScheme::from_uri(uri)? {
515 StorageScheme::File => Self::from_file_uri_with_config(uri, storage),
516 StorageScheme::S3 => Self::from_s3_uri(uri, storage),
517 StorageScheme::Oss => Self::from_oss_uri(uri, storage),
518 StorageScheme::Gcs => Self::from_gcs_uri(uri, storage),
519 StorageScheme::Azblob => Self::from_azblob_uri(uri, storage),
520 }
521 }
522
523 pub fn from_parent_uri(uri: &str, storage: &ObjectStoreConfig) -> Result<Self> {
529 match StorageScheme::from_uri(uri)? {
530 StorageScheme::File => Self::from_file_uri_with_config(uri, storage),
531 StorageScheme::S3 => Self::from_s3_uri_with_root_policy(uri, storage, true),
532 StorageScheme::Oss => Self::from_oss_uri_with_root_policy(uri, storage, true),
533 StorageScheme::Gcs => Self::from_gcs_uri_with_root_policy(uri, storage, true),
534 StorageScheme::Azblob => Self::from_azblob_uri_with_root_policy(uri, storage, true),
535 }
536 }
537
538 async fn read_file(&self, path: &str) -> Result<Vec<u8>> {
540 let data = self
541 .object_store
542 .read(path)
543 .await
544 .context(StorageOperationSnafu {
545 operation: format!("read {}", path),
546 })?;
547 Ok(data.to_vec())
548 }
549
550 pub(crate) async fn read_file_if_exists(&self, path: &str) -> Result<Option<Vec<u8>>> {
552 match self.object_store.read(path).await {
553 Ok(data) => Ok(Some(data.to_vec())),
554 Err(error) if error.kind() == ErrorKind::NotFound => Ok(None),
555 Err(error) => Err(error).context(StorageOperationSnafu {
556 operation: format!("read {}", path),
557 }),
558 }
559 }
560
561 async fn write_file(&self, path: &str, data: Vec<u8>) -> Result<()> {
563 self.object_store
564 .write(path, data)
565 .await
566 .map(|_| ())
567 .context(StorageOperationSnafu {
568 operation: format!("write {}", path),
569 })
570 }
571
572 pub(crate) async fn file_exists(&self, path: &str) -> Result<bool> {
574 match self.object_store.stat(path).await {
575 Ok(metadata) => Ok(!metadata.is_dir()),
576 Err(e) if e.kind() == object_store::ErrorKind::NotFound => Ok(false),
577 Err(e) => Err(e).context(StorageOperationSnafu {
578 operation: format!("check exists {}", path),
579 }),
580 }
581 }
582
583 pub(crate) async fn for_each_file_recursive<F>(&self, prefix: &str, mut f: F) -> Result<()>
586 where
587 F: FnMut(String) -> Result<()>,
588 {
589 let mut lister = match self.object_store.lister_with(prefix).recursive(true).await {
590 Ok(lister) => lister,
591 Err(error) if error.kind() == ErrorKind::NotFound => return Ok(()),
592 Err(error) => {
593 return Err(error).context(StorageOperationSnafu {
594 operation: format!("list {}", prefix),
595 });
596 }
597 };
598
599 while let Some(entry) = lister.try_next().await.context(StorageOperationSnafu {
600 operation: format!("list {}", prefix),
601 })? {
602 if entry.metadata().is_dir() {
603 continue;
604 }
605 f(entry.path().to_string())?;
606 }
607
608 Ok(())
609 }
610
611 pub(crate) async fn list_direct_child_dirs(&self) -> Result<Vec<String>> {
613 let mut lister = match self.object_store.lister_with("/").recursive(false).await {
614 Ok(lister) => lister,
615 Err(error) if error.kind() == ErrorKind::NotFound => return Ok(Vec::new()),
616 Err(error) => {
617 return Err(error).context(StorageOperationSnafu {
618 operation: "list /",
619 });
620 }
621 };
622
623 let mut dirs = BTreeSet::new();
624 while let Some(entry) = lister.try_next().await.context(StorageOperationSnafu {
625 operation: "list /",
626 })? {
627 let path = entry.path().trim_matches('/');
628 if path.is_empty() {
629 continue;
630 }
631
632 if entry.metadata().is_dir()
633 && let Some(name) = path.split('/').next()
634 {
635 dirs.insert(name.to_string());
636 }
637 }
638
639 Ok(dirs.into_iter().collect())
640 }
641
642 #[cfg(test)]
643 pub async fn read_schema(&self) -> Result<SchemaSnapshot> {
644 let schemas_path = schema_index_path();
645 let schemas: Vec<SchemaDefinition> = if self.file_exists(&schemas_path).await? {
646 let data = self.read_file(&schemas_path).await?;
647 serde_json::from_slice(&data).context(ManifestParseSnafu)?
648 } else {
649 vec![]
650 };
651
652 Ok(SchemaSnapshot { schemas })
653 }
654}
655
656#[async_trait]
657impl SnapshotStorage for OpenDalStorage {
658 async fn exists(&self) -> Result<bool> {
659 self.file_exists(MANIFEST_FILE).await
660 }
661
662 async fn read_manifest(&self) -> Result<Manifest> {
663 ensure_snapshot_exists(self).await?;
664
665 let data = self.read_file(MANIFEST_FILE).await?;
666 serde_json::from_slice(&data).context(ManifestParseSnafu)
667 }
668
669 async fn write_manifest(&self, manifest: &Manifest) -> Result<()> {
670 let data = serde_json::to_vec_pretty(manifest).context(ManifestSerializeSnafu)?;
671 self.write_file(MANIFEST_FILE, data).await
672 }
673
674 async fn write_schema(&self, schema: &SchemaSnapshot) -> Result<()> {
675 let schemas_path = schema_index_path();
676 let schemas_data =
677 serde_json::to_vec_pretty(&schema.schemas).context(ManifestSerializeSnafu)?;
678 self.write_file(&schemas_path, schemas_data).await
679 }
680
681 async fn write_text(&self, path: &str, content: &str) -> Result<()> {
682 self.write_file(path, content.as_bytes().to_vec()).await
683 }
684
685 async fn read_text(&self, path: &str) -> Result<String> {
686 let data = self.read_file(path).await?;
687 String::from_utf8(data).context(TextDecodeSnafu)
688 }
689
690 async fn create_dir_all(&self, path: &str) -> Result<()> {
691 self.object_store
692 .create_dir(path)
693 .await
694 .context(StorageOperationSnafu {
695 operation: format!("create dir {}", path),
696 })
697 }
698
699 async fn list_files_recursive(&self, prefix: &str) -> Result<Vec<String>> {
700 let mut files = Vec::new();
701 self.for_each_file_recursive(prefix, |path| {
702 files.push(path);
703 Ok(())
704 })
705 .await?;
706 Ok(files)
707 }
708
709 async fn delete_snapshot(&self) -> Result<()> {
710 self.object_store
711 .delete_with("/")
712 .recursive(true)
713 .await
714 .context(StorageOperationSnafu {
715 operation: "delete snapshot",
716 })
717 }
718}
719
#[cfg(test)]
mod tests {
    use std::collections::HashMap;
    use std::path::Path;

    use object_store::ObjectStore;
    use object_store::services::Fs;
    use tempfile::tempdir;
    use url::Url;

    use super::*;
    use crate::data::export_v2::manifest::{DataFormat, TimeRange};
    use crate::data::export_v2::schema::SchemaDefinition;

    /// Builds a storage over a local `Fs` backend rooted at `dir`.
    fn make_storage_with_rooted_fs(dir: &Path) -> OpenDalStorage {
        let root = dir.to_str().unwrap();
        let operator = ObjectStore::new(Fs::default().root(root))
            .unwrap()
            .finish();
        let target_uri = Url::from_directory_path(dir).unwrap();

        OpenDalStorage::new_operator_rooted(
            OpenDalStorage::finish_local_store(operator),
            target_uri.as_str(),
        )
    }

    #[test]
    fn test_validate_uri_valid() {
        let cases = [
            ("s3://bucket/path", StorageScheme::S3),
            ("oss://bucket/path", StorageScheme::Oss),
            ("gs://bucket/path", StorageScheme::Gcs),
            ("gcs://bucket/path", StorageScheme::Gcs),
            ("azblob://container/path", StorageScheme::Azblob),
            ("file:///tmp/backup", StorageScheme::File),
        ];

        for (uri, expected) in cases {
            assert_eq!(validate_uri(uri).unwrap(), expected, "uri: {uri}");
        }
    }

    #[test]
    fn test_validate_uri_invalid() {
        // Bare paths carry no scheme at all.
        for uri in ["/tmp/backup", "./backup", "backup"] {
            assert!(validate_uri(uri).is_err(), "uri: {uri}");
        }

        // A scheme is present but not supported.
        assert!(validate_uri("ftp://server/path").is_err());
    }

    #[test]
    fn test_extract_remote_location_requires_non_empty_root() {
        let uris = [
            "s3://bucket",
            "s3://bucket/",
            "oss://bucket",
            "gs://bucket",
            "azblob://container",
        ];

        for uri in uris {
            assert!(
                extract_remote_location_with_root_policy(uri, false).is_err(),
                "uri: {uri}"
            );
        }
    }

    #[test]
    fn test_extract_remote_location_allows_empty_root_when_permitted() {
        let cases = [
            ("s3://bucket", "bucket"),
            ("azblob://container/", "container"),
        ];

        for (uri, expected_host) in cases {
            let location = extract_remote_location_with_root_policy(uri, true).unwrap();
            assert_eq!(location.bucket_or_container, expected_host);
            assert_eq!(location.root, "");
        }
    }

    #[test]
    fn test_parent_storage_allows_s3_bucket_root() {
        let mut config = ObjectStoreConfig {
            enable_s3: true,
            ..Default::default()
        };
        config.s3.s3_region = Some("us-east-1".to_string());

        let as_snapshot = OpenDalStorage::from_uri("s3://bucket", &config);
        let as_parent = OpenDalStorage::from_parent_uri("s3://bucket", &config);

        assert!(as_snapshot.is_err());
        assert!(as_parent.is_ok());
    }

    #[test]
    fn test_validate_snapshot_uri_rejects_dangerous_roots() {
        let uris = [
            "s3://bucket",
            "s3://bucket/",
            "oss://bucket",
            "gs://bucket",
            "azblob://container",
            "s3://bucket/snapshot?version=1",
            "file:///tmp/backup#fragment",
            "file:///",
            "file:///tmp",
            "file:///tmp/backup/.",
            "file:///tmp/backup/..",
        ];

        for uri in uris {
            assert!(validate_snapshot_uri(uri).is_err(), "uri: {uri}");
        }
    }

    #[test]
    fn test_validate_snapshot_uri_accepts_snapshot_paths() {
        let remote = validate_snapshot_uri("s3://bucket/snapshots/prod").unwrap();
        assert_eq!(remote, StorageScheme::S3);

        let dir = tempdir().unwrap();
        let snapshot_dir = dir.path().join("snapshot");
        std::fs::create_dir_all(&snapshot_dir).unwrap();
        let uri = Url::from_directory_path(snapshot_dir).unwrap().to_string();

        let local = validate_snapshot_uri(&uri).unwrap();
        assert_eq!(local, StorageScheme::File);
    }

    #[cfg(windows)]
    #[test]
    fn test_validate_snapshot_uri_windows_drive_prefix_depth() {
        for uri in ["file:///C:/", "file:///C:/Users"] {
            assert!(validate_snapshot_uri(uri).is_err(), "uri: {uri}");
        }
        assert!(validate_snapshot_uri("file:///C:/Users/snapshot").is_ok());
    }

    #[cfg(not(windows))]
    #[test]
    fn test_extract_path_from_uri_unix_examples() {
        let cases = [
            ("file:///tmp/backup", "/tmp/backup"),
            ("file://localhost/tmp/backup", "/tmp/backup"),
            ("file:///tmp/my%20backup", "/tmp/my backup"),
            ("file://localhost/tmp/my%20backup", "/tmp/my backup"),
        ];

        for (uri, expected) in cases {
            assert_eq!(extract_file_path_from_uri(uri).unwrap(), expected);
        }
    }

    #[test]
    fn test_extract_file_path_from_uri_rejects_file_host() {
        let result = extract_file_path_from_uri("file://tmp/backup");
        assert!(result.is_err());
    }

    #[test]
    fn test_extract_file_path_from_uri_round_trips_directory_url() {
        let dir = tempdir().unwrap();
        let uri = Url::from_directory_path(dir.path()).unwrap().to_string();

        let extracted = extract_file_path_from_uri(&uri).unwrap();

        assert_eq!(Path::new(&extracted), dir.path());
    }

    #[tokio::test]
    async fn test_read_manifest_reports_requested_uri() {
        let dir = tempdir().unwrap();
        let uri = Url::from_directory_path(dir.path()).unwrap().to_string();
        let storage = OpenDalStorage::from_file_uri(&uri).unwrap();

        let message = storage.read_manifest().await.unwrap_err().to_string();

        assert!(message.contains(uri.as_str()));
    }

    #[tokio::test]
    async fn test_manifest_round_trip() {
        let dir = tempdir().unwrap();
        let storage = make_storage_with_rooted_fs(dir.path());

        let written = Manifest::new_full(
            "greptime".to_string(),
            vec!["public".to_string()],
            TimeRange::unbounded(),
            DataFormat::Parquet,
        );
        storage.write_manifest(&written).await.unwrap();

        let loaded = storage.read_manifest().await.unwrap();

        assert_eq!(loaded.catalog, written.catalog);
        assert_eq!(loaded.schemas, written.schemas);
        assert_eq!(loaded.schema_only, written.schema_only);
        assert_eq!(loaded.format, written.format);
        assert_eq!(loaded.snapshot_id, written.snapshot_id);
    }

    #[tokio::test]
    async fn test_schema_round_trip() {
        let dir = tempdir().unwrap();
        let storage = make_storage_with_rooted_fs(dir.path());

        let definition = SchemaDefinition {
            catalog: "greptime".to_string(),
            name: "test_db".to_string(),
            options: HashMap::from([("ttl".to_string(), "7d".to_string())]),
        };
        let mut written = SchemaSnapshot::new();
        written.add_schema(definition);

        storage.write_schema(&written).await.unwrap();
        let loaded = storage.read_schema().await.unwrap();

        assert_eq!(loaded, written);
    }

    #[tokio::test]
    async fn test_text_round_trip() {
        const DDL_PATH: &str = "schema/ddl/public.sql";

        let dir = tempdir().unwrap();
        let storage = make_storage_with_rooted_fs(dir.path());
        let content = "CREATE TABLE metrics (ts TIMESTAMP TIME INDEX);";

        storage.write_text(DDL_PATH, content).await.unwrap();
        let loaded = storage.read_text(DDL_PATH).await.unwrap();

        assert_eq!(loaded, content);
    }

    #[tokio::test]
    async fn test_read_text_rejects_invalid_utf8() {
        const DDL_PATH: &str = "schema/ddl/public.sql";

        let dir = tempdir().unwrap();
        let storage = make_storage_with_rooted_fs(dir.path());

        storage
            .write_file(DDL_PATH, vec![0xff, 0xfe, 0xfd])
            .await
            .unwrap();

        let message = storage.read_text(DDL_PATH).await.unwrap_err().to_string();
        assert!(message.contains("UTF-8"));
    }

    #[tokio::test]
    async fn test_exists_follows_manifest_presence() {
        let dir = tempdir().unwrap();
        let storage = make_storage_with_rooted_fs(dir.path());

        assert!(!storage.exists().await.unwrap());

        let manifest =
            Manifest::new_schema_only("greptime".to_string(), vec!["public".to_string()]);
        storage.write_manifest(&manifest).await.unwrap();

        assert!(storage.exists().await.unwrap());
    }

    #[tokio::test]
    async fn test_delete_snapshot_only_removes_rooted_contents() {
        let parent = tempdir().unwrap();
        let snapshot_root = parent.path().join("snapshot");
        let sibling = parent.path().join("sibling");
        for dir in [&snapshot_root, &sibling] {
            std::fs::create_dir_all(dir).unwrap();
        }

        let manifest_file = snapshot_root.join("manifest.json");
        let kept_file = sibling.join("keep.txt");
        std::fs::write(&manifest_file, b"{}").unwrap();
        std::fs::write(&kept_file, b"keep").unwrap();

        let storage = make_storage_with_rooted_fs(&snapshot_root);
        storage.delete_snapshot().await.unwrap();

        assert!(!manifest_file.exists());
        assert!(kept_file.exists());
    }
}