Skip to main content

cli/data/
snapshot_storage.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Storage abstraction for Export/Import V2.
16//!
17//! This module provides a unified interface for reading and writing snapshot data
18//! to various storage backends (S3, OSS, GCS, Azure Blob, local filesystem).
19
20use std::collections::BTreeSet;
21
22use async_trait::async_trait;
23use futures::TryStreamExt;
24use object_store::services::{Azblob, Fs, Gcs, Oss, S3};
25use object_store::util::{with_instrument_layers, with_retry_layers};
26use object_store::{
27    AzblobConnection, ErrorKind, GcsConnection, ObjectStore, OssConnection, S3Connection,
28};
29use snafu::ResultExt;
30use url::Url;
31
32use crate::common::ObjectStoreConfig;
33use crate::data::export_v2::error::{
34    BuildObjectStoreSnafu, InvalidUriSnafu, ManifestParseSnafu, ManifestSerializeSnafu, Result,
35    SnapshotNotFoundSnafu, StorageOperationSnafu, TextDecodeSnafu, UnsupportedSchemeSnafu,
36    UrlParseSnafu,
37};
38use crate::data::export_v2::manifest::{MANIFEST_FILE, Manifest};
39#[cfg(test)]
40use crate::data::export_v2::schema::SchemaDefinition;
41use crate::data::export_v2::schema::{SCHEMA_DIR, SCHEMAS_FILE, SchemaSnapshot};
42
/// Bucket/container name and key root extracted from a remote snapshot URI.
struct RemoteLocation {
    /// Bucket (S3/OSS/GCS) or container (Azure Blob) taken from the URI host.
    bucket_or_container: String,
    /// Key prefix under the bucket/container; may be empty only when the
    /// caller explicitly allows an empty root (parent-oriented commands).
    root: String,
}
47
/// URI schemes supported for snapshot storage.
///
/// Parsed from the scheme component of a snapshot URI by [`StorageScheme::from_uri`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StorageScheme {
    /// Amazon S3 (`s3://`).
    S3,
    /// Alibaba Cloud OSS (`oss://`).
    Oss,
    /// Google Cloud Storage (`gs://` or `gcs://`).
    Gcs,
    /// Azure Blob Storage (`azblob://`).
    Azblob,
    /// Local filesystem (`file://`).
    File,
}
62
63impl StorageScheme {
64    /// Parses storage scheme from URI.
65    pub fn from_uri(uri: &str) -> Result<Self> {
66        let url = Url::parse(uri).context(UrlParseSnafu)?;
67
68        match url.scheme() {
69            "s3" => Ok(Self::S3),
70            "oss" => Ok(Self::Oss),
71            "gs" | "gcs" => Ok(Self::Gcs),
72            "azblob" => Ok(Self::Azblob),
73            "file" => Ok(Self::File),
74            scheme => UnsupportedSchemeSnafu { scheme }.fail(),
75        }
76    }
77}
78
79/// Extracts bucket/container and root path from a URI.
80fn extract_remote_location_with_root_policy(
81    uri: &str,
82    allow_empty_root: bool,
83) -> Result<RemoteLocation> {
84    let url = Url::parse(uri).context(UrlParseSnafu)?;
85    let bucket_or_container = url.host_str().unwrap_or("").to_string();
86    if bucket_or_container.is_empty() {
87        return InvalidUriSnafu {
88            uri,
89            reason: "URI must include bucket/container in host",
90        }
91        .fail();
92    }
93
94    let root = url.path().trim_start_matches('/').to_string();
95    if root.is_empty() && !allow_empty_root {
96        return InvalidUriSnafu {
97            uri,
98            reason: "snapshot URI must include a non-empty path after the bucket/container",
99        }
100        .fail();
101    }
102
103    Ok(RemoteLocation {
104        bucket_or_container,
105        root,
106    })
107}
108
109/// Validates that a URI has a proper scheme.
110///
111/// Rejects bare paths (e.g., `/tmp/backup`, `./backup`) because:
112/// - Schema export (CLI) and data export (server) run in different processes
113/// - Using bare paths would split the snapshot across machines
114///
115/// Supported URI schemes:
116/// - `s3://bucket/path` - Amazon S3
117/// - `oss://bucket/path` - Alibaba Cloud OSS
118/// - `gs://bucket/path` - Google Cloud Storage
119/// - `azblob://container/path` - Azure Blob Storage
120/// - `file:///absolute/path` - Local filesystem
121pub fn validate_uri(uri: &str) -> Result<StorageScheme> {
122    // Must have a scheme
123    if !uri.contains("://") {
124        return InvalidUriSnafu {
125            uri,
126            reason: "URI must have a scheme (e.g., s3://, file://). Bare paths are not supported.",
127        }
128        .fail();
129    }
130
131    StorageScheme::from_uri(uri)
132}
133
134fn schema_index_path() -> String {
135    format!("{}/{}", SCHEMA_DIR, SCHEMAS_FILE)
136}
137
138/// Extracts the absolute filesystem path from a file:// URI.
139fn extract_file_path_from_uri(uri: &str) -> Result<String> {
140    let url = Url::parse(uri).context(UrlParseSnafu)?;
141
142    match url.host_str() {
143        Some(host) if !host.is_empty() && host != "localhost" => InvalidUriSnafu {
144            uri,
145            reason: "file:// URI must use an absolute path like file:///tmp/backup",
146        }
147        .fail(),
148        _ => url
149            .to_file_path()
150            .map_err(|_| {
151                InvalidUriSnafu {
152                    uri,
153                    reason: "file:// URI must use an absolute path like file:///tmp/backup",
154                }
155                .build()
156            })
157            .map(|path| path.to_string_lossy().into_owned()),
158    }
159}
160
161async fn ensure_snapshot_exists(storage: &OpenDalStorage) -> Result<()> {
162    if storage.exists().await? {
163        Ok(())
164    } else {
165        SnapshotNotFoundSnafu {
166            uri: storage.target_uri.as_str(),
167        }
168        .fail()
169    }
170}
171
/// Snapshot storage abstraction.
///
/// Provides operations for reading and writing snapshot data to various storage backends.
/// All paths are relative to the snapshot root the implementation was created with.
#[async_trait]
pub trait SnapshotStorage: Send + Sync {
    /// Checks if a snapshot exists at this location (manifest.json exists).
    async fn exists(&self) -> Result<bool>;

    /// Reads the manifest file.
    ///
    /// Implementations should fail when no snapshot exists at this location.
    async fn read_manifest(&self) -> Result<Manifest>;

    /// Writes the manifest file.
    async fn write_manifest(&self, manifest: &Manifest) -> Result<()>;

    /// Writes the schema index to schema/schemas.json.
    async fn write_schema(&self, schema: &SchemaSnapshot) -> Result<()>;

    /// Writes a text file to a relative path under the snapshot root.
    async fn write_text(&self, path: &str, content: &str) -> Result<()>;

    /// Reads a text file from a relative path under the snapshot root.
    ///
    /// Implementations should reject content that is not valid UTF-8.
    async fn read_text(&self, path: &str) -> Result<String>;

    /// Creates a directory-like prefix under the snapshot root when needed by the backend.
    async fn create_dir_all(&self, path: &str) -> Result<()>;

    /// Lists files recursively under a relative prefix.
    async fn list_files_recursive(&self, prefix: &str) -> Result<Vec<String>>;

    /// Deletes the entire snapshot (for --force).
    ///
    /// Only content under the snapshot root should be removed.
    async fn delete_snapshot(&self) -> Result<()>;
}
204
/// OpenDAL-based implementation of SnapshotStorage.
pub struct OpenDalStorage {
    /// Operator rooted at the snapshot location; all relative paths resolve
    /// under that root.
    object_store: ObjectStore,
    /// The URI the storage was created from, echoed in error messages
    /// (e.g. `SnapshotNotFound`).
    target_uri: String,
}
210
211impl OpenDalStorage {
212    fn new_operator_rooted(object_store: ObjectStore, target_uri: &str) -> Self {
213        Self {
214            object_store,
215            target_uri: target_uri.to_string(),
216        }
217    }
218
219    fn finish_local_store(object_store: ObjectStore) -> ObjectStore {
220        with_instrument_layers(object_store, false)
221    }
222
223    fn finish_remote_store(object_store: ObjectStore) -> ObjectStore {
224        with_instrument_layers(with_retry_layers(object_store), false)
225    }
226
227    fn ensure_backend_enabled(uri: &str, enabled: bool, reason: &'static str) -> Result<()> {
228        if enabled {
229            Ok(())
230        } else {
231            InvalidUriSnafu { uri, reason }.fail()
232        }
233    }
234
235    fn validate_remote_config<E: std::fmt::Display>(
236        uri: &str,
237        backend: &str,
238        result: std::result::Result<(), E>,
239    ) -> Result<()> {
240        result.map_err(|error| {
241            InvalidUriSnafu {
242                uri,
243                reason: format!("invalid {} config: {}", backend, error),
244            }
245            .build()
246        })
247    }
248
249    /// Creates a new storage from a file:// URI.
250    pub fn from_file_uri(uri: &str) -> Result<Self> {
251        let path = extract_file_path_from_uri(uri)?;
252
253        let builder = Fs::default().root(&path);
254        let object_store = ObjectStore::new(builder)
255            .context(BuildObjectStoreSnafu)?
256            .finish();
257        Ok(Self::new_operator_rooted(
258            Self::finish_local_store(object_store),
259            uri,
260        ))
261    }
262
263    fn from_file_uri_with_config(uri: &str, storage: &ObjectStoreConfig) -> Result<Self> {
264        if storage.enable_s3 || storage.enable_oss || storage.enable_gcs || storage.enable_azblob {
265            return InvalidUriSnafu {
266                uri,
267                reason: "file:// cannot be used with remote storage flags",
268            }
269            .fail();
270        }
271
272        Self::from_file_uri(uri)
273    }
274
275    fn from_s3_uri(uri: &str, storage: &ObjectStoreConfig) -> Result<Self> {
276        Self::from_s3_uri_with_root_policy(uri, storage, false)
277    }
278
279    fn from_s3_uri_with_root_policy(
280        uri: &str,
281        storage: &ObjectStoreConfig,
282        allow_empty_root: bool,
283    ) -> Result<Self> {
284        Self::ensure_backend_enabled(
285            uri,
286            storage.enable_s3,
287            "s3:// requires --s3 and related options",
288        )?;
289
290        let location = extract_remote_location_with_root_policy(uri, allow_empty_root)?;
291        let mut config = storage.s3.clone();
292        config.s3_bucket = location.bucket_or_container;
293        config.s3_root = location.root;
294        Self::validate_remote_config(uri, "s3", config.validate())?;
295
296        let conn: S3Connection = config.into();
297        let object_store = ObjectStore::new(S3::from(&conn))
298            .context(BuildObjectStoreSnafu)?
299            .finish();
300        Ok(Self::new_operator_rooted(
301            Self::finish_remote_store(object_store),
302            uri,
303        ))
304    }
305
306    fn from_oss_uri(uri: &str, storage: &ObjectStoreConfig) -> Result<Self> {
307        Self::from_oss_uri_with_root_policy(uri, storage, false)
308    }
309
310    fn from_oss_uri_with_root_policy(
311        uri: &str,
312        storage: &ObjectStoreConfig,
313        allow_empty_root: bool,
314    ) -> Result<Self> {
315        Self::ensure_backend_enabled(
316            uri,
317            storage.enable_oss,
318            "oss:// requires --oss and related options",
319        )?;
320
321        let location = extract_remote_location_with_root_policy(uri, allow_empty_root)?;
322        let mut config = storage.oss.clone();
323        config.oss_bucket = location.bucket_or_container;
324        config.oss_root = location.root;
325        Self::validate_remote_config(uri, "oss", config.validate())?;
326
327        let conn: OssConnection = config.into();
328        let object_store = ObjectStore::new(Oss::from(&conn))
329            .context(BuildObjectStoreSnafu)?
330            .finish();
331        Ok(Self::new_operator_rooted(
332            Self::finish_remote_store(object_store),
333            uri,
334        ))
335    }
336
337    fn from_gcs_uri(uri: &str, storage: &ObjectStoreConfig) -> Result<Self> {
338        Self::from_gcs_uri_with_root_policy(uri, storage, false)
339    }
340
341    fn from_gcs_uri_with_root_policy(
342        uri: &str,
343        storage: &ObjectStoreConfig,
344        allow_empty_root: bool,
345    ) -> Result<Self> {
346        Self::ensure_backend_enabled(
347            uri,
348            storage.enable_gcs,
349            "gs:// or gcs:// requires --gcs and related options",
350        )?;
351
352        let location = extract_remote_location_with_root_policy(uri, allow_empty_root)?;
353        let mut config = storage.gcs.clone();
354        config.gcs_bucket = location.bucket_or_container;
355        config.gcs_root = location.root;
356        // GCS validate() rejects empty root, unlike S3/OSS/Azblob.
357        if allow_empty_root && config.gcs_root.is_empty() {
358            Self::validate_gcs_parent_config(uri, &config)?;
359        } else {
360            Self::validate_remote_config(uri, "gcs", config.validate())?;
361        }
362
363        let conn: GcsConnection = config.into();
364        let object_store = ObjectStore::new(Gcs::from(&conn))
365            .context(BuildObjectStoreSnafu)?
366            .finish();
367        Ok(Self::new_operator_rooted(
368            Self::finish_remote_store(object_store),
369            uri,
370        ))
371    }
372
373    fn validate_gcs_parent_config(
374        uri: &str,
375        config: &crate::common::PrefixedGcsConnection,
376    ) -> Result<()> {
377        if config.gcs_bucket.is_empty() {
378            return InvalidUriSnafu {
379                uri,
380                reason: "invalid gcs config: GCS bucket must be set when --gcs is enabled.",
381            }
382            .fail();
383        }
384        if config.gcs_scope.is_empty() {
385            return InvalidUriSnafu {
386                uri,
387                reason: "invalid gcs config: GCS scope must be set when --gcs is enabled.",
388            }
389            .fail();
390        }
391        Ok(())
392    }
393
394    fn from_azblob_uri(uri: &str, storage: &ObjectStoreConfig) -> Result<Self> {
395        Self::from_azblob_uri_with_root_policy(uri, storage, false)
396    }
397
398    fn from_azblob_uri_with_root_policy(
399        uri: &str,
400        storage: &ObjectStoreConfig,
401        allow_empty_root: bool,
402    ) -> Result<Self> {
403        Self::ensure_backend_enabled(
404            uri,
405            storage.enable_azblob,
406            "azblob:// requires --azblob and related options",
407        )?;
408
409        let location = extract_remote_location_with_root_policy(uri, allow_empty_root)?;
410        let mut config = storage.azblob.clone();
411        config.azblob_container = location.bucket_or_container;
412        config.azblob_root = location.root;
413        Self::validate_remote_config(uri, "azblob", config.validate())?;
414
415        let conn: AzblobConnection = config.into();
416        let object_store = ObjectStore::new(Azblob::from(&conn))
417            .context(BuildObjectStoreSnafu)?
418            .finish();
419        Ok(Self::new_operator_rooted(
420            Self::finish_remote_store(object_store),
421            uri,
422        ))
423    }
424
425    /// Creates a new storage from a URI and object store config.
426    pub fn from_uri(uri: &str, storage: &ObjectStoreConfig) -> Result<Self> {
427        match StorageScheme::from_uri(uri)? {
428            StorageScheme::File => Self::from_file_uri_with_config(uri, storage),
429            StorageScheme::S3 => Self::from_s3_uri(uri, storage),
430            StorageScheme::Oss => Self::from_oss_uri(uri, storage),
431            StorageScheme::Gcs => Self::from_gcs_uri(uri, storage),
432            StorageScheme::Azblob => Self::from_azblob_uri(uri, storage),
433        }
434    }
435
436    /// Creates storage rooted at a snapshot parent URI.
437    ///
438    /// Parent-oriented commands such as `export-v2 list` may scan bucket/container
439    /// roots. Snapshot-oriented commands must keep using `from_uri`, which rejects
440    /// empty remote roots to avoid unsafe snapshot operations at bucket scope.
441    pub fn from_parent_uri(uri: &str, storage: &ObjectStoreConfig) -> Result<Self> {
442        match StorageScheme::from_uri(uri)? {
443            StorageScheme::File => Self::from_file_uri_with_config(uri, storage),
444            StorageScheme::S3 => Self::from_s3_uri_with_root_policy(uri, storage, true),
445            StorageScheme::Oss => Self::from_oss_uri_with_root_policy(uri, storage, true),
446            StorageScheme::Gcs => Self::from_gcs_uri_with_root_policy(uri, storage, true),
447            StorageScheme::Azblob => Self::from_azblob_uri_with_root_policy(uri, storage, true),
448        }
449    }
450
451    /// Reads a file as bytes.
452    async fn read_file(&self, path: &str) -> Result<Vec<u8>> {
453        let data = self
454            .object_store
455            .read(path)
456            .await
457            .context(StorageOperationSnafu {
458                operation: format!("read {}", path),
459            })?;
460        Ok(data.to_vec())
461    }
462
463    /// Reads a file as bytes if it exists.
464    pub(crate) async fn read_file_if_exists(&self, path: &str) -> Result<Option<Vec<u8>>> {
465        match self.object_store.read(path).await {
466            Ok(data) => Ok(Some(data.to_vec())),
467            Err(error) if error.kind() == ErrorKind::NotFound => Ok(None),
468            Err(error) => Err(error).context(StorageOperationSnafu {
469                operation: format!("read {}", path),
470            }),
471        }
472    }
473
474    /// Writes bytes to a file.
475    async fn write_file(&self, path: &str, data: Vec<u8>) -> Result<()> {
476        self.object_store
477            .write(path, data)
478            .await
479            .map(|_| ())
480            .context(StorageOperationSnafu {
481                operation: format!("write {}", path),
482            })
483    }
484
485    /// Checks if a file exists using stat.
486    async fn file_exists(&self, path: &str) -> Result<bool> {
487        match self.object_store.stat(path).await {
488            Ok(_) => Ok(true),
489            Err(e) if e.kind() == object_store::ErrorKind::NotFound => Ok(false),
490            Err(e) => Err(e).context(StorageOperationSnafu {
491                operation: format!("check exists {}", path),
492            }),
493        }
494    }
495
496    /// Lists direct child directory names under the storage root.
497    pub(crate) async fn list_direct_child_dirs(&self) -> Result<Vec<String>> {
498        let mut lister = match self.object_store.lister_with("/").recursive(false).await {
499            Ok(lister) => lister,
500            Err(error) if error.kind() == ErrorKind::NotFound => return Ok(Vec::new()),
501            Err(error) => {
502                return Err(error).context(StorageOperationSnafu {
503                    operation: "list /",
504                });
505            }
506        };
507
508        let mut dirs = BTreeSet::new();
509        while let Some(entry) = lister.try_next().await.context(StorageOperationSnafu {
510            operation: "list /",
511        })? {
512            let path = entry.path().trim_matches('/');
513            if path.is_empty() {
514                continue;
515            }
516
517            if entry.metadata().is_dir()
518                && let Some(name) = path.split('/').next()
519            {
520                dirs.insert(name.to_string());
521            }
522        }
523
524        Ok(dirs.into_iter().collect())
525    }
526
527    #[cfg(test)]
528    pub async fn read_schema(&self) -> Result<SchemaSnapshot> {
529        let schemas_path = schema_index_path();
530        let schemas: Vec<SchemaDefinition> = if self.file_exists(&schemas_path).await? {
531            let data = self.read_file(&schemas_path).await?;
532            serde_json::from_slice(&data).context(ManifestParseSnafu)?
533        } else {
534            vec![]
535        };
536
537        Ok(SchemaSnapshot { schemas })
538    }
539}
540
541#[async_trait]
542impl SnapshotStorage for OpenDalStorage {
543    async fn exists(&self) -> Result<bool> {
544        self.file_exists(MANIFEST_FILE).await
545    }
546
547    async fn read_manifest(&self) -> Result<Manifest> {
548        ensure_snapshot_exists(self).await?;
549
550        let data = self.read_file(MANIFEST_FILE).await?;
551        serde_json::from_slice(&data).context(ManifestParseSnafu)
552    }
553
554    async fn write_manifest(&self, manifest: &Manifest) -> Result<()> {
555        let data = serde_json::to_vec_pretty(manifest).context(ManifestSerializeSnafu)?;
556        self.write_file(MANIFEST_FILE, data).await
557    }
558
559    async fn write_schema(&self, schema: &SchemaSnapshot) -> Result<()> {
560        let schemas_path = schema_index_path();
561        let schemas_data =
562            serde_json::to_vec_pretty(&schema.schemas).context(ManifestSerializeSnafu)?;
563        self.write_file(&schemas_path, schemas_data).await
564    }
565
566    async fn write_text(&self, path: &str, content: &str) -> Result<()> {
567        self.write_file(path, content.as_bytes().to_vec()).await
568    }
569
570    async fn read_text(&self, path: &str) -> Result<String> {
571        let data = self.read_file(path).await?;
572        String::from_utf8(data).context(TextDecodeSnafu)
573    }
574
575    async fn create_dir_all(&self, path: &str) -> Result<()> {
576        self.object_store
577            .create_dir(path)
578            .await
579            .context(StorageOperationSnafu {
580                operation: format!("create dir {}", path),
581            })
582    }
583
584    async fn list_files_recursive(&self, prefix: &str) -> Result<Vec<String>> {
585        let mut lister = match self.object_store.lister_with(prefix).recursive(true).await {
586            Ok(lister) => lister,
587            Err(error) if error.kind() == ErrorKind::NotFound => return Ok(Vec::new()),
588            Err(error) => {
589                return Err(error).context(StorageOperationSnafu {
590                    operation: format!("list {}", prefix),
591                });
592            }
593        };
594
595        let mut files = Vec::new();
596        while let Some(entry) = lister.try_next().await.context(StorageOperationSnafu {
597            operation: format!("list {}", prefix),
598        })? {
599            if entry.metadata().is_dir() {
600                continue;
601            }
602            files.push(entry.path().to_string());
603        }
604        Ok(files)
605    }
606
607    async fn delete_snapshot(&self) -> Result<()> {
608        self.object_store
609            .delete_with("/")
610            .recursive(true)
611            .await
612            .context(StorageOperationSnafu {
613                operation: "delete snapshot",
614            })
615    }
616}
617
#[cfg(test)]
mod tests {
    use std::collections::HashMap;
    use std::path::Path;

    use object_store::ObjectStore;
    use object_store::services::Fs;
    use tempfile::tempdir;
    use url::Url;

    use super::*;
    use crate::data::export_v2::manifest::{DataFormat, TimeRange};
    use crate::data::export_v2::schema::SchemaDefinition;

    /// Builds an `OpenDalStorage` backed by a local Fs operator rooted at `dir`.
    fn make_storage_with_rooted_fs(dir: &std::path::Path) -> OpenDalStorage {
        let object_store = ObjectStore::new(Fs::default().root(dir.to_str().unwrap()))
            .unwrap()
            .finish();
        OpenDalStorage::new_operator_rooted(
            OpenDalStorage::finish_local_store(object_store),
            Url::from_directory_path(dir).unwrap().as_ref(),
        )
    }

    /// Every supported scheme maps to the expected `StorageScheme` variant.
    #[test]
    fn test_validate_uri_valid() {
        assert_eq!(validate_uri("s3://bucket/path").unwrap(), StorageScheme::S3);
        assert_eq!(
            validate_uri("oss://bucket/path").unwrap(),
            StorageScheme::Oss
        );
        assert_eq!(
            validate_uri("gs://bucket/path").unwrap(),
            StorageScheme::Gcs
        );
        assert_eq!(
            validate_uri("gcs://bucket/path").unwrap(),
            StorageScheme::Gcs
        );
        assert_eq!(
            validate_uri("azblob://container/path").unwrap(),
            StorageScheme::Azblob
        );
        assert_eq!(
            validate_uri("file:///tmp/backup").unwrap(),
            StorageScheme::File
        );
    }

    /// Bare paths and unknown schemes are both rejected.
    #[test]
    fn test_validate_uri_invalid() {
        // Bare paths should be rejected
        assert!(validate_uri("/tmp/backup").is_err());
        assert!(validate_uri("./backup").is_err());
        assert!(validate_uri("backup").is_err());

        // Unknown schemes
        assert!(validate_uri("ftp://server/path").is_err());
    }

    /// With `allow_empty_root == false`, bucket-only URIs (with or without a
    /// trailing slash) must fail for every remote scheme.
    #[test]
    fn test_extract_remote_location_requires_non_empty_root() {
        assert!(extract_remote_location_with_root_policy("s3://bucket", false).is_err());
        assert!(extract_remote_location_with_root_policy("s3://bucket/", false).is_err());
        assert!(extract_remote_location_with_root_policy("oss://bucket", false).is_err());
        assert!(extract_remote_location_with_root_policy("gs://bucket", false).is_err());
        assert!(extract_remote_location_with_root_policy("azblob://container", false).is_err());
    }

    /// With `allow_empty_root == true`, bucket-only URIs yield an empty root.
    #[test]
    fn test_extract_remote_location_allows_empty_root_when_permitted() {
        let location = extract_remote_location_with_root_policy("s3://bucket", true).unwrap();
        assert_eq!(location.bucket_or_container, "bucket");
        assert_eq!(location.root, "");

        let location =
            extract_remote_location_with_root_policy("azblob://container/", true).unwrap();
        assert_eq!(location.bucket_or_container, "container");
        assert_eq!(location.root, "");
    }

    /// `from_parent_uri` accepts a bucket root that `from_uri` rejects.
    #[test]
    fn test_parent_storage_allows_s3_bucket_root() {
        let mut storage = ObjectStoreConfig {
            enable_s3: true,
            ..Default::default()
        };
        storage.s3.s3_region = Some("us-east-1".to_string());

        assert!(OpenDalStorage::from_uri("s3://bucket", &storage).is_err());
        assert!(OpenDalStorage::from_parent_uri("s3://bucket", &storage).is_ok());
    }

    /// Unix-style file:// URIs (empty host or `localhost`) resolve to absolute
    /// paths, with percent-encoding decoded.
    #[cfg(not(windows))]
    #[test]
    fn test_extract_path_from_uri_unix_examples() {
        assert_eq!(
            extract_file_path_from_uri("file:///tmp/backup").unwrap(),
            "/tmp/backup"
        );
        assert_eq!(
            extract_file_path_from_uri("file://localhost/tmp/backup").unwrap(),
            "/tmp/backup"
        );
        assert_eq!(
            extract_file_path_from_uri("file:///tmp/my%20backup").unwrap(),
            "/tmp/my backup"
        );
        assert_eq!(
            extract_file_path_from_uri("file://localhost/tmp/my%20backup").unwrap(),
            "/tmp/my backup"
        );
    }

    /// `file://tmp/backup` parses "tmp" as a host and must be rejected.
    #[test]
    fn test_extract_file_path_from_uri_rejects_file_host() {
        assert!(extract_file_path_from_uri("file://tmp/backup").is_err());
    }

    /// A directory URL built by `Url::from_directory_path` round-trips back to
    /// the same filesystem path.
    #[test]
    fn test_extract_file_path_from_uri_round_trips_directory_url() {
        let dir = tempdir().unwrap();
        let uri = Url::from_directory_path(dir.path()).unwrap().to_string();
        let path = extract_file_path_from_uri(&uri).unwrap();

        assert_eq!(Path::new(&path), dir.path());
    }

    /// The SnapshotNotFound error message mentions the URI the user supplied.
    #[tokio::test]
    async fn test_read_manifest_reports_requested_uri() {
        let dir = tempdir().unwrap();
        let uri = Url::from_directory_path(dir.path()).unwrap().to_string();
        let storage = OpenDalStorage::from_file_uri(&uri).unwrap();

        let error = storage.read_manifest().await.unwrap_err().to_string();

        assert!(error.contains(uri.as_str()));
    }

    /// Manifest fields survive a write/read round trip.
    #[tokio::test]
    async fn test_manifest_round_trip() {
        let dir = tempdir().unwrap();
        let storage = make_storage_with_rooted_fs(dir.path());

        let manifest = Manifest::new_full(
            "greptime".to_string(),
            vec!["public".to_string()],
            TimeRange::unbounded(),
            DataFormat::Parquet,
        );

        storage.write_manifest(&manifest).await.unwrap();
        let loaded = storage.read_manifest().await.unwrap();

        assert_eq!(loaded.catalog, manifest.catalog);
        assert_eq!(loaded.schemas, manifest.schemas);
        assert_eq!(loaded.schema_only, manifest.schema_only);
        assert_eq!(loaded.format, manifest.format);
        assert_eq!(loaded.snapshot_id, manifest.snapshot_id);
    }

    /// Schema index survives a write/read round trip.
    #[tokio::test]
    async fn test_schema_round_trip() {
        let dir = tempdir().unwrap();
        let storage = make_storage_with_rooted_fs(dir.path());

        let mut snapshot = SchemaSnapshot::new();
        snapshot.add_schema(SchemaDefinition {
            catalog: "greptime".to_string(),
            name: "test_db".to_string(),
            options: HashMap::from([("ttl".to_string(), "7d".to_string())]),
        });

        storage.write_schema(&snapshot).await.unwrap();
        let loaded = storage.read_schema().await.unwrap();

        assert_eq!(loaded, snapshot);
    }

    /// Text files survive a write/read round trip.
    #[tokio::test]
    async fn test_text_round_trip() {
        let dir = tempdir().unwrap();
        let storage = make_storage_with_rooted_fs(dir.path());
        let content = "CREATE TABLE metrics (ts TIMESTAMP TIME INDEX);";

        storage
            .write_text("schema/ddl/public.sql", content)
            .await
            .unwrap();
        let loaded = storage.read_text("schema/ddl/public.sql").await.unwrap();

        assert_eq!(loaded, content);
    }

    /// read_text surfaces a UTF-8 decode error for non-UTF-8 bytes.
    #[tokio::test]
    async fn test_read_text_rejects_invalid_utf8() {
        let dir = tempdir().unwrap();
        let storage = make_storage_with_rooted_fs(dir.path());

        storage
            .write_file("schema/ddl/public.sql", vec![0xff, 0xfe, 0xfd])
            .await
            .unwrap();

        let error = storage
            .read_text("schema/ddl/public.sql")
            .await
            .unwrap_err();
        assert!(error.to_string().contains("UTF-8"));
    }

    /// exists() flips from false to true once a manifest is written.
    #[tokio::test]
    async fn test_exists_follows_manifest_presence() {
        let dir = tempdir().unwrap();
        let storage = make_storage_with_rooted_fs(dir.path());

        assert!(!storage.exists().await.unwrap());

        storage
            .write_manifest(&Manifest::new_schema_only(
                "greptime".to_string(),
                vec!["public".to_string()],
            ))
            .await
            .unwrap();

        assert!(storage.exists().await.unwrap());
    }

    /// delete_snapshot must not escape the rooted directory: a sibling
    /// directory next to the snapshot root stays intact.
    #[tokio::test]
    async fn test_delete_snapshot_only_removes_rooted_contents() {
        let parent = tempdir().unwrap();
        let snapshot_root = parent.path().join("snapshot");
        let sibling = parent.path().join("sibling");
        std::fs::create_dir_all(&snapshot_root).unwrap();
        std::fs::create_dir_all(&sibling).unwrap();
        std::fs::write(snapshot_root.join("manifest.json"), b"{}").unwrap();
        std::fs::write(sibling.join("keep.txt"), b"keep").unwrap();

        let storage = make_storage_with_rooted_fs(&snapshot_root);
        storage.delete_snapshot().await.unwrap();

        assert!(!snapshot_root.join("manifest.json").exists());
        assert!(sibling.join("keep.txt").exists());
    }
}