Skip to main content

cli/data/
snapshot_storage.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Storage abstraction for Export/Import V2.
16//!
17//! This module provides a unified interface for reading and writing snapshot data
18//! to various storage backends (S3, OSS, GCS, Azure Blob, local filesystem).
19
20use std::collections::BTreeSet;
21use std::path::Component;
22
23use async_trait::async_trait;
24use futures::TryStreamExt;
25use object_store::services::{Azblob, Fs, Gcs, Oss, S3};
26use object_store::util::{with_instrument_layers, with_retry_layers};
27use object_store::{
28    AzblobConnection, ErrorKind, GcsConnection, ObjectStore, OssConnection, S3Connection,
29};
30use snafu::ResultExt;
31use url::Url;
32
33use crate::common::ObjectStoreConfig;
34use crate::data::export_v2::error::{
35    BuildObjectStoreSnafu, InvalidUriSnafu, ManifestParseSnafu, ManifestSerializeSnafu, Result,
36    SnapshotNotFoundSnafu, StorageOperationSnafu, TextDecodeSnafu, UnsupportedSchemeSnafu,
37    UrlParseSnafu,
38};
39use crate::data::export_v2::manifest::{MANIFEST_FILE, Manifest};
40#[cfg(test)]
41use crate::data::export_v2::schema::SchemaDefinition;
42use crate::data::export_v2::schema::{SCHEMA_DIR, SCHEMAS_FILE, SchemaSnapshot};
43
/// Bucket (or container) and object prefix parsed from a remote storage URI.
#[derive(Debug, Clone, PartialEq, Eq)]
struct RemoteLocation {
    /// Host component of the URI: the bucket or container name.
    bucket_or_container: String,
    /// URI path with leading slashes removed. May be empty when the caller
    /// explicitly allows bucket/container roots.
    root: String,
}
48
/// URI schemes supported for snapshot storage.
///
/// Use [`StorageScheme::from_uri`] to derive the scheme from a URI string.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StorageScheme {
    /// Amazon S3 (`s3://`).
    S3,
    /// Alibaba Cloud OSS (`oss://`).
    Oss,
    /// Google Cloud Storage (`gs://` or `gcs://`).
    Gcs,
    /// Azure Blob Storage (`azblob://`).
    Azblob,
    /// Local filesystem (`file://`).
    File,
}
63
64impl StorageScheme {
65    /// Parses storage scheme from URI.
66    pub fn from_uri(uri: &str) -> Result<Self> {
67        let url = Url::parse(uri).context(UrlParseSnafu)?;
68
69        match url.scheme() {
70            "s3" => Ok(Self::S3),
71            "oss" => Ok(Self::Oss),
72            "gs" | "gcs" => Ok(Self::Gcs),
73            "azblob" => Ok(Self::Azblob),
74            "file" => Ok(Self::File),
75            scheme => UnsupportedSchemeSnafu { scheme }.fail(),
76        }
77    }
78}
79
80/// Extracts bucket/container and root path from a URI.
81fn extract_remote_location_with_root_policy(
82    uri: &str,
83    allow_empty_root: bool,
84) -> Result<RemoteLocation> {
85    let url = Url::parse(uri).context(UrlParseSnafu)?;
86    let bucket_or_container = url.host_str().unwrap_or("").to_string();
87    if bucket_or_container.is_empty() {
88        return InvalidUriSnafu {
89            uri,
90            reason: "URI must include bucket/container in host",
91        }
92        .fail();
93    }
94
95    let root = url.path().trim_start_matches('/').to_string();
96    if root.is_empty() && !allow_empty_root {
97        return InvalidUriSnafu {
98            uri,
99            reason: "snapshot URI must include a non-empty path after the bucket/container",
100        }
101        .fail();
102    }
103
104    Ok(RemoteLocation {
105        bucket_or_container,
106        root,
107    })
108}
109
110/// Validates that a URI has a proper scheme.
111///
112/// Rejects bare paths (e.g., `/tmp/backup`, `./backup`) because:
113/// - Schema export (CLI) and data export (server) run in different processes
114/// - Using bare paths would split the snapshot across machines
115///
116/// Supported URI schemes:
117/// - `s3://bucket/path` - Amazon S3
118/// - `oss://bucket/path` - Alibaba Cloud OSS
119/// - `gs://bucket/path` - Google Cloud Storage
120/// - `azblob://container/path` - Azure Blob Storage
121/// - `file:///absolute/path` - Local filesystem
122pub fn validate_uri(uri: &str) -> Result<StorageScheme> {
123    // Must have a scheme
124    if !uri.contains("://") {
125        return InvalidUriSnafu {
126            uri,
127            reason: "URI must have a scheme (e.g., s3://, file://). Bare paths are not supported.",
128        }
129        .fail();
130    }
131
132    StorageScheme::from_uri(uri)
133}
134
135/// Validates a URI for snapshot-scoped destructive operations.
136///
137/// Unlike read-only parent scans, destructive commands must target a concrete
138/// snapshot directory instead of a bucket/container root or filesystem root.
139/// Remote storage buckets/containers already provide namespace isolation, so a
140/// non-empty object prefix is enough; local filesystem paths require at least
141/// two non-root path segments to avoid deleting broad system directories.
142pub fn validate_snapshot_uri(uri: &str) -> Result<StorageScheme> {
143    let scheme = validate_uri(uri)?;
144    reject_query_or_fragment(uri)?;
145    match scheme {
146        StorageScheme::File => validate_file_snapshot_uri(uri)?,
147        StorageScheme::S3 | StorageScheme::Oss | StorageScheme::Gcs | StorageScheme::Azblob => {
148            extract_remote_location_with_root_policy(uri, false)?;
149        }
150    }
151    Ok(scheme)
152}
153
154fn reject_query_or_fragment(uri: &str) -> Result<()> {
155    let url = Url::parse(uri).context(UrlParseSnafu)?;
156    if url.query().is_some() || url.fragment().is_some() {
157        return InvalidUriSnafu {
158            uri,
159            reason: "snapshot URI must not include query or fragment",
160        }
161        .fail();
162    }
163
164    Ok(())
165}
166
167fn validate_file_snapshot_uri(uri: &str) -> Result<()> {
168    if has_explicit_dot_segment(uri) {
169        return InvalidUriSnafu {
170            uri,
171            reason: "file snapshot URI must not contain '.' or '..' path segments",
172        }
173        .fail();
174    }
175
176    let path = extract_file_path_from_uri(uri)?;
177    let mut normal_component_count = 0;
178
179    // This is only a path-shape guard for destructive operations. It does not
180    // resolve symlinks. Drive prefixes and root separators also do not count
181    // toward depth; delete still relies on the manifest check and explicit
182    // confirmation before removing the rooted storage prefix.
183    for component in std::path::Path::new(&path).components() {
184        match component {
185            Component::Normal(_) => normal_component_count += 1,
186            Component::CurDir | Component::ParentDir => {
187                return InvalidUriSnafu {
188                    uri,
189                    reason: "file snapshot URI must not contain '.' or '..' path segments",
190                }
191                .fail();
192            }
193            Component::Prefix(_) | Component::RootDir => {}
194        }
195    }
196
197    if normal_component_count < 2 {
198        return InvalidUriSnafu {
199            uri,
200            reason: "file snapshot URI must point to a directory at least two levels deep",
201        }
202        .fail();
203    }
204
205    Ok(())
206}
207
/// Reports whether the path portion of `uri` contains a `.` or `..` segment.
///
/// Defense in depth: catch dot segments at the raw URI level before
/// `Url::to_file_path()` can normalize them away. The `Path::components()`
/// check in the caller still runs because URL decoding can reintroduce them.
///
/// Besides literal dots, this also recognizes the other spellings that URL
/// parsing treats as dot segments for `file://` URIs: percent-encoded dots
/// (`%2e` / `%2E`, including mixed forms such as `.%2e`) and segments
/// delimited by `\`, which is handled like `/`.
fn has_explicit_dot_segment(uri: &str) -> bool {
    /// Returns true when `segment` consists of exactly one or two dots, where
    /// each dot is either a literal `.` or a percent-encoded `%2e`/`%2E`.
    fn is_dot_segment(segment: &str) -> bool {
        let mut rest = segment.as_bytes();
        let mut dots = 0usize;
        while !rest.is_empty() {
            rest = match rest {
                [b'.', tail @ ..] => tail,
                [b'%', b'2', b'e' | b'E', tail @ ..] => tail,
                _ => return false,
            };
            dots += 1;
            if dots > 2 {
                return false;
            }
        }
        dots > 0
    }

    // Only the path matters: drop the fragment first, then the query.
    let without_fragment = uri.split_once('#').map_or(uri, |(path, _)| path);
    let path = without_fragment
        .split_once('?')
        .map_or(without_fragment, |(path, _)| path);

    path.split(['/', '\\']).any(is_dot_segment)
}
220
221fn schema_index_path() -> String {
222    format!("{}/{}", SCHEMA_DIR, SCHEMAS_FILE)
223}
224
225/// Extracts the absolute filesystem path from a file:// URI.
226fn extract_file_path_from_uri(uri: &str) -> Result<String> {
227    let url = Url::parse(uri).context(UrlParseSnafu)?;
228
229    match url.host_str() {
230        Some(host) if !host.is_empty() && host != "localhost" => InvalidUriSnafu {
231            uri,
232            reason: "file:// URI must use an absolute path like file:///tmp/backup",
233        }
234        .fail(),
235        _ => url
236            .to_file_path()
237            .map_err(|_| {
238                InvalidUriSnafu {
239                    uri,
240                    reason: "file:// URI must use an absolute path like file:///tmp/backup",
241                }
242                .build()
243            })
244            .map(|path| path.to_string_lossy().into_owned()),
245    }
246}
247
248async fn ensure_snapshot_exists(storage: &OpenDalStorage) -> Result<()> {
249    if storage.exists().await? {
250        Ok(())
251    } else {
252        SnapshotNotFoundSnafu {
253            uri: storage.target_uri.as_str(),
254        }
255        .fail()
256    }
257}
258
/// Snapshot storage abstraction.
///
/// Provides operations for reading and writing snapshot data to various storage backends.
/// Paths taken by these methods are relative to the snapshot root.
#[async_trait]
pub trait SnapshotStorage: Send + Sync {
    /// Checks if a snapshot exists at this location (manifest.json exists).
    async fn exists(&self) -> Result<bool>;

    /// Reads and deserializes the manifest file.
    async fn read_manifest(&self) -> Result<Manifest>;

    /// Serializes `manifest` and writes it to the manifest file.
    async fn write_manifest(&self, manifest: &Manifest) -> Result<()>;

    /// Writes the schema index to schema/schemas.json.
    async fn write_schema(&self, schema: &SchemaSnapshot) -> Result<()>;

    /// Writes a text file to a relative path under the snapshot root.
    async fn write_text(&self, path: &str, content: &str) -> Result<()>;

    /// Reads a text file from a relative path under the snapshot root.
    async fn read_text(&self, path: &str) -> Result<String>;

    /// Creates a directory-like prefix under the snapshot root when needed by the backend.
    async fn create_dir_all(&self, path: &str) -> Result<()>;

    /// Lists files recursively under a relative prefix.
    ///
    /// Returns the paths of the files found.
    async fn list_files_recursive(&self, prefix: &str) -> Result<Vec<String>>;

    /// Deletes the entire snapshot (for --force).
    async fn delete_snapshot(&self) -> Result<()>;
}
291
/// OpenDAL-based implementation of SnapshotStorage.
pub struct OpenDalStorage {
    /// Operator through which every read, write, list, and delete is issued.
    object_store: ObjectStore,
    /// URI this storage was created from; reported in "snapshot not found" errors.
    target_uri: String,
}
297
298impl OpenDalStorage {
299    fn new_operator_rooted(object_store: ObjectStore, target_uri: &str) -> Self {
300        Self {
301            object_store,
302            target_uri: target_uri.to_string(),
303        }
304    }
305
    /// Applies the instrumentation layers to a local filesystem operator.
    ///
    /// Unlike [`Self::finish_remote_store`], no retry layers are added.
    // NOTE(review): the meaning of the `false` flag is defined by
    // `with_instrument_layers`; confirm against that helper before changing it.
    fn finish_local_store(object_store: ObjectStore) -> ObjectStore {
        with_instrument_layers(object_store, false)
    }
309
310    fn finish_remote_store(object_store: ObjectStore) -> ObjectStore {
311        with_instrument_layers(with_retry_layers(object_store), false)
312    }
313
314    fn ensure_backend_enabled(uri: &str, enabled: bool, reason: &'static str) -> Result<()> {
315        if enabled {
316            Ok(())
317        } else {
318            InvalidUriSnafu { uri, reason }.fail()
319        }
320    }
321
322    fn validate_remote_config<E: std::fmt::Display>(
323        uri: &str,
324        backend: &str,
325        result: std::result::Result<(), E>,
326    ) -> Result<()> {
327        result.map_err(|error| {
328            InvalidUriSnafu {
329                uri,
330                reason: format!("invalid {} config: {}", backend, error),
331            }
332            .build()
333        })
334    }
335
336    /// Creates a new storage from a file:// URI.
337    pub fn from_file_uri(uri: &str) -> Result<Self> {
338        let path = extract_file_path_from_uri(uri)?;
339
340        let builder = Fs::default().root(&path);
341        let object_store = ObjectStore::new(builder)
342            .context(BuildObjectStoreSnafu)?
343            .finish();
344        Ok(Self::new_operator_rooted(
345            Self::finish_local_store(object_store),
346            uri,
347        ))
348    }
349
350    fn from_file_uri_with_config(uri: &str, storage: &ObjectStoreConfig) -> Result<Self> {
351        if storage.enable_s3 || storage.enable_oss || storage.enable_gcs || storage.enable_azblob {
352            return InvalidUriSnafu {
353                uri,
354                reason: "file:// cannot be used with remote storage flags",
355            }
356            .fail();
357        }
358
359        Self::from_file_uri(uri)
360    }
361
362    fn from_s3_uri(uri: &str, storage: &ObjectStoreConfig) -> Result<Self> {
363        Self::from_s3_uri_with_root_policy(uri, storage, false)
364    }
365
366    fn from_s3_uri_with_root_policy(
367        uri: &str,
368        storage: &ObjectStoreConfig,
369        allow_empty_root: bool,
370    ) -> Result<Self> {
371        Self::ensure_backend_enabled(
372            uri,
373            storage.enable_s3,
374            "s3:// requires --s3 and related options",
375        )?;
376
377        let location = extract_remote_location_with_root_policy(uri, allow_empty_root)?;
378        let mut config = storage.s3.clone();
379        config.s3_bucket = location.bucket_or_container;
380        config.s3_root = location.root;
381        Self::validate_remote_config(uri, "s3", config.validate())?;
382
383        let conn: S3Connection = config.into();
384        let object_store = ObjectStore::new(S3::from(&conn))
385            .context(BuildObjectStoreSnafu)?
386            .finish();
387        Ok(Self::new_operator_rooted(
388            Self::finish_remote_store(object_store),
389            uri,
390        ))
391    }
392
393    fn from_oss_uri(uri: &str, storage: &ObjectStoreConfig) -> Result<Self> {
394        Self::from_oss_uri_with_root_policy(uri, storage, false)
395    }
396
397    fn from_oss_uri_with_root_policy(
398        uri: &str,
399        storage: &ObjectStoreConfig,
400        allow_empty_root: bool,
401    ) -> Result<Self> {
402        Self::ensure_backend_enabled(
403            uri,
404            storage.enable_oss,
405            "oss:// requires --oss and related options",
406        )?;
407
408        let location = extract_remote_location_with_root_policy(uri, allow_empty_root)?;
409        let mut config = storage.oss.clone();
410        config.oss_bucket = location.bucket_or_container;
411        config.oss_root = location.root;
412        Self::validate_remote_config(uri, "oss", config.validate())?;
413
414        let conn: OssConnection = config.into();
415        let object_store = ObjectStore::new(Oss::from(&conn))
416            .context(BuildObjectStoreSnafu)?
417            .finish();
418        Ok(Self::new_operator_rooted(
419            Self::finish_remote_store(object_store),
420            uri,
421        ))
422    }
423
424    fn from_gcs_uri(uri: &str, storage: &ObjectStoreConfig) -> Result<Self> {
425        Self::from_gcs_uri_with_root_policy(uri, storage, false)
426    }
427
428    fn from_gcs_uri_with_root_policy(
429        uri: &str,
430        storage: &ObjectStoreConfig,
431        allow_empty_root: bool,
432    ) -> Result<Self> {
433        Self::ensure_backend_enabled(
434            uri,
435            storage.enable_gcs,
436            "gs:// or gcs:// requires --gcs and related options",
437        )?;
438
439        let location = extract_remote_location_with_root_policy(uri, allow_empty_root)?;
440        let mut config = storage.gcs.clone();
441        config.gcs_bucket = location.bucket_or_container;
442        config.gcs_root = location.root;
443        // GCS validate() rejects empty root, unlike S3/OSS/Azblob.
444        if allow_empty_root && config.gcs_root.is_empty() {
445            Self::validate_gcs_parent_config(uri, &config)?;
446        } else {
447            Self::validate_remote_config(uri, "gcs", config.validate())?;
448        }
449
450        let conn: GcsConnection = config.into();
451        let object_store = ObjectStore::new(Gcs::from(&conn))
452            .context(BuildObjectStoreSnafu)?
453            .finish();
454        Ok(Self::new_operator_rooted(
455            Self::finish_remote_store(object_store),
456            uri,
457        ))
458    }
459
460    fn validate_gcs_parent_config(
461        uri: &str,
462        config: &crate::common::PrefixedGcsConnection,
463    ) -> Result<()> {
464        if config.gcs_bucket.is_empty() {
465            return InvalidUriSnafu {
466                uri,
467                reason: "invalid gcs config: GCS bucket must be set when --gcs is enabled.",
468            }
469            .fail();
470        }
471        if config.gcs_scope.is_empty() {
472            return InvalidUriSnafu {
473                uri,
474                reason: "invalid gcs config: GCS scope must be set when --gcs is enabled.",
475            }
476            .fail();
477        }
478        Ok(())
479    }
480
481    fn from_azblob_uri(uri: &str, storage: &ObjectStoreConfig) -> Result<Self> {
482        Self::from_azblob_uri_with_root_policy(uri, storage, false)
483    }
484
485    fn from_azblob_uri_with_root_policy(
486        uri: &str,
487        storage: &ObjectStoreConfig,
488        allow_empty_root: bool,
489    ) -> Result<Self> {
490        Self::ensure_backend_enabled(
491            uri,
492            storage.enable_azblob,
493            "azblob:// requires --azblob and related options",
494        )?;
495
496        let location = extract_remote_location_with_root_policy(uri, allow_empty_root)?;
497        let mut config = storage.azblob.clone();
498        config.azblob_container = location.bucket_or_container;
499        config.azblob_root = location.root;
500        Self::validate_remote_config(uri, "azblob", config.validate())?;
501
502        let conn: AzblobConnection = config.into();
503        let object_store = ObjectStore::new(Azblob::from(&conn))
504            .context(BuildObjectStoreSnafu)?
505            .finish();
506        Ok(Self::new_operator_rooted(
507            Self::finish_remote_store(object_store),
508            uri,
509        ))
510    }
511
512    /// Creates a new storage from a URI and object store config.
513    pub fn from_uri(uri: &str, storage: &ObjectStoreConfig) -> Result<Self> {
514        match StorageScheme::from_uri(uri)? {
515            StorageScheme::File => Self::from_file_uri_with_config(uri, storage),
516            StorageScheme::S3 => Self::from_s3_uri(uri, storage),
517            StorageScheme::Oss => Self::from_oss_uri(uri, storage),
518            StorageScheme::Gcs => Self::from_gcs_uri(uri, storage),
519            StorageScheme::Azblob => Self::from_azblob_uri(uri, storage),
520        }
521    }
522
523    /// Creates storage rooted at a snapshot parent URI.
524    ///
525    /// Parent-oriented commands such as `export-v2 list` may scan bucket/container
526    /// roots. Snapshot-oriented commands must keep using `from_uri`, which rejects
527    /// empty remote roots to avoid unsafe snapshot operations at bucket scope.
528    pub fn from_parent_uri(uri: &str, storage: &ObjectStoreConfig) -> Result<Self> {
529        match StorageScheme::from_uri(uri)? {
530            StorageScheme::File => Self::from_file_uri_with_config(uri, storage),
531            StorageScheme::S3 => Self::from_s3_uri_with_root_policy(uri, storage, true),
532            StorageScheme::Oss => Self::from_oss_uri_with_root_policy(uri, storage, true),
533            StorageScheme::Gcs => Self::from_gcs_uri_with_root_policy(uri, storage, true),
534            StorageScheme::Azblob => Self::from_azblob_uri_with_root_policy(uri, storage, true),
535        }
536    }
537
538    /// Reads a file as bytes.
539    async fn read_file(&self, path: &str) -> Result<Vec<u8>> {
540        let data = self
541            .object_store
542            .read(path)
543            .await
544            .context(StorageOperationSnafu {
545                operation: format!("read {}", path),
546            })?;
547        Ok(data.to_vec())
548    }
549
550    /// Reads a file as bytes if it exists.
551    pub(crate) async fn read_file_if_exists(&self, path: &str) -> Result<Option<Vec<u8>>> {
552        match self.object_store.read(path).await {
553            Ok(data) => Ok(Some(data.to_vec())),
554            Err(error) if error.kind() == ErrorKind::NotFound => Ok(None),
555            Err(error) => Err(error).context(StorageOperationSnafu {
556                operation: format!("read {}", path),
557            }),
558        }
559    }
560
561    /// Writes bytes to a file.
562    async fn write_file(&self, path: &str, data: Vec<u8>) -> Result<()> {
563        self.object_store
564            .write(path, data)
565            .await
566            .map(|_| ())
567            .context(StorageOperationSnafu {
568                operation: format!("write {}", path),
569            })
570    }
571
572    /// Checks if a file exists using stat.
573    pub(crate) async fn file_exists(&self, path: &str) -> Result<bool> {
574        match self.object_store.stat(path).await {
575            Ok(metadata) => Ok(!metadata.is_dir()),
576            Err(e) if e.kind() == object_store::ErrorKind::NotFound => Ok(false),
577            Err(e) => Err(e).context(StorageOperationSnafu {
578                operation: format!("check exists {}", path),
579            }),
580        }
581    }
582
583    /// Iterates files recursively under a relative prefix without materializing
584    /// the full listing first.
585    pub(crate) async fn for_each_file_recursive<F>(&self, prefix: &str, mut f: F) -> Result<()>
586    where
587        F: FnMut(String) -> Result<()>,
588    {
589        let mut lister = match self.object_store.lister_with(prefix).recursive(true).await {
590            Ok(lister) => lister,
591            Err(error) if error.kind() == ErrorKind::NotFound => return Ok(()),
592            Err(error) => {
593                return Err(error).context(StorageOperationSnafu {
594                    operation: format!("list {}", prefix),
595                });
596            }
597        };
598
599        while let Some(entry) = lister.try_next().await.context(StorageOperationSnafu {
600            operation: format!("list {}", prefix),
601        })? {
602            if entry.metadata().is_dir() {
603                continue;
604            }
605            f(entry.path().to_string())?;
606        }
607
608        Ok(())
609    }
610
611    /// Lists direct child directory names under the storage root.
612    pub(crate) async fn list_direct_child_dirs(&self) -> Result<Vec<String>> {
613        let mut lister = match self.object_store.lister_with("/").recursive(false).await {
614            Ok(lister) => lister,
615            Err(error) if error.kind() == ErrorKind::NotFound => return Ok(Vec::new()),
616            Err(error) => {
617                return Err(error).context(StorageOperationSnafu {
618                    operation: "list /",
619                });
620            }
621        };
622
623        let mut dirs = BTreeSet::new();
624        while let Some(entry) = lister.try_next().await.context(StorageOperationSnafu {
625            operation: "list /",
626        })? {
627            let path = entry.path().trim_matches('/');
628            if path.is_empty() {
629                continue;
630            }
631
632            if entry.metadata().is_dir()
633                && let Some(name) = path.split('/').next()
634            {
635                dirs.insert(name.to_string());
636            }
637        }
638
639        Ok(dirs.into_iter().collect())
640    }
641
642    #[cfg(test)]
643    pub async fn read_schema(&self) -> Result<SchemaSnapshot> {
644        let schemas_path = schema_index_path();
645        let schemas: Vec<SchemaDefinition> = if self.file_exists(&schemas_path).await? {
646            let data = self.read_file(&schemas_path).await?;
647            serde_json::from_slice(&data).context(ManifestParseSnafu)?
648        } else {
649            vec![]
650        };
651
652        Ok(SchemaSnapshot { schemas })
653    }
654}
655
656#[async_trait]
657impl SnapshotStorage for OpenDalStorage {
658    async fn exists(&self) -> Result<bool> {
659        self.file_exists(MANIFEST_FILE).await
660    }
661
662    async fn read_manifest(&self) -> Result<Manifest> {
663        ensure_snapshot_exists(self).await?;
664
665        let data = self.read_file(MANIFEST_FILE).await?;
666        serde_json::from_slice(&data).context(ManifestParseSnafu)
667    }
668
669    async fn write_manifest(&self, manifest: &Manifest) -> Result<()> {
670        let data = serde_json::to_vec_pretty(manifest).context(ManifestSerializeSnafu)?;
671        self.write_file(MANIFEST_FILE, data).await
672    }
673
674    async fn write_schema(&self, schema: &SchemaSnapshot) -> Result<()> {
675        let schemas_path = schema_index_path();
676        let schemas_data =
677            serde_json::to_vec_pretty(&schema.schemas).context(ManifestSerializeSnafu)?;
678        self.write_file(&schemas_path, schemas_data).await
679    }
680
681    async fn write_text(&self, path: &str, content: &str) -> Result<()> {
682        self.write_file(path, content.as_bytes().to_vec()).await
683    }
684
685    async fn read_text(&self, path: &str) -> Result<String> {
686        let data = self.read_file(path).await?;
687        String::from_utf8(data).context(TextDecodeSnafu)
688    }
689
690    async fn create_dir_all(&self, path: &str) -> Result<()> {
691        self.object_store
692            .create_dir(path)
693            .await
694            .context(StorageOperationSnafu {
695                operation: format!("create dir {}", path),
696            })
697    }
698
699    async fn list_files_recursive(&self, prefix: &str) -> Result<Vec<String>> {
700        let mut files = Vec::new();
701        self.for_each_file_recursive(prefix, |path| {
702            files.push(path);
703            Ok(())
704        })
705        .await?;
706        Ok(files)
707    }
708
709    async fn delete_snapshot(&self) -> Result<()> {
710        self.object_store
711            .delete_with("/")
712            .recursive(true)
713            .await
714            .context(StorageOperationSnafu {
715                operation: "delete snapshot",
716            })
717    }
718}
719
720#[cfg(test)]
721mod tests {
722    use std::collections::HashMap;
723    use std::path::Path;
724
725    use object_store::ObjectStore;
726    use object_store::services::Fs;
727    use tempfile::tempdir;
728    use url::Url;
729
730    use super::*;
731    use crate::data::export_v2::manifest::{DataFormat, TimeRange};
732    use crate::data::export_v2::schema::SchemaDefinition;
733
734    fn make_storage_with_rooted_fs(dir: &std::path::Path) -> OpenDalStorage {
735        let object_store = ObjectStore::new(Fs::default().root(dir.to_str().unwrap()))
736            .unwrap()
737            .finish();
738        OpenDalStorage::new_operator_rooted(
739            OpenDalStorage::finish_local_store(object_store),
740            Url::from_directory_path(dir).unwrap().as_ref(),
741        )
742    }
743
744    #[test]
745    fn test_validate_uri_valid() {
746        assert_eq!(validate_uri("s3://bucket/path").unwrap(), StorageScheme::S3);
747        assert_eq!(
748            validate_uri("oss://bucket/path").unwrap(),
749            StorageScheme::Oss
750        );
751        assert_eq!(
752            validate_uri("gs://bucket/path").unwrap(),
753            StorageScheme::Gcs
754        );
755        assert_eq!(
756            validate_uri("gcs://bucket/path").unwrap(),
757            StorageScheme::Gcs
758        );
759        assert_eq!(
760            validate_uri("azblob://container/path").unwrap(),
761            StorageScheme::Azblob
762        );
763        assert_eq!(
764            validate_uri("file:///tmp/backup").unwrap(),
765            StorageScheme::File
766        );
767    }
768
769    #[test]
770    fn test_validate_uri_invalid() {
771        // Bare paths should be rejected
772        assert!(validate_uri("/tmp/backup").is_err());
773        assert!(validate_uri("./backup").is_err());
774        assert!(validate_uri("backup").is_err());
775
776        // Unknown schemes
777        assert!(validate_uri("ftp://server/path").is_err());
778    }
779
780    #[test]
781    fn test_extract_remote_location_requires_non_empty_root() {
782        assert!(extract_remote_location_with_root_policy("s3://bucket", false).is_err());
783        assert!(extract_remote_location_with_root_policy("s3://bucket/", false).is_err());
784        assert!(extract_remote_location_with_root_policy("oss://bucket", false).is_err());
785        assert!(extract_remote_location_with_root_policy("gs://bucket", false).is_err());
786        assert!(extract_remote_location_with_root_policy("azblob://container", false).is_err());
787    }
788
789    #[test]
790    fn test_extract_remote_location_allows_empty_root_when_permitted() {
791        let location = extract_remote_location_with_root_policy("s3://bucket", true).unwrap();
792        assert_eq!(location.bucket_or_container, "bucket");
793        assert_eq!(location.root, "");
794
795        let location =
796            extract_remote_location_with_root_policy("azblob://container/", true).unwrap();
797        assert_eq!(location.bucket_or_container, "container");
798        assert_eq!(location.root, "");
799    }
800
801    #[test]
802    fn test_parent_storage_allows_s3_bucket_root() {
803        let mut storage = ObjectStoreConfig {
804            enable_s3: true,
805            ..Default::default()
806        };
807        storage.s3.s3_region = Some("us-east-1".to_string());
808
809        assert!(OpenDalStorage::from_uri("s3://bucket", &storage).is_err());
810        assert!(OpenDalStorage::from_parent_uri("s3://bucket", &storage).is_ok());
811    }
812
813    #[test]
814    fn test_validate_snapshot_uri_rejects_dangerous_roots() {
815        assert!(validate_snapshot_uri("s3://bucket").is_err());
816        assert!(validate_snapshot_uri("s3://bucket/").is_err());
817        assert!(validate_snapshot_uri("oss://bucket").is_err());
818        assert!(validate_snapshot_uri("gs://bucket").is_err());
819        assert!(validate_snapshot_uri("azblob://container").is_err());
820        assert!(validate_snapshot_uri("s3://bucket/snapshot?version=1").is_err());
821        assert!(validate_snapshot_uri("file:///tmp/backup#fragment").is_err());
822        assert!(validate_snapshot_uri("file:///").is_err());
823        assert!(validate_snapshot_uri("file:///tmp").is_err());
824        assert!(validate_snapshot_uri("file:///tmp/backup/.").is_err());
825        assert!(validate_snapshot_uri("file:///tmp/backup/..").is_err());
826    }
827
828    #[test]
829    fn test_validate_snapshot_uri_accepts_snapshot_paths() {
830        assert_eq!(
831            validate_snapshot_uri("s3://bucket/snapshots/prod").unwrap(),
832            StorageScheme::S3
833        );
834
835        let dir = tempdir().unwrap();
836        let snapshot = dir.path().join("snapshot");
837        std::fs::create_dir_all(&snapshot).unwrap();
838        let uri = Url::from_directory_path(snapshot).unwrap().to_string();
839        assert_eq!(validate_snapshot_uri(&uri).unwrap(), StorageScheme::File);
840    }
841
/// Windows only: a drive root and a single directory under it are rejected,
/// while a directory nested one level deeper is accepted.
#[cfg(windows)]
#[test]
fn test_validate_snapshot_uri_windows_drive_prefix_depth() {
    let drive_root = validate_snapshot_uri("file:///C:/");
    assert!(drive_root.is_err());

    let top_level_dir = validate_snapshot_uri("file:///C:/Users");
    assert!(top_level_dir.is_err());

    let nested_dir = validate_snapshot_uri("file:///C:/Users/snapshot");
    assert!(nested_dir.is_ok());
}
849
/// Non-Windows only: checks the local path that `extract_file_path_from_uri`
/// produces for `file://` URIs with and without a `localhost` host, and with a
/// percent-encoded space.
#[cfg(not(windows))]
#[test]
fn test_extract_path_from_uri_unix_examples() {
    // Empty host.
    let no_host = extract_file_path_from_uri("file:///tmp/backup").unwrap();
    assert_eq!(no_host, "/tmp/backup");

    // Explicit `localhost` host yields the same path.
    let with_localhost = extract_file_path_from_uri("file://localhost/tmp/backup").unwrap();
    assert_eq!(with_localhost, "/tmp/backup");

    // `%20` is decoded to a literal space.
    let decoded = extract_file_path_from_uri("file:///tmp/my%20backup").unwrap();
    assert_eq!(decoded, "/tmp/my backup");

    // Decoding also applies when the `localhost` host is present.
    let decoded_with_localhost =
        extract_file_path_from_uri("file://localhost/tmp/my%20backup").unwrap();
    assert_eq!(decoded_with_localhost, "/tmp/my backup");
}
870
/// Checks that a `file://` URI whose host position holds `tmp` (only two
/// slashes after the scheme) is rejected.
#[test]
fn test_extract_file_path_from_uri_rejects_file_host() {
    let outcome = extract_file_path_from_uri("file://tmp/backup");
    assert!(outcome.is_err());
}
875
/// Converts a temp directory into a directory URL, extracts the path back out
/// of it, and checks the result equals the original directory path.
#[test]
fn test_extract_file_path_from_uri_round_trips_directory_url() {
    let tmp = tempdir().unwrap();
    let directory_url = Url::from_directory_path(tmp.path()).unwrap();
    let uri = directory_url.to_string();

    let extracted = extract_file_path_from_uri(&uri).unwrap();

    assert_eq!(Path::new(&extracted), tmp.path());
}
884
885    #[tokio::test]
886    async fn test_read_manifest_reports_requested_uri() {
887        let dir = tempdir().unwrap();
888        let uri = Url::from_directory_path(dir.path()).unwrap().to_string();
889        let storage = OpenDalStorage::from_file_uri(&uri).unwrap();
890
891        let error = storage.read_manifest().await.unwrap_err().to_string();
892
893        assert!(error.contains(uri.as_str()));
894    }
895
896    #[tokio::test]
897    async fn test_manifest_round_trip() {
898        let dir = tempdir().unwrap();
899        let storage = make_storage_with_rooted_fs(dir.path());
900
901        let manifest = Manifest::new_full(
902            "greptime".to_string(),
903            vec!["public".to_string()],
904            TimeRange::unbounded(),
905            DataFormat::Parquet,
906        );
907
908        storage.write_manifest(&manifest).await.unwrap();
909        let loaded = storage.read_manifest().await.unwrap();
910
911        assert_eq!(loaded.catalog, manifest.catalog);
912        assert_eq!(loaded.schemas, manifest.schemas);
913        assert_eq!(loaded.schema_only, manifest.schema_only);
914        assert_eq!(loaded.format, manifest.format);
915        assert_eq!(loaded.snapshot_id, manifest.snapshot_id);
916    }
917
918    #[tokio::test]
919    async fn test_schema_round_trip() {
920        let dir = tempdir().unwrap();
921        let storage = make_storage_with_rooted_fs(dir.path());
922
923        let mut snapshot = SchemaSnapshot::new();
924        snapshot.add_schema(SchemaDefinition {
925            catalog: "greptime".to_string(),
926            name: "test_db".to_string(),
927            options: HashMap::from([("ttl".to_string(), "7d".to_string())]),
928        });
929
930        storage.write_schema(&snapshot).await.unwrap();
931        let loaded = storage.read_schema().await.unwrap();
932
933        assert_eq!(loaded, snapshot);
934    }
935
936    #[tokio::test]
937    async fn test_text_round_trip() {
938        let dir = tempdir().unwrap();
939        let storage = make_storage_with_rooted_fs(dir.path());
940        let content = "CREATE TABLE metrics (ts TIMESTAMP TIME INDEX);";
941
942        storage
943            .write_text("schema/ddl/public.sql", content)
944            .await
945            .unwrap();
946        let loaded = storage.read_text("schema/ddl/public.sql").await.unwrap();
947
948        assert_eq!(loaded, content);
949    }
950
951    #[tokio::test]
952    async fn test_read_text_rejects_invalid_utf8() {
953        let dir = tempdir().unwrap();
954        let storage = make_storage_with_rooted_fs(dir.path());
955
956        storage
957            .write_file("schema/ddl/public.sql", vec![0xff, 0xfe, 0xfd])
958            .await
959            .unwrap();
960
961        let error = storage
962            .read_text("schema/ddl/public.sql")
963            .await
964            .unwrap_err();
965        assert!(error.to_string().contains("UTF-8"));
966    }
967
968    #[tokio::test]
969    async fn test_exists_follows_manifest_presence() {
970        let dir = tempdir().unwrap();
971        let storage = make_storage_with_rooted_fs(dir.path());
972
973        assert!(!storage.exists().await.unwrap());
974
975        storage
976            .write_manifest(&Manifest::new_schema_only(
977                "greptime".to_string(),
978                vec!["public".to_string()],
979            ))
980            .await
981            .unwrap();
982
983        assert!(storage.exists().await.unwrap());
984    }
985
986    #[tokio::test]
987    async fn test_delete_snapshot_only_removes_rooted_contents() {
988        let parent = tempdir().unwrap();
989        let snapshot_root = parent.path().join("snapshot");
990        let sibling = parent.path().join("sibling");
991        std::fs::create_dir_all(&snapshot_root).unwrap();
992        std::fs::create_dir_all(&sibling).unwrap();
993        std::fs::write(snapshot_root.join("manifest.json"), b"{}").unwrap();
994        std::fs::write(sibling.join("keep.txt"), b"keep").unwrap();
995
996        let storage = make_storage_with_rooted_fs(&snapshot_root);
997        storage.delete_snapshot().await.unwrap();
998
999        assert!(!snapshot_root.join("manifest.json").exists());
1000        assert!(sibling.join("keep.txt").exists());
1001    }
1002}