Skip to main content

cli/data/
snapshot_storage.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Storage abstraction for Export/Import V2.
16//!
17//! This module provides a unified interface for reading and writing snapshot data
18//! to various storage backends (S3, OSS, GCS, Azure Blob, local filesystem).
19
20use async_trait::async_trait;
21use object_store::services::{Azblob, Fs, Gcs, Oss, S3};
22use object_store::util::{with_instrument_layers, with_retry_layers};
23use object_store::{AzblobConnection, GcsConnection, ObjectStore, OssConnection, S3Connection};
24use snafu::ResultExt;
25use url::Url;
26
27use crate::common::ObjectStoreConfig;
28use crate::data::export_v2::error::{
29    BuildObjectStoreSnafu, InvalidUriSnafu, ManifestParseSnafu, ManifestSerializeSnafu, Result,
30    SnapshotNotFoundSnafu, StorageOperationSnafu, TextDecodeSnafu, UnsupportedSchemeSnafu,
31    UrlParseSnafu,
32};
33use crate::data::export_v2::manifest::{MANIFEST_FILE, Manifest};
34#[cfg(test)]
35use crate::data::export_v2::schema::SchemaDefinition;
36use crate::data::export_v2::schema::{SCHEMA_DIR, SCHEMAS_FILE, SchemaSnapshot};
37
/// Bucket (or container) and root prefix parsed from a remote snapshot URI.
///
/// Values returned by `extract_remote_location` always have both fields
/// non-empty.
#[derive(Debug, Clone, PartialEq, Eq)]
struct RemoteLocation {
    /// Host component of the URI: the bucket (S3/OSS/GCS) or container (Azure Blob).
    bucket_or_container: String,
    /// Path component of the URI with its leading slashes removed.
    root: String,
}
42
/// URI schemes supported for snapshot storage.
///
/// Each variant identifies the backend a snapshot URI points at; the variant
/// is derived from the URI scheme by [`StorageScheme::from_uri`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StorageScheme {
    /// Amazon S3 (`s3://`).
    S3,
    /// Alibaba Cloud OSS (`oss://`).
    Oss,
    /// Google Cloud Storage (`gs://` or `gcs://`).
    Gcs,
    /// Azure Blob Storage (`azblob://`).
    Azblob,
    /// Local filesystem (`file://`).
    File,
}
57
58impl StorageScheme {
59    /// Parses storage scheme from URI.
60    pub fn from_uri(uri: &str) -> Result<Self> {
61        let url = Url::parse(uri).context(UrlParseSnafu)?;
62
63        match url.scheme() {
64            "s3" => Ok(Self::S3),
65            "oss" => Ok(Self::Oss),
66            "gs" | "gcs" => Ok(Self::Gcs),
67            "azblob" => Ok(Self::Azblob),
68            "file" => Ok(Self::File),
69            scheme => UnsupportedSchemeSnafu { scheme }.fail(),
70        }
71    }
72}
73
74/// Extracts bucket/container and root path from a URI.
75fn extract_remote_location(uri: &str) -> Result<RemoteLocation> {
76    let url = Url::parse(uri).context(UrlParseSnafu)?;
77    let bucket_or_container = url.host_str().unwrap_or("").to_string();
78    if bucket_or_container.is_empty() {
79        return InvalidUriSnafu {
80            uri,
81            reason: "URI must include bucket/container in host",
82        }
83        .fail();
84    }
85
86    let root = url.path().trim_start_matches('/').to_string();
87    if root.is_empty() {
88        return InvalidUriSnafu {
89            uri,
90            reason: "snapshot URI must include a non-empty path after the bucket/container",
91        }
92        .fail();
93    }
94
95    Ok(RemoteLocation {
96        bucket_or_container,
97        root,
98    })
99}
100
101/// Validates that a URI has a proper scheme.
102///
103/// Rejects bare paths (e.g., `/tmp/backup`, `./backup`) because:
104/// - Schema export (CLI) and data export (server) run in different processes
105/// - Using bare paths would split the snapshot across machines
106///
107/// Supported URI schemes:
108/// - `s3://bucket/path` - Amazon S3
109/// - `oss://bucket/path` - Alibaba Cloud OSS
110/// - `gs://bucket/path` - Google Cloud Storage
111/// - `azblob://container/path` - Azure Blob Storage
112/// - `file:///absolute/path` - Local filesystem
113pub fn validate_uri(uri: &str) -> Result<StorageScheme> {
114    // Must have a scheme
115    if !uri.contains("://") {
116        return InvalidUriSnafu {
117            uri,
118            reason: "URI must have a scheme (e.g., s3://, file://). Bare paths are not supported.",
119        }
120        .fail();
121    }
122
123    StorageScheme::from_uri(uri)
124}
125
126fn schema_index_path() -> String {
127    format!("{}/{}", SCHEMA_DIR, SCHEMAS_FILE)
128}
129
130/// Extracts the absolute filesystem path from a file:// URI.
131fn extract_file_path_from_uri(uri: &str) -> Result<String> {
132    let url = Url::parse(uri).context(UrlParseSnafu)?;
133
134    match url.host_str() {
135        Some(host) if !host.is_empty() && host != "localhost" => InvalidUriSnafu {
136            uri,
137            reason: "file:// URI must use an absolute path like file:///tmp/backup",
138        }
139        .fail(),
140        _ => url
141            .to_file_path()
142            .map(|path| path.to_string_lossy().into_owned())
143            .map_err(|_| {
144                InvalidUriSnafu {
145                    uri,
146                    reason: "file:// URI must use a valid absolute filesystem path",
147                }
148                .build()
149            }),
150    }
151}
152
153async fn ensure_snapshot_exists(storage: &OpenDalStorage) -> Result<()> {
154    if storage.exists().await? {
155        Ok(())
156    } else {
157        SnapshotNotFoundSnafu {
158            uri: storage.target_uri.as_str(),
159        }
160        .fail()
161    }
162}
163
/// Snapshot storage abstraction.
///
/// Provides operations for reading and writing snapshot data to various storage backends.
/// All paths taken by the methods below are relative to the snapshot root.
#[async_trait]
pub trait SnapshotStorage: Send + Sync {
    /// Checks if a snapshot exists at this location (manifest.json exists).
    async fn exists(&self) -> Result<bool>;

    /// Reads and deserializes the manifest file.
    async fn read_manifest(&self) -> Result<Manifest>;

    /// Serializes `manifest` and writes it as the manifest file.
    async fn write_manifest(&self, manifest: &Manifest) -> Result<()>;

    /// Writes the schema index to schema/schemas.json.
    async fn write_schema(&self, schema: &SchemaSnapshot) -> Result<()>;

    /// Writes a text file to a relative path under the snapshot root.
    async fn write_text(&self, path: &str, content: &str) -> Result<()>;

    /// Reads a text file from a relative path under the snapshot root.
    async fn read_text(&self, path: &str) -> Result<String>;

    /// Deletes the entire snapshot (for --force).
    async fn delete_snapshot(&self) -> Result<()>;
}
190
/// OpenDAL-based implementation of SnapshotStorage.
pub struct OpenDalStorage {
    /// Object store through which every snapshot file is read, written and deleted.
    object_store: ObjectStore,
    /// URI the storage was created for; reported in snapshot-not-found errors.
    target_uri: String,
}
196
197impl OpenDalStorage {
198    fn new_operator_rooted(object_store: ObjectStore, target_uri: &str) -> Self {
199        Self {
200            object_store,
201            target_uri: target_uri.to_string(),
202        }
203    }
204
    /// Finishes a local (filesystem) store by applying the instrumentation
    /// layers only; unlike remote stores, no retry layers are added.
    // NOTE(review): the meaning of the `false` flag is defined by
    // `with_instrument_layers` — confirm against that helper.
    fn finish_local_store(object_store: ObjectStore) -> ObjectStore {
        with_instrument_layers(object_store, false)
    }
208
209    fn finish_remote_store(object_store: ObjectStore) -> ObjectStore {
210        with_instrument_layers(with_retry_layers(object_store), false)
211    }
212
213    fn ensure_backend_enabled(uri: &str, enabled: bool, reason: &'static str) -> Result<()> {
214        if enabled {
215            Ok(())
216        } else {
217            InvalidUriSnafu { uri, reason }.fail()
218        }
219    }
220
221    fn validate_remote_config<E: std::fmt::Display>(
222        uri: &str,
223        backend: &str,
224        result: std::result::Result<(), E>,
225    ) -> Result<()> {
226        result.map_err(|error| {
227            InvalidUriSnafu {
228                uri,
229                reason: format!("invalid {} config: {}", backend, error),
230            }
231            .build()
232        })
233    }
234
235    /// Creates a new storage from a file:// URI.
236    pub fn from_file_uri(uri: &str) -> Result<Self> {
237        let path = extract_file_path_from_uri(uri)?;
238
239        let builder = Fs::default().root(&path);
240        let object_store = ObjectStore::new(builder)
241            .context(BuildObjectStoreSnafu)?
242            .finish();
243        Ok(Self::new_operator_rooted(
244            Self::finish_local_store(object_store),
245            uri,
246        ))
247    }
248
249    fn from_file_uri_with_config(uri: &str, storage: &ObjectStoreConfig) -> Result<Self> {
250        if storage.enable_s3 || storage.enable_oss || storage.enable_gcs || storage.enable_azblob {
251            return InvalidUriSnafu {
252                uri,
253                reason: "file:// cannot be used with remote storage flags",
254            }
255            .fail();
256        }
257
258        Self::from_file_uri(uri)
259    }
260
261    fn from_s3_uri(uri: &str, storage: &ObjectStoreConfig) -> Result<Self> {
262        Self::ensure_backend_enabled(
263            uri,
264            storage.enable_s3,
265            "s3:// requires --s3 and related options",
266        )?;
267
268        let location = extract_remote_location(uri)?;
269        let mut config = storage.s3.clone();
270        config.s3_bucket = location.bucket_or_container;
271        config.s3_root = location.root;
272        Self::validate_remote_config(uri, "s3", config.validate())?;
273
274        let conn: S3Connection = config.into();
275        let object_store = ObjectStore::new(S3::from(&conn))
276            .context(BuildObjectStoreSnafu)?
277            .finish();
278        Ok(Self::new_operator_rooted(
279            Self::finish_remote_store(object_store),
280            uri,
281        ))
282    }
283
284    fn from_oss_uri(uri: &str, storage: &ObjectStoreConfig) -> Result<Self> {
285        Self::ensure_backend_enabled(
286            uri,
287            storage.enable_oss,
288            "oss:// requires --oss and related options",
289        )?;
290
291        let location = extract_remote_location(uri)?;
292        let mut config = storage.oss.clone();
293        config.oss_bucket = location.bucket_or_container;
294        config.oss_root = location.root;
295        Self::validate_remote_config(uri, "oss", config.validate())?;
296
297        let conn: OssConnection = config.into();
298        let object_store = ObjectStore::new(Oss::from(&conn))
299            .context(BuildObjectStoreSnafu)?
300            .finish();
301        Ok(Self::new_operator_rooted(
302            Self::finish_remote_store(object_store),
303            uri,
304        ))
305    }
306
307    fn from_gcs_uri(uri: &str, storage: &ObjectStoreConfig) -> Result<Self> {
308        Self::ensure_backend_enabled(
309            uri,
310            storage.enable_gcs,
311            "gs:// or gcs:// requires --gcs and related options",
312        )?;
313
314        let location = extract_remote_location(uri)?;
315        let mut config = storage.gcs.clone();
316        config.gcs_bucket = location.bucket_or_container;
317        config.gcs_root = location.root;
318        Self::validate_remote_config(uri, "gcs", config.validate())?;
319
320        let conn: GcsConnection = config.into();
321        let object_store = ObjectStore::new(Gcs::from(&conn))
322            .context(BuildObjectStoreSnafu)?
323            .finish();
324        Ok(Self::new_operator_rooted(
325            Self::finish_remote_store(object_store),
326            uri,
327        ))
328    }
329
330    fn from_azblob_uri(uri: &str, storage: &ObjectStoreConfig) -> Result<Self> {
331        Self::ensure_backend_enabled(
332            uri,
333            storage.enable_azblob,
334            "azblob:// requires --azblob and related options",
335        )?;
336
337        let location = extract_remote_location(uri)?;
338        let mut config = storage.azblob.clone();
339        config.azblob_container = location.bucket_or_container;
340        config.azblob_root = location.root;
341        Self::validate_remote_config(uri, "azblob", config.validate())?;
342
343        let conn: AzblobConnection = config.into();
344        let object_store = ObjectStore::new(Azblob::from(&conn))
345            .context(BuildObjectStoreSnafu)?
346            .finish();
347        Ok(Self::new_operator_rooted(
348            Self::finish_remote_store(object_store),
349            uri,
350        ))
351    }
352
353    /// Creates a new storage from a URI and object store config.
354    pub fn from_uri(uri: &str, storage: &ObjectStoreConfig) -> Result<Self> {
355        match StorageScheme::from_uri(uri)? {
356            StorageScheme::File => Self::from_file_uri_with_config(uri, storage),
357            StorageScheme::S3 => Self::from_s3_uri(uri, storage),
358            StorageScheme::Oss => Self::from_oss_uri(uri, storage),
359            StorageScheme::Gcs => Self::from_gcs_uri(uri, storage),
360            StorageScheme::Azblob => Self::from_azblob_uri(uri, storage),
361        }
362    }
363
364    /// Reads a file as bytes.
365    async fn read_file(&self, path: &str) -> Result<Vec<u8>> {
366        let data = self
367            .object_store
368            .read(path)
369            .await
370            .context(StorageOperationSnafu {
371                operation: format!("read {}", path),
372            })?;
373        Ok(data.to_vec())
374    }
375
376    /// Writes bytes to a file.
377    async fn write_file(&self, path: &str, data: Vec<u8>) -> Result<()> {
378        self.object_store
379            .write(path, data)
380            .await
381            .map(|_| ())
382            .context(StorageOperationSnafu {
383                operation: format!("write {}", path),
384            })
385    }
386
387    /// Checks if a file exists using stat.
388    async fn file_exists(&self, path: &str) -> Result<bool> {
389        match self.object_store.stat(path).await {
390            Ok(_) => Ok(true),
391            Err(e) if e.kind() == object_store::ErrorKind::NotFound => Ok(false),
392            Err(e) => Err(e).context(StorageOperationSnafu {
393                operation: format!("check exists {}", path),
394            }),
395        }
396    }
397
398    #[cfg(test)]
399    pub async fn read_schema(&self) -> Result<SchemaSnapshot> {
400        let schemas_path = schema_index_path();
401        let schemas: Vec<SchemaDefinition> = if self.file_exists(&schemas_path).await? {
402            let data = self.read_file(&schemas_path).await?;
403            serde_json::from_slice(&data).context(ManifestParseSnafu)?
404        } else {
405            vec![]
406        };
407
408        Ok(SchemaSnapshot { schemas })
409    }
410}
411
412#[async_trait]
413impl SnapshotStorage for OpenDalStorage {
414    async fn exists(&self) -> Result<bool> {
415        self.file_exists(MANIFEST_FILE).await
416    }
417
418    async fn read_manifest(&self) -> Result<Manifest> {
419        ensure_snapshot_exists(self).await?;
420
421        let data = self.read_file(MANIFEST_FILE).await?;
422        serde_json::from_slice(&data).context(ManifestParseSnafu)
423    }
424
425    async fn write_manifest(&self, manifest: &Manifest) -> Result<()> {
426        let data = serde_json::to_vec_pretty(manifest).context(ManifestSerializeSnafu)?;
427        self.write_file(MANIFEST_FILE, data).await
428    }
429
430    async fn write_schema(&self, schema: &SchemaSnapshot) -> Result<()> {
431        let schemas_path = schema_index_path();
432        let schemas_data =
433            serde_json::to_vec_pretty(&schema.schemas).context(ManifestSerializeSnafu)?;
434        self.write_file(&schemas_path, schemas_data).await
435    }
436
437    async fn write_text(&self, path: &str, content: &str) -> Result<()> {
438        self.write_file(path, content.as_bytes().to_vec()).await
439    }
440
441    async fn read_text(&self, path: &str) -> Result<String> {
442        let data = self.read_file(path).await?;
443        String::from_utf8(data).context(TextDecodeSnafu)
444    }
445
446    async fn delete_snapshot(&self) -> Result<()> {
447        self.object_store
448            .remove_all("/")
449            .await
450            .context(StorageOperationSnafu {
451                operation: "delete snapshot",
452            })
453    }
454}
455
456#[cfg(test)]
457mod tests {
458    use std::collections::HashMap;
459    use std::path::Path;
460
461    use object_store::ObjectStore;
462    use object_store::services::Fs;
463    use tempfile::tempdir;
464    use url::Url;
465
466    use super::*;
467    use crate::data::export_v2::manifest::{DataFormat, TimeRange};
468    use crate::data::export_v2::schema::SchemaDefinition;
469
470    fn make_storage_with_rooted_fs(dir: &std::path::Path) -> OpenDalStorage {
471        let object_store = ObjectStore::new(Fs::default().root(dir.to_str().unwrap()))
472            .unwrap()
473            .finish();
474        OpenDalStorage::new_operator_rooted(
475            OpenDalStorage::finish_local_store(object_store),
476            Url::from_directory_path(dir).unwrap().as_ref(),
477        )
478    }
479
480    #[test]
481    fn test_validate_uri_valid() {
482        assert_eq!(validate_uri("s3://bucket/path").unwrap(), StorageScheme::S3);
483        assert_eq!(
484            validate_uri("oss://bucket/path").unwrap(),
485            StorageScheme::Oss
486        );
487        assert_eq!(
488            validate_uri("gs://bucket/path").unwrap(),
489            StorageScheme::Gcs
490        );
491        assert_eq!(
492            validate_uri("gcs://bucket/path").unwrap(),
493            StorageScheme::Gcs
494        );
495        assert_eq!(
496            validate_uri("azblob://container/path").unwrap(),
497            StorageScheme::Azblob
498        );
499        assert_eq!(
500            validate_uri("file:///tmp/backup").unwrap(),
501            StorageScheme::File
502        );
503    }
504
505    #[test]
506    fn test_validate_uri_invalid() {
507        // Bare paths should be rejected
508        assert!(validate_uri("/tmp/backup").is_err());
509        assert!(validate_uri("./backup").is_err());
510        assert!(validate_uri("backup").is_err());
511
512        // Unknown schemes
513        assert!(validate_uri("ftp://server/path").is_err());
514    }
515
516    #[test]
517    fn test_extract_remote_location_requires_non_empty_root() {
518        assert!(extract_remote_location("s3://bucket").is_err());
519        assert!(extract_remote_location("s3://bucket/").is_err());
520        assert!(extract_remote_location("oss://bucket").is_err());
521        assert!(extract_remote_location("gs://bucket").is_err());
522        assert!(extract_remote_location("azblob://container").is_err());
523    }
524
525    #[cfg(not(windows))]
526    #[test]
527    fn test_extract_path_from_uri_unix_examples() {
528        assert_eq!(
529            extract_file_path_from_uri("file:///tmp/backup").unwrap(),
530            "/tmp/backup"
531        );
532        assert_eq!(
533            extract_file_path_from_uri("file://localhost/tmp/backup").unwrap(),
534            "/tmp/backup"
535        );
536    }
537
538    #[test]
539    fn test_extract_file_path_from_uri_rejects_file_host() {
540        assert!(extract_file_path_from_uri("file://tmp/backup").is_err());
541    }
542
543    #[test]
544    fn test_extract_file_path_from_uri_round_trips_directory_url() {
545        let dir = tempdir().unwrap();
546        let uri = Url::from_directory_path(dir.path()).unwrap().to_string();
547        let path = extract_file_path_from_uri(&uri).unwrap();
548
549        assert_eq!(Path::new(&path), dir.path());
550    }
551
552    #[tokio::test]
553    async fn test_read_manifest_reports_requested_uri() {
554        let dir = tempdir().unwrap();
555        let uri = Url::from_directory_path(dir.path()).unwrap().to_string();
556        let storage = OpenDalStorage::from_file_uri(&uri).unwrap();
557
558        let error = storage.read_manifest().await.unwrap_err().to_string();
559
560        assert!(error.contains(uri.as_str()));
561    }
562
563    #[tokio::test]
564    async fn test_manifest_round_trip() {
565        let dir = tempdir().unwrap();
566        let storage = make_storage_with_rooted_fs(dir.path());
567
568        let manifest = Manifest::new_full(
569            "greptime".to_string(),
570            vec!["public".to_string()],
571            TimeRange::unbounded(),
572            DataFormat::Parquet,
573        );
574
575        storage.write_manifest(&manifest).await.unwrap();
576        let loaded = storage.read_manifest().await.unwrap();
577
578        assert_eq!(loaded.catalog, manifest.catalog);
579        assert_eq!(loaded.schemas, manifest.schemas);
580        assert_eq!(loaded.schema_only, manifest.schema_only);
581        assert_eq!(loaded.format, manifest.format);
582        assert_eq!(loaded.snapshot_id, manifest.snapshot_id);
583    }
584
585    #[tokio::test]
586    async fn test_schema_round_trip() {
587        let dir = tempdir().unwrap();
588        let storage = make_storage_with_rooted_fs(dir.path());
589
590        let mut snapshot = SchemaSnapshot::new();
591        snapshot.add_schema(SchemaDefinition {
592            catalog: "greptime".to_string(),
593            name: "test_db".to_string(),
594            options: HashMap::from([("ttl".to_string(), "7d".to_string())]),
595        });
596
597        storage.write_schema(&snapshot).await.unwrap();
598        let loaded = storage.read_schema().await.unwrap();
599
600        assert_eq!(loaded, snapshot);
601    }
602
603    #[tokio::test]
604    async fn test_text_round_trip() {
605        let dir = tempdir().unwrap();
606        let storage = make_storage_with_rooted_fs(dir.path());
607        let content = "CREATE TABLE metrics (ts TIMESTAMP TIME INDEX);";
608
609        storage
610            .write_text("schema/ddl/public.sql", content)
611            .await
612            .unwrap();
613        let loaded = storage.read_text("schema/ddl/public.sql").await.unwrap();
614
615        assert_eq!(loaded, content);
616    }
617
618    #[tokio::test]
619    async fn test_read_text_rejects_invalid_utf8() {
620        let dir = tempdir().unwrap();
621        let storage = make_storage_with_rooted_fs(dir.path());
622
623        storage
624            .write_file("schema/ddl/public.sql", vec![0xff, 0xfe, 0xfd])
625            .await
626            .unwrap();
627
628        let error = storage
629            .read_text("schema/ddl/public.sql")
630            .await
631            .unwrap_err();
632        assert!(error.to_string().contains("UTF-8"));
633    }
634
635    #[tokio::test]
636    async fn test_exists_follows_manifest_presence() {
637        let dir = tempdir().unwrap();
638        let storage = make_storage_with_rooted_fs(dir.path());
639
640        assert!(!storage.exists().await.unwrap());
641
642        storage
643            .write_manifest(&Manifest::new_schema_only(
644                "greptime".to_string(),
645                vec!["public".to_string()],
646            ))
647            .await
648            .unwrap();
649
650        assert!(storage.exists().await.unwrap());
651    }
652
653    #[tokio::test]
654    async fn test_delete_snapshot_only_removes_rooted_contents() {
655        let parent = tempdir().unwrap();
656        let snapshot_root = parent.path().join("snapshot");
657        let sibling = parent.path().join("sibling");
658        std::fs::create_dir_all(&snapshot_root).unwrap();
659        std::fs::create_dir_all(&sibling).unwrap();
660        std::fs::write(snapshot_root.join("manifest.json"), b"{}").unwrap();
661        std::fs::write(sibling.join("keep.txt"), b"keep").unwrap();
662
663        let storage = make_storage_with_rooted_fs(&snapshot_root);
664        storage.delete_snapshot().await.unwrap();
665
666        assert!(!snapshot_root.join("manifest.json").exists());
667        assert!(sibling.join("keep.txt").exists());
668    }
669}