Skip to main content

object_store/
util.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::fmt::Display;
16use std::path;
17
18use common_error::root_source;
19use common_telemetry::{debug, error, info, warn};
20use opendal::ErrorKind;
21use opendal::layers::{
22    LoggingInterceptor, LoggingLayer, RetryEvent, RetryInterceptor, RetryLayer, TracingLayer,
23};
24use opendal::raw::{AccessorInfo, HttpClient, Operation};
25use opendal::services::FS_SCHEME;
26use snafu::ResultExt;
27
28use crate::config::HttpClientConfig;
29use crate::{ObjectStore, error};
30
31/// Returns true if the object store is not backed by local filesystem.
32pub fn is_object_storage(object_store: &ObjectStore) -> bool {
33    object_store.info().scheme() != FS_SCHEME
34}
35
36/// Join two paths and normalize the output dir.
37///
38/// The output dir is always ends with `/`. e.g.
39/// - `/a/b` join `c` => `/a/b/c/`
40/// - `/a/b` join `/c/` => `/a/b/c/`
41///
42/// All internal `//` will be replaced by `/`.
43pub fn join_dir(parent: &str, child: &str) -> String {
44    // Always adds a `/` to the output path.
45    let output = format!("{parent}/{child}/");
46    normalize_dir(&output)
47}
48
/// Normalizes `v` into a directory path such as `abc/def/` or `/abc/def/`.
///
/// Adapted from `opendal::raw::normalize_root`. The difference called out by
/// the original author is that a leading `/` is *not* forced onto the result:
/// it is kept only when the input itself starts with `/`.
///
/// # Normalize Rules
///
/// - Empty path segments are dropped, so runs of `/` collapse into one:
///   `abc///def` => `abc/def/`, `///abc` => `/abc/`
/// - A single leading `/` is preserved if (and only if) the input has one:
///   `/abc` => `/abc/`, `abc` => `abc/`
/// - The result always ends with `/`
/// - An empty input (or one made only of `/`) becomes `/`
/// - Whitespace is left untouched; this function performs no trimming
pub fn normalize_dir(v: &str) -> String {
    // Every kept segment is followed by exactly one `/`, so the output can
    // never be longer than the input plus one trailing separator.
    let mut dir = String::with_capacity(v.len() + 1);

    if v.starts_with('/') {
        dir.push('/');
    }

    for segment in v.split('/').filter(|segment| !segment.is_empty()) {
        dir.push_str(segment);
        dir.push('/');
    }

    // Nothing was emitted: relative input without any non-empty segment.
    if dir.is_empty() {
        dir.push('/');
    }

    dir
}
83
84/// Push `child` to `parent` dir and normalize the output path.
85///
86/// - Path endswith `/` means it's a dir path.
87/// - Otherwise, it's a file path.
88pub fn join_path(parent: &str, child: &str) -> String {
89    let output = format!("{parent}/{child}");
90    normalize_path(&output)
91}
92
/// Normalizes `path` so that every operation is built from a canonical form:
///
/// - A path ending with `/` is a directory path.
/// - Any other path is a file path.
///
/// # Normalize Rules
///
/// - Leading and trailing whitespace of the whole path is trimmed:
///   ` abc/def ` => `abc/def`
/// - Repeated leading `/` collapse into one: `///abc` => `/abc`
/// - Internal runs of `/` collapse into one: `abc///def` => `abc/def`
/// - A leading or trailing `/` is kept only if the (trimmed) input has one
/// - An empty (or whitespace-only) path becomes `/`
pub fn normalize_path(path: &str) -> String {
    let trimmed = path.trim();

    // Nothing left after trimming: treat it as the root.
    if trimmed.is_empty() {
        return String::from("/");
    }

    let mut normalized = String::with_capacity(trimmed.len());

    if trimmed.starts_with('/') {
        normalized.push('/');
    }

    for segment in trimmed.split('/').filter(|segment| !segment.is_empty()) {
        // Separate from the previous segment, but do not double the leading `/`.
        if !(normalized.is_empty() || normalized.ends_with('/')) {
            normalized.push('/');
        }
        normalized.push_str(segment);
    }

    // Restore the directory marker unless the leading `/` already serves as
    // one (input consisting solely of `/`).
    if trimmed.ends_with('/') && !normalized.ends_with('/') {
        normalized.push('/');
    }

    normalized
}
134
135/// Attaches instrument layers to the object store.
136pub fn with_instrument_layers(object_store: ObjectStore, path_label: bool) -> ObjectStore {
137    object_store
138        .layer(LoggingLayer::new(DefaultLoggingInterceptor))
139        .layer(TracingLayer::new())
140        .layer(crate::layers::build_prometheus_metrics_layer(path_label))
141}
142
143/// Adds retry layer to the object store.
144pub fn with_retry_layers(object_store: ObjectStore) -> ObjectStore {
145    object_store.layer(
146        RetryLayer::new()
147            .with_jitter()
148            .with_notify(PrintDetailedError),
149    )
150}
151
/// Log target attached to every record emitted by [`DefaultLoggingInterceptor`].
static LOGGING_TARGET: &str = "opendal::services";
153
/// Borrowed list of `(key, value)` pairs that renders as space separated
/// `key=value` entries, e.g. `path=a/b size=42`. An empty list renders as an
/// empty string.
struct LoggingContext<'a>(&'a [(&'a str, &'a str)]);

impl Display for LoggingContext<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut pairs = self.0.iter();

        // The first entry is written bare; every later one is preceded by a
        // single space so the output has no leading or trailing separator.
        if let Some((key, value)) = pairs.next() {
            write!(f, "{key}={value}")?;
        }
        for (key, value) in pairs {
            write!(f, " {key}={value}")?;
        }

        Ok(())
    }
}
168
169#[derive(Debug, Copy, Clone, Default)]
170pub struct DefaultLoggingInterceptor;
171
172impl LoggingInterceptor for DefaultLoggingInterceptor {
173    #[inline]
174    fn log(
175        &self,
176        info: &AccessorInfo,
177        operation: Operation,
178        context: &[(&str, &str)],
179        message: &str,
180        err: Option<&opendal::Error>,
181    ) {
182        if let Some(err) = err {
183            let root = root_source(err);
184            // Print error if it's unexpected, otherwise in error.
185            if err.kind() == ErrorKind::Unexpected {
186                error!(
187                    target: LOGGING_TARGET,
188                    "service={} name={} {}: {operation} {message} {err:#?}, root={root:#?}",
189                    info.scheme(),
190                    info.name(),
191                    LoggingContext(context),
192                );
193            } else {
194                debug!(
195                    target: LOGGING_TARGET,
196                    "service={} name={} {}: {operation} {message} {err}, root={root:?}",
197                    info.scheme(),
198                    info.name(),
199                    LoggingContext(context),
200                );
201            };
202        }
203
204        debug!(
205            target: LOGGING_TARGET,
206            "service={} name={} {}: {operation} {message}",
207            info.scheme(),
208            info.name(),
209            LoggingContext(context),
210        );
211    }
212}
213
214pub(crate) fn build_http_client(config: &HttpClientConfig) -> error::Result<HttpClient> {
215    if config.skip_ssl_validation {
216        common_telemetry::warn!(
217            "Skipping SSL validation for object storage HTTP client. Please ensure the environment is trusted."
218        );
219    }
220
221    let client = reqwest::ClientBuilder::new()
222        .pool_max_idle_per_host(config.pool_max_idle_per_host as usize)
223        .connect_timeout(config.connect_timeout)
224        .pool_idle_timeout(config.pool_idle_timeout)
225        .timeout(config.timeout)
226        .danger_accept_invalid_certs(config.skip_ssl_validation)
227        .build()
228        .context(error::BuildHttpClientSnafu)?;
229    Ok(HttpClient::with(client))
230}
231
232pub fn clean_temp_dir(dir: &str) -> error::Result<()> {
233    if path::Path::new(&dir).exists() {
234        info!("Begin to clean temp storage directory: {}", dir);
235        std::fs::remove_dir_all(dir).context(error::RemoveDirSnafu { dir })?;
236        info!("Cleaned temp storage directory: {}", dir);
237    }
238
239    Ok(())
240}
241
242/// PrintDetailedError is a retry interceptor that prints error in Debug format in retrying.
243pub struct PrintDetailedError;
244
245// PrintDetailedError is a retry interceptor that prints error in Debug format in retrying.
246impl RetryInterceptor for PrintDetailedError {
247    fn intercept(&self, event: RetryEvent<'_>) {
248        warn!(
249            "Retry after {}s, error: {:#?}",
250            event.retry_after.as_secs_f64(),
251            event.err
252        );
253    }
254}
255
#[cfg(test)]
mod tests {
    use opendal::services::Fs;

    use super::*;
    use crate::ObjectStore;
    use crate::util::is_object_storage;

    /// `normalize_dir` always yields a path that ends with `/`.
    #[test]
    fn test_normalize_dir() {
        // (input, expected)
        let cases = [("/", "/"), ("", "/"), ("/test", "/test/")];

        for (input, expected) in cases {
            assert_eq!(expected, normalize_dir(input), "input: {input:?}");
        }
    }

    /// `join_dir` joins both parts and normalizes the result as a directory.
    #[test]
    fn test_join_dir() {
        // (parent, child, expected)
        let cases = [
            ("", "", "/"),
            ("/", "", "/"),
            ("", "/", "/"),
            ("/", "/", "/"),
            ("/a", "", "/a/"),
            ("a/b", "c", "a/b/c/"),
            ("/a/b", "c", "/a/b/c/"),
            ("/a/b", "c/", "/a/b/c/"),
            ("/a/b", "/c/", "/a/b/c/"),
            ("/a/b", "//c", "/a/b/c/"),
        ];

        for (parent, child, expected) in cases {
            assert_eq!(
                expected,
                join_dir(parent, child),
                "parent: {parent:?}, child: {child:?}"
            );
        }
    }

    /// `join_path` keeps the file/dir distinction given by the trailing `/`.
    #[test]
    fn test_join_path() {
        // (parent, child, expected)
        let cases = [
            ("", "", "/"),
            ("/", "", "/"),
            ("", "/", "/"),
            ("/", "/", "/"),
            ("a", "", "a/"),
            ("/", "a", "/a"),
            ("a/b", "c.txt", "a/b/c.txt"),
            ("/a/b", "c.txt", "/a/b/c.txt"),
            ("/a/b", "c/", "/a/b/c/"),
            ("/a/b", "/c/", "/a/b/c/"),
            ("/a/b", "//c.txt", "/a/b/c.txt"),
            (" abc", "/def ", "abc/def"),
            ("//", "/abc", "/abc"),
            ("abc/", "//def", "abc/def"),
        ];

        for (parent, child, expected) in cases {
            assert_eq!(
                expected,
                join_path(parent, child),
                "parent: {parent:?}, child: {child:?}"
            );
        }
    }

    /// A store built on the `Fs` service must not be reported as object storage.
    #[test]
    fn test_fs_is_not_object_storage() {
        let builder = Fs::default().root("/tmp");
        let object_store = ObjectStore::new(builder).unwrap().finish();

        assert_eq!(FS_SCHEME, object_store.info().scheme());
        assert!(!is_object_storage(&object_store));
    }
}