Skip to main content

object_store/
util.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::fmt::Display;
16use std::path;
17
18use common_error::root_source;
19use common_telemetry::{debug, error, info, warn};
20use opendal::ErrorKind;
21use opendal::layers::{
22    LoggingInterceptor, LoggingLayer, RetryEvent, RetryInterceptor, RetryLayer, TracingLayer,
23};
24use opendal::raw::{AccessorInfo, HttpClient, Operation};
25use snafu::ResultExt;
26
27use crate::config::HttpClientConfig;
28use crate::{ObjectStore, error};
29
30/// Join two paths and normalize the output dir.
31///
32/// The output dir is always ends with `/`. e.g.
33/// - `/a/b` join `c` => `/a/b/c/`
34/// - `/a/b` join `/c/` => `/a/b/c/`
35///
36/// All internal `//` will be replaced by `/`.
37pub fn join_dir(parent: &str, child: &str) -> String {
38    // Always adds a `/` to the output path.
39    let output = format!("{parent}/{child}/");
40    normalize_dir(&output)
41}
42
/// Normalizes a directory path; adapted from `opendal::raw::normalize_root`.
///
/// # Difference from the original
///
/// A leading `/` is **not** forced onto the path: it is kept only when the
/// input itself starts with `/`.
///
/// # Normalize Rules
///
/// - Repeated leading `/` collapse into one: `///abc` => `/abc/`
/// - Internal `//` is replaced by `/`: `abc///def` => `abc/def/`
/// - A trailing `/` is always present: `/abc` => `/abc/`
/// - An empty path becomes `/`: `` => `/`
/// - No leading `/` is added: `abc/` => `abc/`
///
/// Whitespace is left untouched by this function.
///
/// The result looks like `/path/to/root/` or `path/to/root/`.
pub fn normalize_dir(v: &str) -> String {
    let mut dir = String::with_capacity(v.len() + 1);

    // Keep exactly one leading `/` when the input is rooted.
    if v.starts_with('/') {
        dir.push('/');
    }

    // Re-emit every non-empty segment followed by a single `/`; this collapses
    // repeated separators and guarantees the trailing `/` in one pass.
    for segment in v.split('/').filter(|segment| !segment.is_empty()) {
        dir.push_str(segment);
        dir.push('/');
    }

    // Nothing was written at all: the input was empty.
    if dir.is_empty() {
        dir.push('/');
    }

    dir
}
77
78/// Push `child` to `parent` dir and normalize the output path.
79///
80/// - Path endswith `/` means it's a dir path.
81/// - Otherwise, it's a file path.
82pub fn join_path(parent: &str, child: &str) -> String {
83    let output = format!("{parent}/{child}");
84    normalize_path(&output)
85}
86
/// Normalizes a path so that every operation is built from a canonical form:
///
/// - A path ending with `/` is a dir path.
/// - Anything else is a file path.
///
/// # Normalize Rules
///
/// - Whitespace around the whole path is trimmed: ` abc/def ` => `abc/def`
/// - Repeated leading `/` collapse into one: `///abc` => `/abc`
/// - Internal `//` is replaced by `/`: `abc///def` => `abc/def`
/// - An empty path becomes `/`: `` => `/`
pub fn normalize_path(path: &str) -> String {
    let trimmed = path.trim();

    // Shortcut: nothing left after trimming means the root.
    if trimmed.is_empty() {
        return String::from("/");
    }

    let mut normalized = String::with_capacity(trimmed.len());

    // A rooted input stays rooted, with exactly one leading `/`.
    if trimmed.starts_with('/') {
        normalized.push('/');
    }

    for segment in trimmed.split('/').filter(|segment| !segment.is_empty()) {
        // Separate from the previous segment; right after the root `/` no
        // additional separator is needed.
        if !normalized.is_empty() && !normalized.ends_with('/') {
            normalized.push('/');
        }
        normalized.push_str(segment);
    }

    // Preserve the dir marker of the input without doubling a lone root `/`.
    if trimmed.ends_with('/') && !normalized.ends_with('/') {
        normalized.push('/');
    }

    normalized
}
128
129/// Attaches instrument layers to the object store.
130pub fn with_instrument_layers(object_store: ObjectStore, path_label: bool) -> ObjectStore {
131    object_store
132        .layer(LoggingLayer::new(DefaultLoggingInterceptor))
133        .layer(TracingLayer::new())
134        .layer(crate::layers::build_prometheus_metrics_layer(path_label))
135}
136
137/// Adds retry layer to the object store.
138pub fn with_retry_layers(object_store: ObjectStore) -> ObjectStore {
139    object_store.layer(
140        RetryLayer::new()
141            .with_jitter()
142            .with_notify(PrintDetailedError),
143    )
144}
145
/// Log target used for every record emitted by [`DefaultLoggingInterceptor`].
static LOGGING_TARGET: &str = "opendal::services";
147
/// Borrowed list of `(key, value)` pairs that renders as `k1=v1 k2=v2 ...`.
struct LoggingContext<'a>(&'a [(&'a str, &'a str)]);

impl Display for LoggingContext<'_> {
    /// Writes the pairs as `key=value`, separated by single spaces.
    ///
    /// An empty list writes nothing.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut pairs = self.0.iter();

        // The first pair carries no separator ...
        if let Some((key, value)) = pairs.next() {
            write!(f, "{key}={value}")?;
        }

        // ... every following pair is preceded by one space.
        pairs.try_for_each(|(key, value)| write!(f, " {key}={value}"))
    }
}
162
/// Stateless [`LoggingInterceptor`] installed by `with_instrument_layers`.
///
/// Its `log` implementation writes records under the `LOGGING_TARGET` target.
#[derive(Debug, Copy, Clone, Default)]
pub struct DefaultLoggingInterceptor;
165
166impl LoggingInterceptor for DefaultLoggingInterceptor {
167    #[inline]
168    fn log(
169        &self,
170        info: &AccessorInfo,
171        operation: Operation,
172        context: &[(&str, &str)],
173        message: &str,
174        err: Option<&opendal::Error>,
175    ) {
176        if let Some(err) = err {
177            let root = root_source(err);
178            // Print error if it's unexpected, otherwise in error.
179            if err.kind() == ErrorKind::Unexpected {
180                error!(
181                    target: LOGGING_TARGET,
182                    "service={} name={} {}: {operation} {message} {err:#?}, root={root:#?}",
183                    info.scheme(),
184                    info.name(),
185                    LoggingContext(context),
186                );
187            } else {
188                debug!(
189                    target: LOGGING_TARGET,
190                    "service={} name={} {}: {operation} {message} {err}, root={root:?}",
191                    info.scheme(),
192                    info.name(),
193                    LoggingContext(context),
194                );
195            };
196        }
197
198        debug!(
199            target: LOGGING_TARGET,
200            "service={} name={} {}: {operation} {message}",
201            info.scheme(),
202            info.name(),
203            LoggingContext(context),
204        );
205    }
206}
207
208pub(crate) fn build_http_client(config: &HttpClientConfig) -> error::Result<HttpClient> {
209    if config.skip_ssl_validation {
210        common_telemetry::warn!(
211            "Skipping SSL validation for object storage HTTP client. Please ensure the environment is trusted."
212        );
213    }
214
215    let client = reqwest::ClientBuilder::new()
216        .pool_max_idle_per_host(config.pool_max_idle_per_host as usize)
217        .connect_timeout(config.connect_timeout)
218        .pool_idle_timeout(config.pool_idle_timeout)
219        .timeout(config.timeout)
220        .danger_accept_invalid_certs(config.skip_ssl_validation)
221        .build()
222        .context(error::BuildHttpClientSnafu)?;
223    Ok(HttpClient::with(client))
224}
225
226pub fn clean_temp_dir(dir: &str) -> error::Result<()> {
227    if path::Path::new(&dir).exists() {
228        info!("Begin to clean temp storage directory: {}", dir);
229        std::fs::remove_dir_all(dir).context(error::RemoveDirSnafu { dir })?;
230        info!("Cleaned temp storage directory: {}", dir);
231    }
232
233    Ok(())
234}
235
/// Retry interceptor that logs the error behind every retry in pretty `Debug`
/// format, together with the delay before the next attempt.
pub struct PrintDetailedError;
238
239// PrintDetailedError is a retry interceptor that prints error in Debug format in retrying.
240impl RetryInterceptor for PrintDetailedError {
241    fn intercept(&self, event: RetryEvent<'_>) {
242        warn!(
243            "Retry after {}s, error: {:#?}",
244            event.retry_after.as_secs_f64(),
245            event.err
246        );
247    }
248}
249
#[cfg(test)]
mod tests {
    use super::*;

    /// `normalize_dir`: root, empty input and a missing trailing `/`.
    #[test]
    fn test_normalize_dir() {
        // (input, expected)
        let cases = [("/", "/"), ("", "/"), ("/test", "/test/")];

        for (input, expected) in cases {
            assert_eq!(expected, normalize_dir(input));
        }
    }

    /// `join_dir`: the result is always a dir path with collapsed separators.
    #[test]
    fn test_join_dir() {
        // (parent, child, expected)
        let cases = [
            ("", "", "/"),
            ("/", "", "/"),
            ("", "/", "/"),
            ("/", "/", "/"),
            ("/a", "", "/a/"),
            ("a/b", "c", "a/b/c/"),
            ("/a/b", "c", "/a/b/c/"),
            ("/a/b", "c/", "/a/b/c/"),
            ("/a/b", "/c/", "/a/b/c/"),
            ("/a/b", "//c", "/a/b/c/"),
        ];

        for (parent, child, expected) in cases {
            assert_eq!(expected, join_dir(parent, child));
        }
    }

    /// `join_path`: file vs. dir results, separator collapsing and trimming.
    #[test]
    fn test_join_path() {
        // (parent, child, expected)
        let cases = [
            ("", "", "/"),
            ("/", "", "/"),
            ("", "/", "/"),
            ("/", "/", "/"),
            ("a", "", "a/"),
            ("/", "a", "/a"),
            ("a/b", "c.txt", "a/b/c.txt"),
            ("/a/b", "c.txt", "/a/b/c.txt"),
            ("/a/b", "c/", "/a/b/c/"),
            ("/a/b", "/c/", "/a/b/c/"),
            ("/a/b", "//c.txt", "/a/b/c.txt"),
            (" abc", "/def ", "abc/def"),
            ("//", "/abc", "/abc"),
            ("abc/", "//def", "abc/def"),
        ];

        for (parent, child, expected) in cases {
            assert_eq!(expected, join_path(parent, child));
        }
    }
}