refactor: combine Copy To and Copy From (#1197)

* refactor: combine Copy To and Copy From

* Apply suggestions from code review

Co-authored-by: LFC <bayinamine@gmail.com>

* Apply suggestions from code review

Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>

---------

Co-authored-by: LFC <bayinamine@gmail.com>
Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>
This commit is contained in:
Weny Xu
2023-03-20 19:23:25 +08:00
committed by GitHub
parent ad886f5b3e
commit e19c8fa2b6
19 changed files with 643 additions and 508 deletions

View File

@@ -0,0 +1,13 @@
[package]
name = "common-datasource"
version.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
common-error = { path = "../error" }
futures.workspace = true
object-store = { path = "../../object-store" }
regex = "1.7"
snafu.workspace = true
url = "2.3"

View File

@@ -0,0 +1,75 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::Any;
use common_error::prelude::*;
use url::ParseError;
#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
pub enum Error {
#[snafu(display("Unsupported backend protocol: {}", protocol))]
UnsupportedBackendProtocol { protocol: String },
#[snafu(display("empty host: {}", url))]
EmptyHostPath { url: String },
#[snafu(display("Invalid path: {}", path))]
InvalidPath { path: String },
#[snafu(display("Invalid url: {}, error :{}", url, source))]
InvalidUrl { url: String, source: ParseError },
#[snafu(display("Failed to build backend, source: {}", source))]
BuildBackend {
source: object_store::Error,
backtrace: Backtrace,
},
#[snafu(display("Failed to list object in path: {}, source: {}", path, source))]
ListObjects {
path: String,
backtrace: Backtrace,
source: object_store::Error,
},
#[snafu(display("Invalid connection: {}", msg))]
InvalidConnection { msg: String },
}
pub type Result<T> = std::result::Result<T, Error>;
impl ErrorExt for Error {
fn status_code(&self) -> StatusCode {
use Error::*;
match self {
BuildBackend { .. } | ListObjects { .. } => StatusCode::StorageUnavailable,
UnsupportedBackendProtocol { .. }
| InvalidConnection { .. }
| InvalidUrl { .. }
| EmptyHostPath { .. }
| InvalidPath { .. } => StatusCode::InvalidArguments,
}
}
fn backtrace_opt(&self) -> Option<&Backtrace> {
ErrorCompat::backtrace(self)
}
fn as_any(&self) -> &dyn Any {
self
}
}

View File

@@ -0,0 +1,18 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
pub mod error;
pub mod lister;
pub mod object_store;
pub mod util;

View File

@@ -0,0 +1,81 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use futures::{future, TryStreamExt};
use object_store::{Object, ObjectStore};
use regex::Regex;
use snafu::ResultExt;
use crate::error::{self, Result};
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Source {
Filename(String),
Dir,
}
pub struct Lister {
object_store: ObjectStore,
source: Source,
path: String,
regex: Option<Regex>,
}
impl Lister {
pub fn new(
object_store: ObjectStore,
source: Source,
path: String,
regex: Option<Regex>,
) -> Self {
Lister {
object_store,
source,
path,
regex,
}
}
pub async fn list(&self) -> Result<Vec<Object>> {
match &self.source {
Source::Dir => {
let streamer = self
.object_store
.object(&self.path)
.list()
.await
.context(error::ListObjectsSnafu { path: &self.path })?;
streamer
.try_filter(|f| {
let res = self
.regex
.as_ref()
.map(|x| x.is_match(f.name()))
.unwrap_or(true);
future::ready(res)
})
.try_collect::<Vec<_>>()
.await
.context(error::ListObjectsSnafu { path: &self.path })
}
Source::Filename(filename) => {
let obj = self
.object_store
.object(&format!("{}{}", self.path, filename));
Ok(vec![obj])
}
}
}
}

View File

@@ -0,0 +1,60 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
pub mod fs;
pub mod s3;
use std::collections::HashMap;
use object_store::ObjectStore;
use snafu::{OptionExt, ResultExt};
use url::{ParseError, Url};
use self::fs::build_fs_backend;
use self::s3::build_s3_backend;
use crate::error::{self, Result};
pub const FS_SCHEMA: &str = "FS";
pub const S3_SCHEMA: &str = "S3";
/// parse url returns (schema,Option<host>,path)
pub fn parse_url(url: &str) -> Result<(String, Option<String>, String)> {
let parsed_url = Url::parse(url);
match parsed_url {
Ok(url) => Ok((
url.scheme().to_string(),
url.host_str().map(|s| s.to_string()),
url.path().to_string(),
)),
Err(ParseError::RelativeUrlWithoutBase) => {
Ok((FS_SCHEMA.to_string(), None, url.to_string()))
}
Err(err) => Err(err).context(error::InvalidUrlSnafu { url }),
}
}
pub fn build_backend(url: &str, connection: HashMap<String, String>) -> Result<ObjectStore> {
let (schema, host, _path) = parse_url(url)?;
match schema.to_uppercase().as_str() {
S3_SCHEMA => {
let host = host.context(error::EmptyHostPathSnafu {
url: url.to_string(),
})?;
Ok(build_s3_backend(&host, "/", connection)?)
}
FS_SCHEMA => Ok(build_fs_backend("/")?),
_ => error::UnsupportedBackendProtocolSnafu { protocol: schema }.fail(),
}
}

View File

@@ -0,0 +1,28 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use object_store::services::Fs;
use object_store::{ObjectStore, ObjectStoreBuilder};
use snafu::ResultExt;
use crate::error::{self, Result};
pub fn build_fs_backend(root: &str) -> Result<ObjectStore> {
let accessor = Fs::default()
.root(root)
.build()
.context(error::BuildBackendSnafu)?;
Ok(ObjectStore::new(accessor).finish())
}

View File

@@ -0,0 +1,79 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use object_store::services::S3;
use object_store::{ObjectStore, ObjectStoreBuilder};
use snafu::ResultExt;
use crate::error::{self, Result};
const ENDPOINT_URL: &str = "ENDPOINT_URL";
const ACCESS_KEY_ID: &str = "ACCESS_KEY_ID";
const SECRET_ACCESS_KEY: &str = "SECRET_ACCESS_KEY";
const SESSION_TOKEN: &str = "SESSION_TOKEN";
const REGION: &str = "REGION";
const ENABLE_VIRTUAL_HOST_STYLE: &str = "ENABLE_VIRTUAL_HOST_STYLE";
pub fn build_s3_backend(
host: &str,
path: &str,
connection: HashMap<String, String>,
) -> Result<ObjectStore> {
let mut builder = S3::default();
builder.root(path);
builder.bucket(host);
if let Some(endpoint) = connection.get(ENDPOINT_URL) {
builder.endpoint(endpoint);
}
if let Some(region) = connection.get(REGION) {
builder.region(region);
}
if let Some(key_id) = connection.get(ACCESS_KEY_ID) {
builder.access_key_id(key_id);
}
if let Some(key) = connection.get(SECRET_ACCESS_KEY) {
builder.secret_access_key(key);
}
if let Some(session_token) = connection.get(SESSION_TOKEN) {
builder.security_token(session_token);
}
if let Some(enable_str) = connection.get(ENABLE_VIRTUAL_HOST_STYLE) {
let enable = enable_str.as_str().parse::<bool>().map_err(|e| {
error::InvalidConnectionSnafu {
msg: format!(
"failed to parse the option {}={}, {}",
ENABLE_VIRTUAL_HOST_STYLE, enable_str, e
),
}
.build()
})?;
if enable {
builder.enable_virtual_host_style();
}
}
let accessor = builder.build().context(error::BuildBackendSnafu)?;
Ok(ObjectStore::new(accessor).finish())
}

View File

@@ -0,0 +1,125 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
pub fn find_dir_and_filename(path: &str) -> (String, Option<String>) {
if path.is_empty() {
("/".to_string(), None)
} else if path.ends_with('/') {
(path.to_string(), None)
} else if let Some(idx) = path.rfind('/') {
(
path[..idx + 1].to_string(),
Some(path[idx + 1..].to_string()),
)
} else {
("/".to_string(), Some(path.to_string()))
}
}
#[cfg(test)]
mod tests {
use url::Url;
use super::*;
#[test]
fn test_parse_uri() {
struct Test<'a> {
uri: &'a str,
expected_path: &'a str,
expected_schema: &'a str,
}
let tests = [
Test {
uri: "s3://bucket/to/path/",
expected_path: "/to/path/",
expected_schema: "s3",
},
Test {
uri: "fs:///to/path/",
expected_path: "/to/path/",
expected_schema: "fs",
},
Test {
uri: "fs:///to/path/file",
expected_path: "/to/path/file",
expected_schema: "fs",
},
];
for test in tests {
let parsed_uri = Url::parse(test.uri).unwrap();
assert_eq!(parsed_uri.path(), test.expected_path);
assert_eq!(parsed_uri.scheme(), test.expected_schema);
}
}
#[test]
fn test_parse_path_and_dir() {
let parsed = Url::from_file_path("/to/path/file").unwrap();
assert_eq!(parsed.path(), "/to/path/file");
let parsed = Url::from_directory_path("/to/path/").unwrap();
assert_eq!(parsed.path(), "/to/path/");
}
#[test]
fn test_find_dir_and_filename() {
struct Test<'a> {
path: &'a str,
expected_dir: &'a str,
expected_filename: Option<String>,
}
let tests = [
Test {
path: "to/path/",
expected_dir: "to/path/",
expected_filename: None,
},
Test {
path: "to/path/filename",
expected_dir: "to/path/",
expected_filename: Some("filename".into()),
},
Test {
path: "/to/path/filename",
expected_dir: "/to/path/",
expected_filename: Some("filename".into()),
},
Test {
path: "/",
expected_dir: "/",
expected_filename: None,
},
Test {
path: "filename",
expected_dir: "/",
expected_filename: Some("filename".into()),
},
Test {
path: "",
expected_dir: "/",
expected_filename: None,
},
];
for test in tests {
let (path, filename) = find_dir_and_filename(test.path);
assert_eq!(test.expected_dir, path);
assert_eq!(test.expected_filename, filename)
}
}
}