From 3a2df0ce451e57bf88bae495bed2c1d3be4e803d Mon Sep 17 00:00:00 2001
From: Chang She <759245+changhiskhan@users.noreply.github.com>
Date: Thu, 4 May 2023 09:47:03 -0700
Subject: [PATCH] Add method to get the URI scheme to support cloud storage

---
 .gitignore                |  2 ++
 python/lancedb/util.py    | 46 +++++++++++++++++++++++++++++++++++++++
 python/tests/test_util.py | 32 +++++++++++++++++++++++++++
 3 files changed, 80 insertions(+)
 create mode 100644 python/lancedb/util.py
 create mode 100644 python/tests/test_util.py

diff --git a/.gitignore b/.gitignore
index 82107c69..40bd8a16 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,3 +15,5 @@ python/build
 python/dist
 
 notebooks/.ipynb_checkpoints
+
+**/.hypothesis
diff --git a/python/lancedb/util.py b/python/lancedb/util.py
new file mode 100644
index 00000000..4444c881
--- /dev/null
+++ b/python/lancedb/util.py
@@ -0,0 +1,46 @@
+# Copyright 2023 LanceDB Developers
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from urllib.parse import ParseResult, urlparse
+
+from pyarrow import fs
+
+
+def get_uri_scheme(uri: str) -> str:
+    """
+    Get the scheme of a URI. If the URI does not have a scheme, assume it is a file URI.
+
+    The ``s3a`` and ``s3n`` aliases are normalized to ``s3``. A single-letter
+    scheme is treated as a Windows drive letter (``C:/data`` parses with scheme
+    ``c``) and is reported as ``file``.
+
+    Parameters
+    ----------
+    uri : str
+        The URI to parse.
+
+    Returns
+    -------
+    str: The scheme of the URI.
+    """
+    scheme = urlparse(uri).scheme
+    if not scheme:
+        # A bare relative or absolute path carries no scheme.
+        return "file"
+    if len(scheme) == 1:
+        # urlparse reports a Windows drive letter ("C:/data" -> "c") as the scheme.
+        return "file"
+    if scheme in ("s3a", "s3n"):
+        # Alternate spellings of the S3 scheme.
+        return "s3"
+    return scheme
diff --git a/python/tests/test_util.py b/python/tests/test_util.py
new file mode 100644
index 00000000..687de11d
--- /dev/null
+++ b/python/tests/test_util.py
@@ -0,0 +1,32 @@
+# Copyright 2023 LanceDB Developers
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from lancedb.util import get_uri_scheme
+
+
+def test_normalize_uri():
+    uris = [
+        "relative/path",
+        "/absolute/path",
+        "file:///absolute/path",
+        "s3://bucket/path",
+        "gs://bucket/path",
+        "s3a://bucket/path",
+        "s3n://bucket/path",
+        "C:/absolute/path",
+    ]
+    schemes = ["file", "file", "file", "s3", "gs", "s3", "s3", "file"]
+
+    for uri, expected_scheme in zip(uris, schemes):
+        parsed_scheme = get_uri_scheme(uri)
+        assert parsed_scheme == expected_scheme