diff --git a/python/lancedb/db.py b/python/lancedb/db.py
index f905aae3..9782163f 100644
--- a/python/lancedb/db.py
+++ b/python/lancedb/db.py
@@ -18,6 +18,7 @@ import pyarrow as pa
 
 from .common import URI, DATA
 from .table import LanceTable
+from .util import get_uri_scheme
 
 
 class LanceDBConnection:
@@ -26,10 +27,12 @@ class LanceDBConnection:
     """
 
     def __init__(self, uri: URI):
-        if isinstance(uri, str):
-            uri = Path(uri)
-        uri = uri.expanduser().absolute()
-        Path(uri).mkdir(parents=True, exist_ok=True)
+        is_local = isinstance(uri, Path) or get_uri_scheme(uri) == "file"
+        if is_local:
+            if isinstance(uri, str):
+                uri = Path(uri)
+            uri = uri.expanduser().absolute()
+            Path(uri).mkdir(parents=True, exist_ok=True)
         self._uri = str(uri)
 
     @property
@@ -43,7 +46,11 @@ class LanceDBConnection:
         -------
         A list of table names.
         """
-        return [p.stem for p in Path(self.uri).glob("*.lance")]
+        if get_uri_scheme(self.uri) == "file":
+            return [p.stem for p in Path(self.uri).glob("*.lance")]
+        raise NotImplementedError(
+            "List table_names is only supported for local filesystem for now"
+        )
 
     def __len__(self) -> int:
         return len(self.table_names())
diff --git a/python/lancedb/util.py b/python/lancedb/util.py
index 4444c881..92bb9315 100644
--- a/python/lancedb/util.py
+++ b/python/lancedb/util.py
@@ -35,4 +35,9 @@ def get_uri_scheme(uri: str) -> str:
         scheme = "file"
     elif scheme in ["s3a", "s3n"]:
         scheme = "s3"
+    elif len(scheme) == 1:
+        # Windows drive letters are parsed by urlparse as the URI scheme,
+        # e.g. "c:\path" -> ParseResult(scheme="c", netloc="", path="\path", ...)
+        # so any single-character scheme is treated as a local filesystem path.
+        scheme = "file"
     return scheme
diff --git a/python/tests/test_util.py b/python/tests/test_util.py
index 687de11d..1090fa3d 100644
--- a/python/tests/test_util.py
+++ b/python/tests/test_util.py
@@ -21,8 +21,9 @@ def test_normalize_uri():
         "file:///absolute/path",
         "s3://bucket/path",
         "gs://bucket/path",
+        "c:\\windows\\path",
     ]
-    schemes = ["file", "file", "file", "s3", "gs"]
+    schemes = ["file", "file", "file", "s3", "gs", "file"]
 
     for uri, expected_scheme in zip(uris, schemes):
         parsed_scheme = get_uri_scheme(uri)