mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-25 06:19:57 +00:00
I know there's a larger effort to have the python client based on the core rust implementation, but in the meantime there have been several issues (#1072 and #485) with some of the azure blob storage calls due to pyarrow not natively supporting an azure backend. To this end, I've added an optional import of the fsspec implementation of azure blob storage [`adlfs`](https://pypi.org/project/adlfs/) and passed it to `pyarrow.fs`. I've modified the existing test and manually verified it with some real credentials to make sure it behaves as expected. It should now be as simple as:

```python
import lancedb

db = lancedb.connect("az://blob_name/path")
table = db.open_table("test")
table.search(...)
```

Thank you for this cool project and we're excited to start using this for real shortly! 🎉 And thanks to @dwhitena for bringing it to my attention with his prediction guard posts.

Co-authored-by: christiandilorenzo <christian.dilorenzo@infiniaml.com>
70 lines
2.1 KiB
Python
70 lines
2.1 KiB
Python
# Copyright 2023 LanceDB Developers
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
import os
|
|
|
|
import lancedb
|
|
import pytest
|
|
|
|
# AWS:
|
|
# You need to set up AWS credentials and a base path to run this test. Example:
|
|
# AWS_PROFILE=default REMOTE_BASE_URL=s3://my_bucket/dataset pytest tests/test_io.py
|
|
#
|
|
# Azure:
|
|
# You need to set up Azure credentials and a base path to run this test. Example:
|
|
# export AZURE_STORAGE_ACCOUNT_NAME="<account>"
|
|
# export AZURE_STORAGE_ACCOUNT_KEY="<key>"
|
|
# export REMOTE_BASE_URL=az://my_blob/dataset
|
|
# pytest tests/test_io.py
|
|
|
|
|
|
@pytest.fixture(autouse=True, scope="module")
def setup():
    """Drop every table under REMOTE_BASE_URL after the module's tests run.

    There is no setup work: the fixture yields immediately, and everything
    after the ``yield`` is teardown. When REMOTE_BASE_URL is unset or empty
    the teardown does nothing.
    """
    yield

    base_url = os.environ.get("REMOTE_BASE_URL")
    if not base_url:
        return

    connection = lancedb.connect(base_url)
    for name in connection.table_names():
        connection.drop_table(name)
|
|
|
|
|
|
@pytest.mark.skipif(
    # Truthiness rather than ``is None``: an empty REMOTE_BASE_URL must skip
    # too, matching the check the module's cleanup fixture applies.
    not os.environ.get("REMOTE_BASE_URL"),
    reason="please setup remote base url",
)
def test_remote_io():
    """Exercise create/search/list/open against the store at REMOTE_BASE_URL.

    Skipped unless REMOTE_BASE_URL is set to a non-empty value. The test
    expects the location to contain no tables when it starts.
    """
    db = lancedb.connect(os.environ.get("REMOTE_BASE_URL"))
    assert db.table_names() == []

    table = db.create_table(
        "test",
        data=[
            {"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
            {"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
        ],
    )

    # Unfiltered search: the single nearest hit is expected to be "bar".
    rs = table.search([100, 100]).limit(1).to_pandas()
    assert len(rs) == 1
    assert rs["item"].iloc[0] == "bar"

    # The price filter leaves only "foo", even though up to 2 rows are allowed.
    rs = table.search([100, 100]).where("price < 15").limit(2).to_pandas()
    assert len(rs) == 1
    assert rs["item"].iloc[0] == "foo"

    # The connection's listing, membership and length must all see the table.
    assert db.table_names() == ["test"]
    assert "test" in db
    assert len(db) == 1

    # open_table() and item access must resolve to the same table name.
    assert db.open_table("test").name == db["test"].name
|