mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-26 06:39:57 +00:00
feat: add support for add to async python API (#1037)
In order to add support for `add` we needed to migrate the rust `Table`
trait to a `Table` struct and `TableInternal` trait (similar to the way
the connection is designed).
While doing this we also cleaned up some inconsistencies between the
SDKs:
* Python and Node are garbage collected languages and it can be
difficult to trigger something to be freed. The convention for these
languages is to have some kind of close method. I added a close method
to both the table and connection which will drop the underlying rust
object.
* We made significant improvements to table creation in
cc5f2136a6
for the `node` SDK. I copied these changes to the `nodejs` SDK.
* The nodejs tables were using fs to create tmp directories and these
were not getting cleaned up. This is mostly harmless but annoying and so
I changed it up a bit to ensure we cleanup tmp directories.
* ~~countRows in the node SDK was returning `bigint`. I changed it to
return `number`~~ (this actually happened in a previous PR)
* Tables and connections now implement `std::fmt::Display` which is
hooked into python's `__repr__`. Node has no concept of a regular "to
string" function and so I added a `display` method.
* Python method signatures are changing so that optional parameters are
always `Optional[foo] = None` instead of something like `foo = False`.
This is because we want those defaults to be in rust whenever possible
(though we still need to mention the default in documentation).
* I changed the python `AsyncConnection/AsyncTable` classes from
abstract classes with a single implementation to just classes because we
no longer have the remote implementation in python.
Note: this does NOT add the `add` function to the remote table. This PR
was already large enough, and the remote implementation is unique
enough, that I am going to do all the remote stuff at a later date (we
should have the structure in place and correct so there shouldn't be any
refactor concerns)
---------
Co-authored-by: Will Jones <willjones127@gmail.com>
This commit is contained in:
@@ -11,6 +11,9 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import re
|
||||
from datetime import timedelta
|
||||
|
||||
import lancedb
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
@@ -250,6 +253,28 @@ def test_create_exist_ok(tmp_path):
|
||||
db.create_table("test", schema=bad_schema, exist_ok=True)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_connect(tmp_path):
|
||||
db = await lancedb.connect_async(tmp_path)
|
||||
assert str(db) == f"NativeDatabase(uri={tmp_path}, read_consistency_interval=None)"
|
||||
|
||||
db = await lancedb.connect_async(
|
||||
tmp_path, read_consistency_interval=timedelta(seconds=5)
|
||||
)
|
||||
assert str(db) == f"NativeDatabase(uri={tmp_path}, read_consistency_interval=5s)"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_close(tmp_path):
|
||||
db = await lancedb.connect_async(tmp_path)
|
||||
assert db.is_open()
|
||||
db.close()
|
||||
assert not db.is_open()
|
||||
|
||||
with pytest.raises(RuntimeError, match="is closed"):
|
||||
await db.table_names()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_mode_async(tmp_path):
|
||||
db = await lancedb.connect_async(tmp_path)
|
||||
@@ -322,6 +347,39 @@ async def test_create_exist_ok_async(tmp_path):
|
||||
# await db.create_table("test", schema=bad_schema, exist_ok=True)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_open_table(tmp_path):
|
||||
db = await lancedb.connect_async(tmp_path)
|
||||
data = pd.DataFrame(
|
||||
{
|
||||
"vector": [[3.1, 4.1], [5.9, 26.5]],
|
||||
"item": ["foo", "bar"],
|
||||
"price": [10.0, 20.0],
|
||||
}
|
||||
)
|
||||
await db.create_table("test", data=data)
|
||||
|
||||
tbl = await db.open_table("test")
|
||||
assert tbl.name == "test"
|
||||
assert (
|
||||
re.search(
|
||||
r"NativeTable\(test, uri=.*test\.lance, read_consistency_interval=None\)",
|
||||
str(tbl),
|
||||
)
|
||||
is not None
|
||||
)
|
||||
assert await tbl.schema() == pa.schema(
|
||||
{
|
||||
"vector": pa.list_(pa.float32(), list_size=2),
|
||||
"item": pa.utf8(),
|
||||
"price": pa.float64(),
|
||||
}
|
||||
)
|
||||
|
||||
with pytest.raises(ValueError, match="was not found"):
|
||||
await db.open_table("does_not_exist")
|
||||
|
||||
|
||||
def test_delete_table(tmp_path):
|
||||
db = lancedb.connect(tmp_path)
|
||||
data = pd.DataFrame(
|
||||
|
||||
@@ -26,8 +26,9 @@ import pandas as pd
|
||||
import polars as pl
|
||||
import pyarrow as pa
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
from lancedb.conftest import MockTextEmbeddingFunction
|
||||
from lancedb.db import LanceDBConnection
|
||||
from lancedb.db import AsyncConnection, LanceDBConnection
|
||||
from lancedb.embeddings import EmbeddingFunctionConfig, EmbeddingFunctionRegistry
|
||||
from lancedb.pydantic import LanceModel, Vector
|
||||
from lancedb.table import LanceTable
|
||||
@@ -49,6 +50,13 @@ def db(tmp_path) -> MockDB:
|
||||
return MockDB(tmp_path)
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
|
||||
async def db_async(tmp_path) -> AsyncConnection:
|
||||
return await lancedb.connect_async(
|
||||
tmp_path, read_consistency_interval=timedelta(seconds=0)
|
||||
)
|
||||
|
||||
|
||||
def test_basic(db):
|
||||
ds = LanceTable.create(
|
||||
db,
|
||||
@@ -65,6 +73,18 @@ def test_basic(db):
|
||||
assert table.to_lance().to_table() == ds.to_table()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_close(db_async: AsyncConnection):
|
||||
table = await db_async.create_table("some_table", data=[{"id": 0}])
|
||||
assert table.is_open()
|
||||
table.close()
|
||||
assert not table.is_open()
|
||||
|
||||
with pytest.raises(Exception, match="Table some_table is closed"):
|
||||
await table.count_rows()
|
||||
assert str(table) == "ClosedTable(some_table)"
|
||||
|
||||
|
||||
def test_create_table(db):
|
||||
schema = pa.schema(
|
||||
[
|
||||
@@ -186,6 +206,25 @@ def test_add_pydantic_model(db):
|
||||
assert len(really_flattened.columns) == 7
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_add_async(db_async: AsyncConnection):
|
||||
table = await db_async.create_table(
|
||||
"test",
|
||||
data=[
|
||||
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
|
||||
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
|
||||
],
|
||||
)
|
||||
assert await table.count_rows() == 2
|
||||
await table.add(
|
||||
data=[
|
||||
{"vector": [10.0, 11.0], "item": "baz", "price": 30.0},
|
||||
],
|
||||
)
|
||||
table = await db_async.open_table("test")
|
||||
assert await table.count_rows() == 3
|
||||
|
||||
|
||||
def test_polars(db):
|
||||
data = {
|
||||
"vector": [[3.1, 4.1], [5.9, 26.5]],
|
||||
|
||||
Reference in New Issue
Block a user