feat(python): accept Expr in Table.delete and merge when_not_matched_by_source_delete (#3524)

Another little pain point I ran into while working to integrate with
paperless-ngx. The read paths, table.search() and table.query(), already
accepted an Expr, but the write paths, Table.delete and
merge_insert(...).when_not_matched_by_source_delete, did not. This PR
attempts to close that gap, so that writes and reads can both use an Expr
instead of one side needing to build a string.
This commit is contained in:
Trenton H
2026-06-11 07:59:49 -07:00
committed by GitHub
parent 4fb7c92e86
commit ae7f2cbfe8
5 changed files with 129 additions and 24 deletions

View File

@@ -22,6 +22,7 @@ import pytest
from lancedb.conftest import MockTextEmbeddingFunction
from lancedb.db import AsyncConnection, DBConnection
from lancedb.embeddings import EmbeddingFunctionConfig, EmbeddingFunctionRegistry
from lancedb.expr import col, lit
from lancedb.pydantic import LanceModel, Vector
from lancedb.table import LanceTable
from pydantic import BaseModel
@@ -1966,6 +1967,38 @@ def test_delete(mem_db: DBConnection):
assert table.to_arrow()["id"].to_pylist() == [1]
def test_delete_expr(mem_db: DBConnection):
    """Check that ``Table.delete`` accepts a predicate built with ``col``/``lit``.

    Creates a three-row table, deletes the row whose ``id`` equals 0 and
    verifies the reported version, the remaining row count and the
    surviving ids.
    """
    rows = [
        {"vector": [1.1, 0.9], "id": 0},
        {"vector": [1.2, 1.9], "id": 1},
        {"vector": [1.3, 2.9], "id": 2},
    ]
    tbl = mem_db.create_table("my_table", data=rows)
    assert len(tbl) == 3

    # The predicate is an expression object rather than a filter string.
    predicate = col("id") == lit(0)
    result = tbl.delete(predicate)
    assert result.version == 2

    assert len(tbl) == 2
    # Sort before comparing so the check does not depend on row order.
    remaining_ids = tbl.to_arrow()["id"].to_pylist()
    assert sorted(remaining_ids) == [1, 2]
@pytest.mark.asyncio
async def test_delete_expr_async(mem_db_async: AsyncConnection):
    """Async counterpart: ``delete`` accepts a ``col``/``lit`` predicate.

    Creates a three-row table, deletes the row whose ``id`` equals 0 and
    verifies the remaining row count and the surviving ids.
    """
    rows = [
        {"vector": [1.1, 0.9], "id": 0},
        {"vector": [1.2, 1.9], "id": 1},
        {"vector": [1.3, 2.9], "id": 2},
    ]
    tbl = await mem_db_async.create_table("my_table", data=rows)
    assert await tbl.count_rows() == 3

    # The predicate is an expression object rather than a filter string.
    predicate = col("id") == lit(0)
    await tbl.delete(predicate)

    assert await tbl.count_rows() == 2
    # Sort before comparing so the check does not depend on row order.
    arrow_tbl = await tbl.to_arrow()
    remaining_ids = arrow_tbl["id"].to_pylist()
    assert sorted(remaining_ids) == [1, 2]
def test_update(mem_db: DBConnection):
table = mem_db.create_table(
"my_table",
@@ -2151,6 +2184,50 @@ def test_merge_insert(mem_db: DBConnection):
)
def test_merge_insert_by_source_delete_expr(mem_db: DBConnection):
    """Check ``when_not_matched_by_source_delete`` with an expression condition.

    Merges two source rows (``a`` = 2 and 4) into a table holding ``a`` =
    1, 2, 3, keyed on ``a``. The assertions expect one insert (``a`` = 4),
    one update (``a`` = 2) and one delete (``a`` = 3), while ``a`` = 1 is
    kept because it does not satisfy ``a > 2``.
    """
    initial = pa.table({"a": [1, 2, 3], "b": ["a", "b", "c"]})
    tbl = mem_db.create_table("my_table", data=initial)
    incoming = pa.table({"a": [2, 4], "b": ["x", "z"]})

    # replace-range, limiting the source-absent delete with an Expr condition
    builder = tbl.merge_insert("a")
    builder = builder.when_matched_update_all()
    builder = builder.when_not_matched_insert_all()
    builder = builder.when_not_matched_by_source_delete(col("a") > lit(2))
    result = builder.execute(incoming)

    assert result.num_inserted_rows == 1
    assert result.num_updated_rows == 1
    assert result.num_deleted_rows == 1

    expected = pa.table({"a": [1, 2, 4], "b": ["a", "x", "z"]})
    # Sort by the key column so the comparison does not depend on row order.
    actual = tbl.to_arrow().sort_by("a")
    assert actual == expected
@pytest.mark.asyncio
async def test_merge_insert_by_source_delete_expr_async(
    mem_db_async: AsyncConnection,
):
    """Async counterpart of the source-absent delete test with an expression.

    Merges two source rows (``a`` = 2 and 4) into a table holding ``a`` =
    1, 2, 3, keyed on ``a``, and checks the final table contents: ``a`` = 3
    is gone, ``a`` = 2 carries the new value, ``a`` = 4 is added and
    ``a`` = 1 is untouched.
    """
    initial = pa.table({"a": [1, 2, 3], "b": ["a", "b", "c"]})
    tbl = await mem_db_async.create_table("some_table", data=initial)
    incoming = pa.table({"a": [2, 4], "b": ["x", "z"]})

    # replace-range, limiting the source-absent delete with an Expr condition
    builder = tbl.merge_insert("a")
    builder = builder.when_matched_update_all()
    builder = builder.when_not_matched_insert_all()
    builder = builder.when_not_matched_by_source_delete(col("a") > lit(2))
    # Only the final execute call is awaited; the builder steps are synchronous.
    await builder.execute(incoming)

    expected = pa.table({"a": [1, 2, 4], "b": ["a", "x", "z"]})
    # Sort by the key column so the comparison does not depend on row order.
    actual = await tbl.to_arrow()
    assert actual.sort_by("a") == expected
# We vary the data format because there are slight differences in how
# subschemas are handled in different formats
@pytest.mark.parametrize(