mirror of
https://github.com/lancedb/lancedb.git
synced 2026-07-03 11:00:40 +00:00
fix!: combine repeated where filters with AND instead of replacing (#3585)
BREAKING CHANGE: When passing multiple where clauses to a query, they now stack instead of replacing the previous filter. Previously, calling `where`/`only_if` more than once on a query silently replaced the previous filter, so only the last filter was applied. This was surprising and could return rows that an earlier filter should have excluded. This implements the alternative suggested in https://github.com/lancedb/lancedb/pull/3514#issuecomment-4664901580: instead of rejecting a second filter, repeated filters are combined with a logical AND (`(previous) AND (new)`). The combination happens in the Rust core (`QueryBase::only_if` and `only_if_expr`), so it applies to all SDKs at once (Rust, Python async, and TypeScript). The Python sync query builder keeps its own filter state, so it combines filters in the binding layer as well. SQL string and expression filters are combined within their own representation. When the two representations are mixed, the expression is lowered to SQL (via `expr_to_sql_string`) and the filters are combined as SQL strings, so chaining `where` works regardless of which form each filter takes. Fixes #2649 ## Tests - Rust: `cargo test --features remote -p lancedb --lib query` - Python: `uv run --extra tests pytest python/tests/test_query.py` - TypeScript: `pnpm test __test__/query.test.ts` 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -119,6 +119,27 @@ def _filter_to_sql(filter: Optional[Union[str, Expr]]) -> Optional[str]:
|
||||
return filter
|
||||
|
||||
|
||||
def _combine_where(
|
||||
existing: Optional[Union[str, Expr]], new: Union[str, Expr]
|
||||
) -> Union[str, Expr]:
|
||||
"""Combine a new filter with an existing one using a logical AND.
|
||||
|
||||
Calling ``where`` more than once composes the filters with AND instead of
|
||||
replacing the previous filter. Two :class:`~lancedb.expr.Expr` filters are
|
||||
combined as an expression; otherwise both filters are lowered to SQL strings
|
||||
and combined as SQL.
|
||||
"""
|
||||
if existing is None:
|
||||
return new
|
||||
existing_is_expr = isinstance(existing, Expr)
|
||||
new_is_expr = isinstance(new, Expr)
|
||||
if existing_is_expr and new_is_expr:
|
||||
return existing & new
|
||||
existing_sql = existing.to_sql() if existing_is_expr else existing
|
||||
new_sql = new.to_sql() if new_is_expr else new
|
||||
return f"({existing_sql}) AND ({new_sql})"
|
||||
|
||||
|
||||
def _projection_to_scanner_kwargs(
|
||||
columns: Optional[
|
||||
Union[
|
||||
@@ -1148,8 +1169,13 @@ class LanceQueryBuilder(ABC):
|
||||
-------
|
||||
LanceQueryBuilder
|
||||
The LanceQueryBuilder object.
|
||||
|
||||
Notes
|
||||
-----
|
||||
Calling this multiple times combines the filters with a logical AND
|
||||
rather than replacing the previous filter.
|
||||
"""
|
||||
self._where = where
|
||||
self._where = _combine_where(self._where, where)
|
||||
self._postfilter = not prefilter
|
||||
return self
|
||||
|
||||
@@ -1693,8 +1719,13 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
-------
|
||||
LanceQueryBuilder
|
||||
The LanceQueryBuilder object.
|
||||
|
||||
Notes
|
||||
-----
|
||||
Calling this multiple times combines the filters with a logical AND
|
||||
rather than replacing the previous filter.
|
||||
"""
|
||||
self._where = where
|
||||
self._where = _combine_where(self._where, where)
|
||||
if prefilter is not None:
|
||||
self._postfilter = not prefilter
|
||||
return self
|
||||
@@ -2894,6 +2925,9 @@ class AsyncStandardQuery(AsyncQueryBase):
|
||||
|
||||
Filtering performance can often be improved by creating a scalar index
|
||||
on the filter column(s).
|
||||
|
||||
Calling this multiple times combines the filters with a logical AND
|
||||
rather than replacing the previous filter.
|
||||
"""
|
||||
if isinstance(predicate, Expr):
|
||||
self._inner.where_expr(predicate._inner)
|
||||
|
||||
@@ -502,6 +502,61 @@ def test_with_row_id(table: lancedb.table.Table):
|
||||
assert rs["_rowid"].to_pylist() == [0, 1]
|
||||
|
||||
|
||||
def test_where_repeated_combines_with_and(table: lancedb.table.Table):
|
||||
# Calling where() more than once should AND the filters together instead of
|
||||
# silently replacing the previous one (regression test for #2649).
|
||||
builder = table.search().where("id >= 1").where("id < 2")
|
||||
assert builder._where == "(id >= 1) AND (id < 2)"
|
||||
|
||||
ids = [row["id"] for row in builder.limit(10).to_list()]
|
||||
assert ids == [1]
|
||||
|
||||
|
||||
def test_where_repeated_combines_expr(table: lancedb.table.Table):
|
||||
from lancedb.expr import col, lit
|
||||
|
||||
builder = table.search().where(col("id") >= lit(1)).where(col("id") < lit(2))
|
||||
ids = [row["id"] for row in builder.limit(10).to_list()]
|
||||
assert ids == [1]
|
||||
|
||||
|
||||
def test_where_mixed_filter_kinds_combines(table: lancedb.table.Table):
|
||||
# Mixing a SQL string filter with an expression filter lowers the
|
||||
# expression to SQL and combines them as SQL strings.
|
||||
from lancedb.expr import col, lit
|
||||
|
||||
builder = table.search().where("id >= 1").where(col("id") < lit(2))
|
||||
ids = [row["id"] for row in builder.limit(10).to_list()]
|
||||
assert ids == [1]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_where_repeated_combines_with_and_async(table_async: AsyncTable):
|
||||
ids = [
|
||||
row["id"]
|
||||
for row in (
|
||||
await table_async.query().where("id >= 1").where("id < 2").to_list()
|
||||
)
|
||||
]
|
||||
assert ids == [1]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_where_mixed_filter_kinds_combines_async(table_async: AsyncTable):
|
||||
from lancedb.expr import col, lit
|
||||
|
||||
ids = [
|
||||
row["id"]
|
||||
for row in (
|
||||
await table_async.query()
|
||||
.where("id >= 1")
|
||||
.where(col("id") < lit(2))
|
||||
.to_list()
|
||||
)
|
||||
]
|
||||
assert ids == [1]
|
||||
|
||||
|
||||
def test_distance_range(table: lancedb.table.Table):
|
||||
q = [0, 0]
|
||||
rs = table.search(q).to_arrow()
|
||||
|
||||
Reference in New Issue
Block a user