From f4afe456e885659f74b64ca66db8707cf4e929b2 Mon Sep 17 00:00:00 2001 From: Bert Date: Wed, 8 Jan 2025 19:13:58 -0500 Subject: [PATCH] feat!: change default from postfiltering to prefiltering for sync python (#2000) BREAKING CHANGE: prefiltering is now the default in the synchronous python SDK resolves: #1872 --- python/python/lancedb/query.py | 13 +++++++------ python/python/tests/test_query.py | 15 +++++++++++++-- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/python/python/lancedb/query.py b/python/python/lancedb/query.py index 2d337f05..68d7016c 100644 --- a/python/python/lancedb/query.py +++ b/python/python/lancedb/query.py @@ -254,7 +254,7 @@ class LanceQueryBuilder(ABC): self._offset = 0 self._columns = None self._where = None - self._prefilter = False + self._prefilter = True self._with_row_id = False self._vector = None self._text = None @@ -425,7 +425,7 @@ class LanceQueryBuilder(ABC): raise ValueError("columns must be a list or a dictionary") return self - def where(self, where: str, prefilter: bool = False) -> LanceQueryBuilder: + def where(self, where: str, prefilter: bool = True) -> LanceQueryBuilder: """Set the where clause. Parameters @@ -434,7 +434,7 @@ class LanceQueryBuilder(ABC): The where clause which is a valid SQL where clause. See `Lance filter pushdown `_ for valid SQL expressions. - prefilter: bool, default False + prefilter: bool, default True If True, apply the filter before vector search, otherwise the filter is applied on the result of vector search. This feature is **EXPERIMENTAL** and may be removed and modified @@ -575,7 +575,8 @@ class LanceVectorQueryBuilder(LanceQueryBuilder): ... .limit(2) ... .to_pandas()) b vector _distance - 0 6 [0.4, 0.4] 0.0 + 0 6 [0.4, 0.4] 0.000000 + 1 2 [1.1, 1.2] 0.000944 """ def __init__( @@ -762,7 +763,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder): return result_set - def where(self, where: str, prefilter: bool = False) -> LanceVectorQueryBuilder: + def where(self, where: str, prefilter: bool = True) -> LanceVectorQueryBuilder: """Set the where clause. Parameters @@ -771,7 +772,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder): The where clause which is a valid SQL where clause. See `Lance filter pushdown `_ for valid SQL expressions. - prefilter: bool, default False + prefilter: bool, default True If True, apply the filter before vector search, otherwise the filter is applied on the result of vector search. This feature is **EXPERIMENTAL** and may be removed and modified diff --git a/python/python/tests/test_query.py b/python/python/tests/test_query.py index 3f651fad..1d9db4f0 100644 --- a/python/python/tests/test_query.py +++ b/python/python/tests/test_query.py @@ -228,15 +228,25 @@ def test_query_builder_with_filter(table): def test_query_builder_with_prefilter(table): df = ( LanceVectorQueryBuilder(table, [0, 0], "vector") - .where("id = 2") + .where("id = 2", prefilter=True) + .limit(1) + .to_pandas() + ) + assert df["id"].values[0] == 2 + assert all(df["vector"].values[0] == [3, 4]) + + df = ( + LanceVectorQueryBuilder(table, [0, 0], "vector") + .where("id = 2", prefilter=False) .limit(1) .to_pandas() ) assert len(df) == 0 + # ensure the default prefilter = True df = ( LanceVectorQueryBuilder(table, [0, 0], "vector") - .where("id = 2", prefilter=True) + .where("id = 2") .limit(1) .to_pandas() ) @@ -286,6 +296,7 @@ def test_query_builder_with_different_vector_column(): Query( vector=query, filter="b < 10", + prefilter=True, k=2, metric="cosine", columns=["b"],