feat!: change default from postfiltering to prefiltering for sync python (#2000)

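BREAKING CHANGE: prefiltering is now the default in the synchronous
Python SDK. `where()` now defaults to `prefilter=True`; pass
`prefilter=False` explicitly to keep the previous postfiltering behavior.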

resolves: #1872
This commit is contained in:
Bert
2025-01-08 19:13:58 -05:00
committed by GitHub
parent ea5c2266b8
commit f4afe456e8
2 changed files with 20 additions and 8 deletions

View File

@@ -254,7 +254,7 @@ class LanceQueryBuilder(ABC):
self._offset = 0
self._columns = None
self._where = None
self._prefilter = False
self._prefilter = True
self._with_row_id = False
self._vector = None
self._text = None
@@ -425,7 +425,7 @@ class LanceQueryBuilder(ABC):
raise ValueError("columns must be a list or a dictionary")
return self
def where(self, where: str, prefilter: bool = False) -> LanceQueryBuilder:
def where(self, where: str, prefilter: bool = True) -> LanceQueryBuilder:
"""Set the where clause.
Parameters
@@ -434,7 +434,7 @@ class LanceQueryBuilder(ABC):
The where clause which is a valid SQL where clause. See
`Lance filter pushdown <https://lancedb.github.io/lance/read_and_write.html#filter-push-down>`_
for valid SQL expressions.
prefilter: bool, default False
prefilter: bool, default True
If True, apply the filter before vector search, otherwise the
filter is applied on the result of vector search.
This feature is **EXPERIMENTAL** and may be removed and modified
@@ -575,7 +575,8 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
... .limit(2)
... .to_pandas())
b vector _distance
0 6 [0.4, 0.4] 0.0
0 6 [0.4, 0.4] 0.000000
1 2 [1.1, 1.2] 0.000944
"""
def __init__(
@@ -762,7 +763,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
return result_set
def where(self, where: str, prefilter: bool = False) -> LanceVectorQueryBuilder:
def where(self, where: str, prefilter: bool = True) -> LanceVectorQueryBuilder:
"""Set the where clause.
Parameters
@@ -771,7 +772,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
The where clause which is a valid SQL where clause. See
`Lance filter pushdown <https://lancedb.github.io/lance/read_and_write.html#filter-push-down>`_
for valid SQL expressions.
prefilter: bool, default False
prefilter: bool, default True
If True, apply the filter before vector search, otherwise the
filter is applied on the result of vector search.
This feature is **EXPERIMENTAL** and may be removed and modified

View File

@@ -228,15 +228,25 @@ def test_query_builder_with_filter(table):
def test_query_builder_with_prefilter(table):
df = (
LanceVectorQueryBuilder(table, [0, 0], "vector")
.where("id = 2")
.where("id = 2", prefilter=True)
.limit(1)
.to_pandas()
)
assert df["id"].values[0] == 2
assert all(df["vector"].values[0] == [3, 4])
df = (
LanceVectorQueryBuilder(table, [0, 0], "vector")
.where("id = 2", prefilter=False)
.limit(1)
.to_pandas()
)
assert len(df) == 0
# ensure the default prefilter = True
df = (
LanceVectorQueryBuilder(table, [0, 0], "vector")
.where("id = 2", prefilter=True)
.where("id = 2")
.limit(1)
.to_pandas()
)
@@ -286,6 +296,7 @@ def test_query_builder_with_different_vector_column():
Query(
vector=query,
filter="b < 10",
prefilter=True,
k=2,
metric="cosine",
columns=["b"],