mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-27 23:12:58 +00:00
feat!: change default from postfiltering to prefiltering for sync python (#2000)
BREAKING CHANGE: prefiltering is now the default in the synchronous Python SDK. Resolves: #1872
This commit is contained in:
@@ -254,7 +254,7 @@ class LanceQueryBuilder(ABC):
|
||||
self._offset = 0
|
||||
self._columns = None
|
||||
self._where = None
|
||||
self._prefilter = False
|
||||
self._prefilter = True
|
||||
self._with_row_id = False
|
||||
self._vector = None
|
||||
self._text = None
|
||||
@@ -425,7 +425,7 @@ class LanceQueryBuilder(ABC):
|
||||
raise ValueError("columns must be a list or a dictionary")
|
||||
return self
|
||||
|
||||
def where(self, where: str, prefilter: bool = False) -> LanceQueryBuilder:
|
||||
def where(self, where: str, prefilter: bool = True) -> LanceQueryBuilder:
|
||||
"""Set the where clause.
|
||||
|
||||
Parameters
|
||||
@@ -434,7 +434,7 @@ class LanceQueryBuilder(ABC):
|
||||
The where clause which is a valid SQL where clause. See
|
||||
`Lance filter pushdown <https://lancedb.github.io/lance/read_and_write.html#filter-push-down>`_
|
||||
for valid SQL expressions.
|
||||
prefilter: bool, default False
|
||||
prefilter: bool, default True
|
||||
If True, apply the filter before vector search, otherwise the
|
||||
filter is applied on the result of vector search.
|
||||
This feature is **EXPERIMENTAL** and may be removed and modified
|
||||
@@ -575,7 +575,8 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
... .limit(2)
|
||||
... .to_pandas())
|
||||
b vector _distance
|
||||
0 6 [0.4, 0.4] 0.0
|
||||
0 6 [0.4, 0.4] 0.000000
|
||||
1 2 [1.1, 1.2] 0.000944
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -762,7 +763,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
|
||||
return result_set
|
||||
|
||||
def where(self, where: str, prefilter: bool = False) -> LanceVectorQueryBuilder:
|
||||
def where(self, where: str, prefilter: bool = True) -> LanceVectorQueryBuilder:
|
||||
"""Set the where clause.
|
||||
|
||||
Parameters
|
||||
@@ -771,7 +772,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
The where clause which is a valid SQL where clause. See
|
||||
`Lance filter pushdown <https://lancedb.github.io/lance/read_and_write.html#filter-push-down>`_
|
||||
for valid SQL expressions.
|
||||
prefilter: bool, default False
|
||||
prefilter: bool, default True
|
||||
If True, apply the filter before vector search, otherwise the
|
||||
filter is applied on the result of vector search.
|
||||
This feature is **EXPERIMENTAL** and may be removed and modified
|
||||
|
||||
@@ -228,15 +228,25 @@ def test_query_builder_with_filter(table):
|
||||
def test_query_builder_with_prefilter(table):
|
||||
df = (
|
||||
LanceVectorQueryBuilder(table, [0, 0], "vector")
|
||||
.where("id = 2")
|
||||
.where("id = 2", prefilter=True)
|
||||
.limit(1)
|
||||
.to_pandas()
|
||||
)
|
||||
assert df["id"].values[0] == 2
|
||||
assert all(df["vector"].values[0] == [3, 4])
|
||||
|
||||
df = (
|
||||
LanceVectorQueryBuilder(table, [0, 0], "vector")
|
||||
.where("id = 2", prefilter=False)
|
||||
.limit(1)
|
||||
.to_pandas()
|
||||
)
|
||||
assert len(df) == 0
|
||||
|
||||
# ensure the default prefilter = True
|
||||
df = (
|
||||
LanceVectorQueryBuilder(table, [0, 0], "vector")
|
||||
.where("id = 2", prefilter=True)
|
||||
.where("id = 2")
|
||||
.limit(1)
|
||||
.to_pandas()
|
||||
)
|
||||
@@ -286,6 +296,7 @@ def test_query_builder_with_different_vector_column():
|
||||
Query(
|
||||
vector=query,
|
||||
filter="b < 10",
|
||||
prefilter=True,
|
||||
k=2,
|
||||
metric="cosine",
|
||||
columns=["b"],
|
||||
|
||||
Reference in New Issue
Block a user