mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-27 15:12:53 +00:00
fix(python): Few fts patches (#1039)
1. Filtering with fts mutated the schema, which caused schema mismatch problems with hybrid search, as it combines the fts and vector search tables. 2. fts with a filter failed with `with_row_id`. This was because row_id was calculated before filtering, which caused a size mismatch when attaching it afterwards. 3. The fix for 1 meant that row_id is now attached before filtering, but passing a filter to `to_lance` on a dataset that already contains `_rowid` raises a panic from lance. So temporarily, in the case where fts is used with a filter AND `with_row_id`, we just force the user to use the duckdb pathway. --------- Co-authored-by: Chang She <759245+changhiskhan@users.noreply.github.com>
This commit is contained in:
committed by
Weston Pace
parent
c60a193767
commit
b5326d31e9
@@ -137,7 +137,11 @@ def test_search_index_with_filter(table):
|
||||
|
||||
# no duckdb
|
||||
with mock.patch("builtins.__import__", side_effect=import_mock):
|
||||
rs = table.search("puppy").where("id=1").limit(10).to_list()
|
||||
rs = table.search("puppy").where("id=1").limit(10)
|
||||
# test schema
|
||||
assert rs.to_arrow().drop("score").schema.equals(table.schema)
|
||||
|
||||
rs = rs.to_list()
|
||||
for r in rs:
|
||||
assert r["id"] == 1
|
||||
|
||||
@@ -147,6 +151,10 @@ def test_search_index_with_filter(table):
|
||||
assert r["id"] == 1
|
||||
|
||||
assert rs == rs2
|
||||
rs = table.search("puppy").where("id=1").with_row_id(True).limit(10).to_list()
|
||||
for r in rs:
|
||||
assert r["id"] == 1
|
||||
assert r["_rowid"] is not None
|
||||
|
||||
|
||||
def test_null_input(table):
|
||||
|
||||
@@ -893,8 +893,17 @@ def test_hybrid_search(db, tmp_path):
|
||||
result3 = table.search(
|
||||
"Our father who art in heaven", query_type="hybrid"
|
||||
).to_pydantic(MyTable)
|
||||
|
||||
assert result1 == result3
|
||||
|
||||
# with post filters
|
||||
result = (
|
||||
table.search("Arrrrggghhhhhhh", query_type="hybrid")
|
||||
.where("text='Arrrrggghhhhhhh'")
|
||||
.to_list()
|
||||
)
|
||||
len(result) == 1
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"consistency_interval", [None, timedelta(seconds=0), timedelta(seconds=0.1)]
|
||||
|
||||
Reference in New Issue
Block a user