mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-12 23:02:59 +00:00
Order by field support FTS (#1132)
This PR adds support for passing through a set of ordering fields at index time (unsigned ints that tantivity can use as fast_fields) that at query time you can sort your results on. This is useful for cases where you want to get related hits, i.e by keyword, but order those hits by some other score, such as popularity. I.e search for songs descriptions that match on "sad AND jazz AND 1920" and then order those by number of times played. Example usage can be seen in the fts tests. --------- Co-authored-by: Nat Roth <natroth@Nats-MacBook-Pro.local> Co-authored-by: Chang She <759245+changhiskhan@users.noreply.github.com>
This commit is contained in:
committed by
Weston Pace
parent
4466cfa958
commit
f6e9f8e3f4
@@ -43,6 +43,7 @@ def table(tmp_path) -> ldb.table.LanceTable:
|
||||
)
|
||||
for _ in range(100)
|
||||
]
|
||||
count = [random.randint(1, 10000) for _ in range(100)]
|
||||
table = db.create_table(
|
||||
"test",
|
||||
data=pd.DataFrame(
|
||||
@@ -52,6 +53,7 @@ def table(tmp_path) -> ldb.table.LanceTable:
|
||||
"text": text,
|
||||
"text2": text,
|
||||
"nested": [{"text": t} for t in text],
|
||||
"count": count,
|
||||
}
|
||||
),
|
||||
)
|
||||
@@ -79,6 +81,39 @@ def test_search_index(tmp_path, table):
|
||||
assert len(results[1]) == 10 # _distance
|
||||
|
||||
|
||||
def test_search_ordering_field_index_table(tmp_path, table):
|
||||
table.create_fts_index("text", ordering_field_names=["count"])
|
||||
rows = (
|
||||
table.search("puppy", ordering_field_name="count")
|
||||
.limit(20)
|
||||
.select(["text", "count"])
|
||||
.to_list()
|
||||
)
|
||||
for r in rows:
|
||||
assert "puppy" in r["text"]
|
||||
assert sorted(rows, key=lambda x: x["count"], reverse=True) == rows
|
||||
|
||||
|
||||
def test_search_ordering_field_index(tmp_path, table):
|
||||
index = ldb.fts.create_index(
|
||||
str(tmp_path / "index"), ["text"], ordering_fields=["count"]
|
||||
)
|
||||
|
||||
ldb.fts.populate_index(index, table, ["text"], ordering_fields=["count"])
|
||||
index.reload()
|
||||
results = ldb.fts.search_index(
|
||||
index, query="puppy", limit=10, ordering_field="count"
|
||||
)
|
||||
assert len(results) == 2
|
||||
assert len(results[0]) == 10 # row_ids
|
||||
assert len(results[1]) == 10 # _distance
|
||||
rows = table.to_lance().take(results[0]).to_pylist()
|
||||
|
||||
for r in rows:
|
||||
assert "puppy" in r["text"]
|
||||
assert sorted(rows, key=lambda x: x["count"], reverse=True) == rows
|
||||
|
||||
|
||||
def test_create_index_from_table(tmp_path, table):
|
||||
table.create_fts_index("text")
|
||||
df = table.search("puppy").limit(10).select(["text"]).to_pandas()
|
||||
@@ -94,6 +129,7 @@ def test_create_index_from_table(tmp_path, table):
|
||||
"text": "gorilla",
|
||||
"text2": "gorilla",
|
||||
"nested": {"text": "gorilla"},
|
||||
"count": 10,
|
||||
}
|
||||
]
|
||||
)
|
||||
@@ -166,6 +202,7 @@ def test_null_input(table):
|
||||
"text": None,
|
||||
"text2": None,
|
||||
"nested": {"text": None},
|
||||
"count": 7,
|
||||
}
|
||||
]
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user