mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-04 02:42:57 +00:00
Better handle empty results from tantivy (#155)
Closes #154

---------

Co-authored-by: Chang She <chang@lancedb.com>
This commit is contained in:
@@ -118,6 +118,8 @@ def search_index(
|
||||
query = index.parse_query(query)
|
||||
# get top results
|
||||
results = searcher.search(query, limit)
|
||||
if results.count == 0:
|
||||
return tuple(), tuple()
|
||||
return tuple(
|
||||
zip(
|
||||
*[
|
||||
|
||||
@@ -164,6 +164,8 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
|
||||
index = tantivy.Index.open(index_path)
|
||||
# get the scores and doc ids
|
||||
row_ids, scores = search_index(index, self._query, self._limit)
|
||||
if len(row_ids) == 0:
|
||||
return pd.DataFrame()
|
||||
scores = pa.array(scores)
|
||||
output_tbl = self._table.to_lance().take(row_ids, columns=self._columns)
|
||||
output_tbl = output_tbl.append_column("score", scores)
|
||||
|
||||
@@ -37,7 +37,7 @@ repository = "https://github.com/lancedb/lancedb"
|
||||
|
||||
[project.optional-dependencies]
|
||||
tests = [
|
||||
"pytest"
|
||||
"pytest", "pytest-mock"
|
||||
]
|
||||
dev = [
|
||||
"ruff", "pre-commit", "black"
|
||||
|
||||
@@ -82,3 +82,10 @@ def test_create_index_multiple_columns(tmp_path, table):
|
||||
assert len(df) == 10
|
||||
assert "text" in df.columns
|
||||
assert "text2" in df.columns
|
||||
|
||||
|
||||
def test_empty_rs(tmp_path, table, mocker):
    """Check that a full-text search with no hits yields an empty result.

    The index lookup is patched to hand back empty row ids and scores, so
    the assertion exercises only how the query path copes with "no hits".
    """
    fts_columns = ["text", "text2"]
    table.create_fts_index(fts_columns)

    # Replace the index lookup with a stub that reports zero matches.
    no_hits = ([], [])
    mocker.patch("lancedb.fts.search_index", return_value=no_hits)

    query = table.search("puppy")
    limited = query.limit(10)
    result = limited.to_df()

    assert len(result) == 0
|
||||
|
||||
Reference in New Issue
Block a user