Better handle empty results from tantivy (#155)

Closes #154

---------

Co-authored-by: Chang She <chang@lancedb.com>
This commit is contained in:
Chang She
2023-06-05 18:18:14 -07:00
committed by GitHub
parent d803482588
commit 50cdb16b45
5 changed files with 14 additions and 3 deletions

View File

@@ -118,6 +118,8 @@ def search_index(
query = index.parse_query(query)
# get top results
results = searcher.search(query, limit)
if results.count == 0:
return tuple(), tuple()
return tuple(
zip(
*[

View File

@@ -164,6 +164,8 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
index = tantivy.Index.open(index_path)
# get the scores and doc ids
row_ids, scores = search_index(index, self._query, self._limit)
if len(row_ids) == 0:
return pd.DataFrame()
scores = pa.array(scores)
output_tbl = self._table.to_lance().take(row_ids, columns=self._columns)
output_tbl = output_tbl.append_column("score", scores)

View File

@@ -37,7 +37,7 @@ repository = "https://github.com/lancedb/lancedb"
[project.optional-dependencies]
tests = [
"pytest"
"pytest", "pytest-mock"
]
dev = [
"ruff", "pre-commit", "black"

View File

@@ -82,3 +82,10 @@ def test_create_index_multiple_columns(tmp_path, table):
assert len(df) == 10
assert "text" in df.columns
assert "text2" in df.columns
def test_empty_rs(tmp_path, table, mocker):
    """Check that an FTS search with no hits yields an empty result frame."""
    table.create_fts_index(["text", "text2"])
    # Stub the index lookup so it reports no row ids and no scores.
    mocker.patch("lancedb.fts.search_index", return_value=([], []))
    query = table.search("puppy").limit(10)
    result = query.to_df()
    assert len(result) == 0