From 50cdb16b45f4aa45c9b54d810c4565c9d55de998 Mon Sep 17 00:00:00 2001
From: Chang She <759245+changhiskhan@users.noreply.github.com>
Date: Mon, 5 Jun 2023 18:18:14 -0700
Subject: [PATCH] Better handle empty results from tantivy (#155)

Closes #154

---------

Co-authored-by: Chang She <chang@lancedb.com>
---
 .github/workflows/python.yml | 4 ++--
 python/lancedb/fts.py        | 2 ++
 python/lancedb/query.py      | 2 ++
 python/pyproject.toml        | 2 +-
 python/tests/test_fts.py     | 7 +++++++
 5 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index 8c91b7c2..3af5e096 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -32,7 +32,7 @@ jobs:
       run: |
         pip install -e .
         pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985
-        pip install pytest
+        pip install pytest pytest-mock
     - name: Run tests
       run: pytest -x -v --durations=30 tests
   mac:
@@ -55,6 +55,6 @@ jobs:
       run: |
         pip install -e .
         pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985
-        pip install pytest
+        pip install pytest pytest-mock
     - name: Run tests
       run: pytest -x -v --durations=30 tests
\ No newline at end of file
diff --git a/python/lancedb/fts.py b/python/lancedb/fts.py
index 62b607b8..259d9d80 100644
--- a/python/lancedb/fts.py
+++ b/python/lancedb/fts.py
@@ -118,6 +118,8 @@ def search_index(
     query = index.parse_query(query)
     # get top results
     results = searcher.search(query, limit)
+    if results.count == 0:
+        return tuple(), tuple()
     return tuple(
         zip(
             *[
diff --git a/python/lancedb/query.py b/python/lancedb/query.py
index c9cc8d2d..defe744f 100644
--- a/python/lancedb/query.py
+++ b/python/lancedb/query.py
@@ -164,6 +164,8 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
         index = tantivy.Index.open(index_path)
         # get the scores and doc ids
         row_ids, scores = search_index(index, self._query, self._limit)
+        if len(row_ids) == 0:
+            return pd.DataFrame()
         scores = pa.array(scores)
         output_tbl = self._table.to_lance().take(row_ids, columns=self._columns)
         output_tbl = output_tbl.append_column("score", scores)
diff --git a/python/pyproject.toml b/python/pyproject.toml
index 643fa1b5..f5ed4eb7 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -37,7 +37,7 @@ repository = "https://github.com/lancedb/lancedb"
 
 [project.optional-dependencies]
 tests = [
-    "pytest"
+    "pytest", "pytest-mock"
 ]
 dev = [
     "ruff", "pre-commit", "black"
diff --git a/python/tests/test_fts.py b/python/tests/test_fts.py
index a2d4d403..91c5e236 100644
--- a/python/tests/test_fts.py
+++ b/python/tests/test_fts.py
@@ -82,3 +82,10 @@ def test_create_index_multiple_columns(tmp_path, table):
     assert len(df) == 10
     assert "text" in df.columns
     assert "text2" in df.columns
+
+
+def test_empty_rs(tmp_path, table, mocker):  # no FTS hits -> zero-row result
+    table.create_fts_index(["text", "text2"])
+    mocker.patch("lancedb.fts.search_index", return_value=([], []))  # no hits
+    df = table.search("puppy").limit(10).to_df()
+    assert len(df) == 0  # must come back empty rather than raise