From 50cdb16b45f4aa45c9b54d810c4565c9d55de998 Mon Sep 17 00:00:00 2001 From: Chang She <759245+changhiskhan@users.noreply.github.com> Date: Mon, 5 Jun 2023 18:18:14 -0700 Subject: [PATCH] Better handle empty results from tantivy (#155) Closes #154 --------- Co-authored-by: Chang She --- .github/workflows/python.yml | 4 ++-- python/lancedb/fts.py | 2 ++ python/lancedb/query.py | 2 ++ python/pyproject.toml | 2 +- python/tests/test_fts.py | 7 +++++++ 5 files changed, 14 insertions(+), 3 deletions(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 8c91b7c2..3af5e096 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -32,7 +32,7 @@ jobs: run: | pip install -e . pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985 - pip install pytest + pip install pytest pytest-mock - name: Run tests run: pytest -x -v --durations=30 tests mac: @@ -55,6 +55,6 @@ jobs: run: | pip install -e . pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985 - pip install pytest + pip install pytest pytest-mock - name: Run tests run: pytest -x -v --durations=30 tests \ No newline at end of file diff --git a/python/lancedb/fts.py b/python/lancedb/fts.py index 62b607b8..259d9d80 100644 --- a/python/lancedb/fts.py +++ b/python/lancedb/fts.py @@ -118,6 +118,8 @@ def search_index( query = index.parse_query(query) # get top results results = searcher.search(query, limit) + if results.count == 0: + return tuple(), tuple() return tuple( zip( *[ diff --git a/python/lancedb/query.py b/python/lancedb/query.py index c9cc8d2d..defe744f 100644 --- a/python/lancedb/query.py +++ b/python/lancedb/query.py @@ -164,6 +164,8 @@ class LanceFtsQueryBuilder(LanceQueryBuilder): index = tantivy.Index.open(index_path) # get the scores and doc ids row_ids, scores = search_index(index, self._query, self._limit) + if len(row_ids) == 0: + return pd.DataFrame() scores = pa.array(scores) output_tbl = self._table.to_lance().take(row_ids, columns=self._columns) output_tbl = output_tbl.append_column("score", scores) diff --git a/python/pyproject.toml b/python/pyproject.toml index 643fa1b5..f5ed4eb7 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -37,7 +37,7 @@ repository = "https://github.com/lancedb/lancedb" [project.optional-dependencies] tests = [ - "pytest" + "pytest", "pytest-mock" ] dev = [ "ruff", "pre-commit", "black" diff --git a/python/tests/test_fts.py b/python/tests/test_fts.py index a2d4d403..91c5e236 100644 --- a/python/tests/test_fts.py +++ b/python/tests/test_fts.py @@ -82,3 +82,10 @@ def test_create_index_multiple_columns(tmp_path, table): assert len(df) == 10 assert "text" in df.columns assert "text2" in df.columns + + +def test_empty_rs(tmp_path, table, mocker): + table.create_fts_index(["text", "text2"]) + mocker.patch("lancedb.fts.search_index", return_value=([], [])) + df = table.search("puppy").limit(10).to_df() + assert len(df) == 0