From 4beb2d2877265d74f31d049fc2e0ec0fb0bdaa8e Mon Sep 17 00:00:00 2001
From: Will Jones
Date: Thu, 26 Jun 2025 23:35:14 -0700
Subject: [PATCH] fix(python): make sure `explain_plan` works with FTS queries (#2466)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Summary

Fixes issue #2465 where FTS explain plans only showed basic `LanceScan` instead of detailed execution plans with FTS query details, limits, and offsets.

## Root Cause

The `FTSQuery::explain_plan()` and `analyze_plan()` methods were missing the `.full_text_search()` call before calling explain/analyze plan, causing them to operate on the base query without FTS context.

## Changes

- **Fixed** `explain_plan()` and `analyze_plan()` in `src/query.rs` to call `.full_text_search()`
- **Added comprehensive test coverage** for FTS explain plans with limits, offsets, and filters
- **Updated existing tests** to expect correct behavior instead of buggy behavior

## Before/After

**Before (broken):**
```
LanceScan: uri=..., projection=[...], row_id=false, row_addr=false, ordered=true
```

**After (fixed):**
```
ProjectionExec: expr=[id@2 as id, text@3 as text, _score@1 as _score]
  Take: columns="_rowid, _score, (id), (text)"
    CoalesceBatchesExec: target_batch_size=1024
      GlobalLimitExec: skip=2, fetch=4
        MatchQuery: query=test
```

## Test Plan

- [x] All new FTS explain plan tests pass
- [x] Existing tests continue to pass
- [x] FTS queries now show proper execution plans with MatchQuery, limits, filters

Closes #2465

🤖 Generated with [Claude Code](https://claude.ai/code)

## Summary by CodeRabbit

* **Tests**
  * Added new test cases to verify explain plan output for full-text search, vector queries with pagination, and queries with filters.
* **Bug Fixes**
  * Improved the accuracy of explain plan and analysis output for full-text search queries, ensuring the correct query details are reflected.
* **Refactor** * Enhanced the formatting and hierarchical structure of execution plans for hybrid queries, providing clearer and more detailed plan representations. --------- Co-authored-by: Claude --- python/python/lancedb/query.py | 22 +++++---- python/python/tests/test_query.py | 76 +++++++++++++++++++++++++++++++ python/src/query.rs | 10 +++- 3 files changed, 98 insertions(+), 10 deletions(-) diff --git a/python/python/lancedb/query.py b/python/python/lancedb/query.py index 20a6bdc0..23ab8c18 100644 --- a/python/python/lancedb/query.py +++ b/python/python/lancedb/query.py @@ -3042,15 +3042,21 @@ class AsyncHybridQuery(AsyncQueryBase, AsyncVectorQueryBase): >>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE Vector Search Plan: ProjectionExec: expr=[vector@0 as vector, text@3 as text, _distance@2 as _distance] - Take: columns="vector, _rowid, _distance, (text)" - CoalesceBatchesExec: target_batch_size=1024 - GlobalLimitExec: skip=0, fetch=10 - FilterExec: _distance@2 IS NOT NULL - SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false] - KNNVectorDistance: metric=l2 - LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false + Take: columns="vector, _rowid, _distance, (text)" + CoalesceBatchesExec: target_batch_size=1024 + GlobalLimitExec: skip=0, fetch=10 + FilterExec: _distance@2 IS NOT NULL + SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false] + KNNVectorDistance: metric=l2 + LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false + FTS Search Plan: - LanceScan: uri=..., projection=[vector, text], row_id=false, row_addr=false, ordered=true + ProjectionExec: expr=[vector@2 as vector, text@3 as text, _score@1 as _score] + Take: columns="_rowid, _score, (vector), (text)" + CoalesceBatchesExec: target_batch_size=1024 + GlobalLimitExec: skip=0, fetch=10 + MatchQuery: query=hello + Parameters ---------- diff 
--git a/python/python/tests/test_query.py b/python/python/tests/test_query.py index 3abdadc6..10ee5d6e 100644 --- a/python/python/tests/test_query.py +++ b/python/python/tests/test_query.py @@ -775,6 +775,82 @@ async def test_explain_plan_async(table_async: AsyncTable): assert "KNN" in plan +@pytest.mark.asyncio +async def test_explain_plan_fts(table_async: AsyncTable): + """Test explain plan for FTS queries""" + # Create FTS index + from lancedb.index import FTS + + await table_async.create_index("text", config=FTS()) + + # Test pure FTS query + query = await table_async.search("dog", query_type="fts", fts_columns="text") + plan = await query.explain_plan() + # Should show FTS details (issue #2465 is now fixed) + assert "MatchQuery: query=dog" in plan + assert "GlobalLimitExec" in plan # Default limit + + # Test FTS query with limit + query_with_limit = await table_async.search( + "dog", query_type="fts", fts_columns="text" + ) + plan_with_limit = await query_with_limit.limit(1).explain_plan() + assert "MatchQuery: query=dog" in plan_with_limit + assert "GlobalLimitExec: skip=0, fetch=1" in plan_with_limit + + # Test FTS query with offset and limit + query_with_offset = await table_async.search( + "dog", query_type="fts", fts_columns="text" + ) + plan_with_offset = await query_with_offset.offset(1).limit(1).explain_plan() + assert "MatchQuery: query=dog" in plan_with_offset + assert "GlobalLimitExec: skip=1, fetch=1" in plan_with_offset + + +@pytest.mark.asyncio +async def test_explain_plan_vector_with_limit_offset(table_async: AsyncTable): + """Test explain plan for vector queries with limit and offset""" + # Test vector query with limit + plan_with_limit = await ( + table_async.query().nearest_to(pa.array([1, 2])).limit(1).explain_plan() + ) + assert "KNN" in plan_with_limit + assert "GlobalLimitExec: skip=0, fetch=1" in plan_with_limit + + # Test vector query with offset and limit + plan_with_offset = await ( + table_async.query() + .nearest_to(pa.array([1, 
2])) + .offset(1) + .limit(1) + .explain_plan() + ) + assert "KNN" in plan_with_offset + assert "GlobalLimitExec: skip=1, fetch=1" in plan_with_offset + + +@pytest.mark.asyncio +async def test_explain_plan_with_filters(table_async: AsyncTable): + """Test explain plan for queries with filters""" + # Test vector query with filter + plan_with_filter = await ( + table_async.query().nearest_to(pa.array([1, 2])).where("id = 1").explain_plan() + ) + assert "KNN" in plan_with_filter + assert "FilterExec" in plan_with_filter + + # Test FTS query with filter + from lancedb.index import FTS + + await table_async.create_index("text", config=FTS()) + query_fts_filter = await table_async.search( + "dog", query_type="fts", fts_columns="text" + ) + plan_fts_filter = await query_fts_filter.where("id = 1").explain_plan() + assert "MatchQuery: query=dog" in plan_fts_filter + assert "FilterExec: id@" in plan_fts_filter # Should show filter details + + @pytest.mark.asyncio async def test_query_camelcase_async(tmp_path): db = await lancedb.connect_async(tmp_path) diff --git a/python/src/query.rs b/python/src/query.rs index ad0309ca..a7e92930 100644 --- a/python/src/query.rs +++ b/python/src/query.rs @@ -563,7 +563,10 @@ impl FTSQuery { } pub fn explain_plan(self_: PyRef<'_, Self>, verbose: bool) -> PyResult> { - let inner = self_.inner.clone(); + let inner = self_ + .inner + .clone() + .full_text_search(self_.fts_query.clone()); future_into_py(self_.py(), async move { inner .explain_plan(verbose) @@ -573,7 +576,10 @@ impl FTSQuery { } pub fn analyze_plan(self_: PyRef<'_, Self>) -> PyResult> { - let inner = self_.inner.clone(); + let inner = self_ + .inner + .clone() + .full_text_search(self_.fts_query.clone()); future_into_py(self_.py(), async move { inner .analyze_plan()