fix(python): make sure explain_plan works with FTS queries (#2466)

## Summary

Fixes issue #2465 where FTS explain plans only showed basic `LanceScan` instead of detailed execution plans with FTS query details, limits, and offsets.

## Root Cause

The `FTSQuery::explain_plan()` and `analyze_plan()` methods were missing the `.full_text_search()` call before calling explain/analyze plan, causing them to operate on the base query without FTS context.

## Changes

- **Fixed** `explain_plan()` and `analyze_plan()` in `src/query.rs` to call `.full_text_search()`
- **Added comprehensive test coverage** for FTS explain plans with limits, offsets, and filters
- **Updated existing tests** to expect correct behavior instead of buggy behavior

## Before/After

**Before (broken):**

```
LanceScan: uri=..., projection=[...], row_id=false, row_addr=false, ordered=true
```

**After (fixed):**

```
ProjectionExec: expr=[id@2 as id, text@3 as text, _score@1 as _score]
  Take: columns="_rowid, _score, (id), (text)"
    CoalesceBatchesExec: target_batch_size=1024
      GlobalLimitExec: skip=2, fetch=4
        MatchQuery: query=test
```

## Test Plan

- [x] All new FTS explain plan tests pass
- [x] Existing tests continue to pass
- [x] FTS queries now show proper execution plans with MatchQuery, limits, filters

Closes #2465

🤖 Generated with [Claude Code](https://claude.ai/code)

## Summary by CodeRabbit

* **Tests**
  * Added new test cases to verify explain plan output for full-text search, vector queries with pagination, and queries with filters.
* **Bug Fixes**
  * Improved the accuracy of explain plan and analysis output for full-text search queries, ensuring the correct query details are reflected.
* **Refactor**
  * Enhanced the formatting and hierarchical structure of execution plans for hybrid queries, providing clearer and more detailed plan representations.

---------

Co-authored-by: Claude <noreply@anthropic.com>
2025-12-27 15:12:53 +00:00 · 2025-06-26 23:35:14 -07:00
parent a00b8595d1
commit 4beb2d2877
3 changed files with 98 additions and 10 deletions
--- a/python/python/lancedb/query.py
+++ b/python/python/lancedb/query.py
@@ -3042,15 +3042,21 @@ class AsyncHybridQuery(AsyncQueryBase, AsyncVectorQueryBase):
        >>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
        Vector Search Plan:
        ProjectionExec: expr=[vector@0 as vector, text@3 as text, _distance@2 as _distance]
-            Take: columns="vector, _rowid, _distance, (text)"
-                CoalesceBatchesExec: target_batch_size=1024
-                GlobalLimitExec: skip=0, fetch=10
-                    FilterExec: _distance@2 IS NOT NULL
-                    SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
-                        KNNVectorDistance: metric=l2
-                        LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
+          Take: columns="vector, _rowid, _distance, (text)"
+            CoalesceBatchesExec: target_batch_size=1024
+              GlobalLimitExec: skip=0, fetch=10
+                FilterExec: _distance@2 IS NOT NULL
+                  SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
+                    KNNVectorDistance: metric=l2
+                      LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
+        <BLANKLINE>
        FTS Search Plan:
-        LanceScan: uri=..., projection=[vector, text], row_id=false, row_addr=false, ordered=true
+        ProjectionExec: expr=[vector@2 as vector, text@3 as text, _score@1 as _score]
+          Take: columns="_rowid, _score, (vector), (text)"
+            CoalesceBatchesExec: target_batch_size=1024
+              GlobalLimitExec: skip=0, fetch=10
+                MatchQuery: query=hello
+        <BLANKLINE>

        Parameters
        ----------
--- a/python/python/tests/test_query.py
+++ b/python/python/tests/test_query.py
@@ -775,6 +775,82 @@ async def test_explain_plan_async(table_async: AsyncTable):
    assert "KNN" in plan


+@pytest.mark.asyncio
+async def test_explain_plan_fts(table_async: AsyncTable):
+    """Test explain plan for FTS queries"""
+    # Create FTS index
+    from lancedb.index import FTS
+
+    await table_async.create_index("text", config=FTS())
+
+    # Test pure FTS query
+    query = await table_async.search("dog", query_type="fts", fts_columns="text")
+    plan = await query.explain_plan()
+    # Should show FTS details (issue #2465 is now fixed)
+    assert "MatchQuery: query=dog" in plan
+    assert "GlobalLimitExec" in plan  # Default limit
+
+    # Test FTS query with limit
+    query_with_limit = await table_async.search(
+        "dog", query_type="fts", fts_columns="text"
+    )
+    plan_with_limit = await query_with_limit.limit(1).explain_plan()
+    assert "MatchQuery: query=dog" in plan_with_limit
+    assert "GlobalLimitExec: skip=0, fetch=1" in plan_with_limit
+
+    # Test FTS query with offset and limit
+    query_with_offset = await table_async.search(
+        "dog", query_type="fts", fts_columns="text"
+    )
+    plan_with_offset = await query_with_offset.offset(1).limit(1).explain_plan()
+    assert "MatchQuery: query=dog" in plan_with_offset
+    assert "GlobalLimitExec: skip=1, fetch=1" in plan_with_offset
+
+
+@pytest.mark.asyncio
+async def test_explain_plan_vector_with_limit_offset(table_async: AsyncTable):
+    """Test explain plan for vector queries with limit and offset"""
+    # Test vector query with limit
+    plan_with_limit = await (
+        table_async.query().nearest_to(pa.array([1, 2])).limit(1).explain_plan()
+    )
+    assert "KNN" in plan_with_limit
+    assert "GlobalLimitExec: skip=0, fetch=1" in plan_with_limit
+
+    # Test vector query with offset and limit
+    plan_with_offset = await (
+        table_async.query()
+        .nearest_to(pa.array([1, 2]))
+        .offset(1)
+        .limit(1)
+        .explain_plan()
+    )
+    assert "KNN" in plan_with_offset
+    assert "GlobalLimitExec: skip=1, fetch=1" in plan_with_offset
+
+
+@pytest.mark.asyncio
+async def test_explain_plan_with_filters(table_async: AsyncTable):
+    """Test explain plan for queries with filters"""
+    # Test vector query with filter
+    plan_with_filter = await (
+        table_async.query().nearest_to(pa.array([1, 2])).where("id = 1").explain_plan()
+    )
+    assert "KNN" in plan_with_filter
+    assert "FilterExec" in plan_with_filter
+
+    # Test FTS query with filter
+    from lancedb.index import FTS
+
+    await table_async.create_index("text", config=FTS())
+    query_fts_filter = await table_async.search(
+        "dog", query_type="fts", fts_columns="text"
+    )
+    plan_fts_filter = await query_fts_filter.where("id = 1").explain_plan()
+    assert "MatchQuery: query=dog" in plan_fts_filter
+    assert "FilterExec: id@" in plan_fts_filter  # Should show filter details
+
+
@pytest.mark.asyncio
 async def test_query_camelcase_async(tmp_path):
    db = await lancedb.connect_async(tmp_path)
--- a/python/src/query.rs
+++ b/python/src/query.rs
@@ -563,7 +563,10 @@ impl FTSQuery {
    }

    pub fn explain_plan(self_: PyRef<'_, Self>, verbose: bool) -> PyResult<Bound<'_, PyAny>> {
-        let inner = self_.inner.clone();
+        let inner = self_
+            .inner
+            .clone()
+            .full_text_search(self_.fts_query.clone());
        future_into_py(self_.py(), async move {
            inner
                .explain_plan(verbose)
@@ -573,7 +576,10 @@ impl FTSQuery {
    }

    pub fn analyze_plan(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
-        let inner = self_.inner.clone();
+        let inner = self_
+            .inner
+            .clone()
+            .full_text_search(self_.fts_query.clone());
        future_into_py(self_.py(), async move {
            inner
                .analyze_plan()