feat: add the explain_plan function (#1328)

It's useful to see the underlying query plan for debugging purposes.
This exposes LanceScanner's `explain_plan` function. Addresses #1288

---------

Co-authored-by: Will Jones <willjones127@gmail.com>
This commit is contained in:
Nuvic
2024-07-02 11:10:01 -07:00
committed by GitHub
parent 12b3c87964
commit 46c6ff889d
9 changed files with 226 additions and 15 deletions

View File

@@ -417,6 +417,38 @@ class LanceQueryBuilder(ABC):
self._with_row_id = with_row_id
return self
def explain_plan(self, verbose: Optional[bool] = False) -> str:
"""Return the execution plan for this query.
Examples
--------
>>> import lancedb
>>> db = lancedb.connect("./.lancedb")
>>> table = db.create_table("my_table", [{"vector": [99, 99]}])
>>> query = [100, 100]
>>> plan = table.search(query).explain_plan(True)
>>> print(plan) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
Projection: fields=[vector, _distance]
KNNFlat: k=10 metric=l2
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
Parameters
----------
verbose : bool, default False
Use a verbose output format.
Returns
-------
plan : str
""" # noqa: E501
ds = self._table.to_lance()
return ds.scanner(
nearest={
"column": self._vector_column,
"q": self._query,
},
).explain_plan(verbose)
class LanceVectorQueryBuilder(LanceQueryBuilder):
"""
@@ -1166,6 +1198,35 @@ class AsyncQueryBase(object):
"""
return (await self.to_arrow()).to_pandas()
async def explain_plan(self, verbose: Optional[bool] = False):
"""Return the execution plan for this query.
Examples
--------
>>> import asyncio
>>> from lancedb import connect_async
>>> async def doctest_example():
... conn = await connect_async("./.lancedb")
... table = await conn.create_table("my_table", [{"vector": [99, 99]}])
... query = [100, 100]
... plan = await table.query().nearest_to([1, 2]).explain_plan(True)
... print(plan)
>>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
Projection: fields=[vector, _distance]
KNNFlat: k=10 metric=l2
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
Parameters
----------
verbose : bool, default False
Use a verbose output format.
Returns
-------
plan : str
""" # noqa: E501
return await self._inner.explain_plan(verbose)
class AsyncQuery(AsyncQueryBase):
def __init__(self, inner: LanceQuery):

View File

@@ -333,3 +333,15 @@ async def test_query_to_pandas_async(table_async: AsyncTable):
df = await table_async.query().where("id < 0").to_pandas()
assert df.shape == (0, 4)
def test_explain_plan(table):
q = LanceVectorQueryBuilder(table, [0, 0], "vector")
plan = q.explain_plan(verbose=True)
assert "KNN" in plan
@pytest.mark.asyncio
async def test_explain_plan_async(table_async: AsyncTable):
plan = await table_async.query().nearest_to(pa.array([1, 2])).explain_plan(True)
assert "KNN" in plan