feat(python): add to_polars to AsyncQueryBase (#1986)

Fixes https://github.com/lancedb/lancedb/issues/1952

Added `to_polars` method to `AsyncQueryBase`.
This commit is contained in:
Takahiro Ebato
2025-01-07 02:35:28 +09:00
committed by GitHub
parent 8b31540b21
commit 2c05ffed52
2 changed files with 35 additions and 0 deletions

View File

@@ -1618,6 +1618,32 @@ class AsyncQueryBase(object):
"""
return (await self.to_arrow()).to_pandas()
async def to_polars(self) -> "pl.DataFrame":
"""
Execute the query and collect the results into a Polars DataFrame.
This method will collect all results into memory before returning. If you
expect a large number of results, you may want to use
[to_batches][lancedb.query.AsyncQueryBase.to_batches] and convert each batch to
polars separately.
Examples
--------
>>> import asyncio
>>> import polars as pl
>>> from lancedb import connect_async
>>> async def doctest_example():
... conn = await connect_async("./.lancedb")
... table = await conn.create_table("my_table", data=[{"a": 1, "b": 2}])
... async for batch in await table.query().to_batches():
... batch_df = pl.from_arrow(batch)
>>> asyncio.run(doctest_example())
"""
import polars as pl
return pl.from_arrow(await self.to_arrow())
async def explain_plan(self, verbose: Optional[bool] = False):
"""Return the execution plan for this query.

View File

@@ -400,6 +400,15 @@ async def test_query_to_pandas_async(table_async: AsyncTable):
assert df.shape == (0, 4)
@pytest.mark.asyncio
async def test_query_to_polars_async(table_async: AsyncTable):
df = await table_async.query().to_polars()
assert df.shape == (2, 4)
df = await table_async.query().where("id < 0").to_polars()
assert df.shape == (0, 4)
@pytest.mark.asyncio
async def test_none_query(table_async: AsyncTable):
with pytest.raises(ValueError):