feat(python): allow the entire table to be converted to a polars dataframe (#814)

This commit is contained in:
Chang She
2024-01-15 15:49:16 -08:00
committed by GitHub
parent be4ab9eef3
commit af8263af94
3 changed files with 36 additions and 2 deletions

View File

@@ -73,7 +73,7 @@ def _sanitize_data(
meta = data.schema.metadata if data.schema.metadata is not None else {}
meta = {k: v for k, v in meta.items() if k != b"pandas"}
data = data.replace_schema_metadata(meta)
elif pl is not None and isinstance(data, pl.DataFrame):
elif pl is not None and isinstance(data, (pl.DataFrame, pl.LazyFrame)):
data = data.to_arrow()
if isinstance(data, pa.Table):
@@ -697,6 +697,30 @@ class LanceTable(Table):
pa.Table"""
return self._dataset.to_table()
def to_polars(self, batch_size=None) -> "pl.LazyFrame":
    """Return the table as a polars LazyFrame.

    Parameters
    ----------
    batch_size: int, optional
        Passed to polars. This is the maximum row count for
        scanned pyarrow record batches

    Note
    ----
    1. This requires polars to be installed separately
    2. Currently we've disabled push-down of the filters from polars
       because polars pushdown into pyarrow uses pyarrow compute
       expressions rather than SQL strings (which LanceDB supports)

    Returns
    -------
    pl.LazyFrame

    Raises
    ------
    ImportError
        If polars is not available (the module-level ``pl`` is None).
    """
    # Other code in this module guards on ``pl is not None``, so ``pl`` may be
    # None here; fail with an actionable message instead of an AttributeError
    # on NoneType.
    if pl is None:
        raise ImportError(
            "polars is required for to_polars(); install it with "
            "`pip install polars`"
        )
    return pl.scan_pyarrow_dataset(
        self.to_lance(), allow_pyarrow_filter=False, batch_size=batch_size
    )
@property
def _dataset_uri(self) -> str:
    """URI built by joining the connection URI with ``<name>.lance``."""
    base_uri = self._conn.uri
    dataset_file = f"{self.name}.lance"
    return join_uri(base_uri, dataset_file)