feat: add to_list and to_pandas APIs (#556)

Add `to_list` to return query results as a list of Python dicts (so we're
not too pandas-centric). Closes #555

Add `to_pandas` API and add deprecation warning on `to_df`. Closes #545

Co-authored-by: Chang She <chang@lancedb.com>
This commit is contained in:
Chang She
2023-10-11 12:18:55 -07:00
committed by Weston Pace
parent a737bbff19
commit 8469d010f8
26 changed files with 125 additions and 71 deletions

View File

@@ -16,10 +16,12 @@ from __future__ import annotations
from abc import ABC, abstractmethod
from typing import List, Literal, Optional, Type, Union
import deprecation
import numpy as np
import pyarrow as pa
import pydantic
from . import __version__
from .common import VECTOR_COLUMN_NAME
from .pydantic import LanceModel
from .util import safe_import_pandas
@@ -127,7 +129,24 @@ class LanceQueryBuilder(ABC):
self._columns = None
self._where = None
@deprecation.deprecated(
    deprecated_in="0.3.1",
    removed_in="0.4.0",
    current_version=__version__,
    # This text is shown to users in the deprecation warning, so it must
    # name the actual replacement method.
    details="Use to_pandas() instead",
)
def to_df(self) -> "pd.DataFrame":
    """
    Deprecated alias for `to_pandas()`. Please use `to_pandas()` instead.

    Execute the query and return the results as a pandas DataFrame.
    In addition to the selected columns, LanceDB also returns a vector
    and the "_distance" column, which is the distance between the query
    vector and the returned vector.

    This method does no work of its own; it forwards to `to_pandas()`
    and returns its result unchanged.
    """
    return self.to_pandas()
def to_pandas(self) -> "pd.DataFrame":
"""
Execute the query and return the results as a pandas DataFrame.
In addition to the selected columns, LanceDB also returns a vector
@@ -148,6 +167,16 @@ class LanceQueryBuilder(ABC):
"""
raise NotImplementedError
def to_list(self) -> List[dict]:
    """
    Run the query and hand the rows back as plain Python dictionaries.

    Every entry of the returned list is one result row, keyed by the
    selected column names (or by all table columns when `select` was
    never called). The vector and the "_distance" fields are included
    whether or not they were explicitly selected.
    """
    # Materialize the Arrow result first, then convert it row by row.
    arrow_result = self.to_arrow()
    return arrow_result.to_pylist()
def to_pydantic(self, model: Type[LanceModel]) -> List[LanceModel]:
"""Return the table as a list of pydantic models.
@@ -232,7 +261,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
... .where("b < 10")
... .select(["b"])
... .limit(2)
... .to_df())
... .to_pandas())
b vector _distance
0 6 [0.4, 0.4] 0.0
"""