feat(python): add option to flatten output in to_pandas (#722)

Closes https://github.com/lancedb/lance/issues/1738

We add a `flatten` parameter to the signature of `to_pandas`. By default
this is None and does nothing.
If set to True or -1, then LanceDB will flatten structs before
converting to a pandas dataframe. All nested structs are also flattened.
If set to any positive integer, then LanceDB will flatten structs up to
the specified level of nesting.

---------

Co-authored-by: Weston Pace <weston.pace@gmail.com>
This commit is contained in:
Chang She
2023-12-20 12:23:07 -08:00
committed by GitHub
parent fff8e399a3
commit 371d2f979e
4 changed files with 141 additions and 5 deletions

View File

@@ -144,9 +144,13 @@ def test_add(db):
def test_add_pydantic_model(db):
# https://github.com/lancedb/lancedb/issues/562
class Metadata(BaseModel):
source: str
timestamp: datetime
class Document(BaseModel):
content: str
source: str
meta: Metadata
class LanceSchema(LanceModel):
id: str
@@ -162,13 +166,21 @@ def test_add_pydantic_model(db):
id="id",
vector=[0.0, 0.0],
li=[1, 2, 3],
payload=Document(content="foo", source="bar"),
payload=Document(
content="foo", meta=Metadata(source="bar", timestamp=datetime.now())
),
)
tbl.add([expected])
result = tbl.search([0.0, 0.0]).limit(1).to_pydantic(LanceSchema)[0]
assert result == expected
flattened = tbl.search([0.0, 0.0]).limit(1).to_pandas(flatten=1)
assert len(flattened.columns) == 6 # _distance is automatically added
really_flattened = tbl.search([0.0, 0.0]).limit(1).to_pandas(flatten=True)
assert len(really_flattened.columns) == 7
def _add(table, schema):
# table = LanceTable(db, "test")