feat(python): add option to flatten output in to_pandas (#722)

Closes https://github.com/lancedb/lance/issues/1738. We add a `flatten` parameter to the signature of `to_pandas`. By default this is None and does nothing. If set to True or -1, then LanceDB will flatten structs, including all nested structs, before converting to a pandas DataFrame. If set to any positive integer, then LanceDB will flatten structs up to the specified level of nesting. --------- Co-authored-by: Weston Pace <weston.pace@gmail.com>
2026-01-05 11:22:58 +00:00 · 2023-12-20 12:23:07 -08:00
parent fff8e399a3
commit 371d2f979e
4 changed files with 141 additions and 5 deletions
--- a/python/tests/test_table.py
+++ b/python/tests/test_table.py
@@ -144,9 +144,13 @@ def test_add(db):
 def test_add_pydantic_model(db):
    # https://github.com/lancedb/lancedb/issues/562

+    class Metadata(BaseModel):
+        source: str
+        timestamp: datetime
+
    class Document(BaseModel):
        content: str
-        source: str
+        meta: Metadata

    class LanceSchema(LanceModel):
        id: str
@@ -162,13 +166,21 @@ def test_add_pydantic_model(db):
        id="id",
        vector=[0.0, 0.0],
        li=[1, 2, 3],
-        payload=Document(content="foo", source="bar"),
+        payload=Document(
+            content="foo", meta=Metadata(source="bar", timestamp=datetime.now())
+        ),
    )
    tbl.add([expected])

    result = tbl.search([0.0, 0.0]).limit(1).to_pydantic(LanceSchema)[0]
    assert result == expected

+    flattened = tbl.search([0.0, 0.0]).limit(1).to_pandas(flatten=1)
+    assert len(flattened.columns) == 6  # _distance is automatically added
+
+    really_flattened = tbl.search([0.0, 0.0]).limit(1).to_pandas(flatten=True)
+    assert len(really_flattened.columns) == 7
+

 def _add(table, schema):
    # table = LanceTable(db, "test")