From 4e442624993dfac3a5fbf1d2ce30aefd1e4ce32d Mon Sep 17 00:00:00 2001 From: Harikrishna KP Date: Thu, 4 Jun 2026 02:43:09 +0530 Subject: [PATCH] test(python): add regression test for nullable struct with None (#2654) (#3483) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Regression test for [issue #2654](https://github.com/lancedb/lancedb/issues/2654) — a nullable struct column whose first batch contains only `None` values crashed in `_align_field_types` with `AttributeError: 'pyarrow.lib.DataType' object has no attribute 'fields'`. The actual fix landed in #3394, but no test was added. This PR adds the reproducer from the issue as a test. ## Test plan - `test_add_nullable_struct_with_none`: creates a table with a nullable struct column, adds a row with a non-null struct value, then a row with `None` for the struct field. Verifies both rows land correctly. - Uses Lance file format v2.1 (`new_table_data_storage_version="2.1"`) because nullable structs aren't supported on v2.0. ## Related - #3028 (the original fix attempt, now superseded) --- python/python/tests/test_table.py | 39 +++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/python/python/tests/test_table.py b/python/python/tests/test_table.py index 5892dda42..ade086a35 100644 --- a/python/python/tests/test_table.py +++ b/python/python/tests/test_table.py @@ -1288,6 +1288,45 @@ def test_add_with_empty_fixed_size_list_drops_bad_rows(mem_db: DBConnection): assert np.allclose(data["embedding"].to_pylist()[0], np.array([0.1] * 16)) +def test_add_nullable_struct_with_none(mem_db: DBConnection): + """Regression test for issue #2654: a nullable struct column whose + first batch contains only None values must not crash in + _align_field_types with AttributeError: 'pyarrow.lib.DataType' + object has no attribute 'fields'. + + PyArrow infers an all-None struct column as `null` (not `struct`), + so the type-alignment path needs to handle the case where the + source field type is null and use the target type directly. + """ + # Use the v2.1 file format so that nullable structs are supported. + table = mem_db.create_table( + "test_nullable_struct", + schema=pa.schema( + [ + pa.field("id", pa.string()), + pa.field( + "data", + pa.struct([pa.field("x", pa.float32())]), + nullable=True, + ), + ] + ), + storage_options=dict(new_table_data_storage_version="2.1"), + ) + + # Adding a row with a non-null struct should work. + table.add([{"id": "1", "data": {"x": 1.0}}]) + + # Adding a row with None for the nullable struct field should also + # work — this is what used to crash. + table.add([{"id": "2", "data": None}]) + + result = table.to_arrow() + assert result.num_rows == 2 + assert result.column("id").to_pylist() == ["1", "2"] + assert result.column("data").to_pylist() == [{"x": 1.0}, None] + + def test_add_with_integer_embeddings_preserves_casting(mem_db: DBConnection): class Schema(LanceModel): text: str