test(python): add regression test for nullable struct with None (#2654) (#3483)

## Summary

Regression test for [issue
#2654](https://github.com/lancedb/lancedb/issues/2654) — a nullable
struct column whose first batch contains only `None` values crashed in
`_align_field_types` with `AttributeError: 'pyarrow.lib.DataType' object
has no attribute 'fields'`.

The actual fix landed in #3394, but no test was added. This PR adds the
reproducer from the issue as a test.

## Test plan

- `test_add_nullable_struct_with_none`: creates a table with a nullable
struct column, adds a row with a non-null struct value, then a row with
`None` for the struct field. Verifies both rows land correctly.
- Uses Lance file format v2.1 (`new_table_data_storage_version="2.1"`)
because nullable structs aren't supported on v2.0.

## Related

- #3028 (the original fix attempt, now superseded)
This commit is contained in:
Harikrishna KP
2026-06-04 02:43:09 +05:30
committed by GitHub
parent 632375faf1
commit 4e44262499

View File

@@ -1288,6 +1288,45 @@ def test_add_with_empty_fixed_size_list_drops_bad_rows(mem_db: DBConnection):
assert np.allclose(data["embedding"].to_pylist()[0], np.array([0.1] * 16))
def test_add_nullable_struct_with_none(mem_db: DBConnection):
"""Regression test for issue #2654: a nullable struct column whose
first batch contains only None values must not crash in
_align_field_types with AttributeError: 'pyarrow.lib.DataType'
object has no attribute 'fields'.
PyArrow infers an all-None struct column as `null` (not `struct`),
so the type-alignment path needs to handle the case where the
source field type is null and use the target type directly.
"""
# Use the v2.1 file format so that nullable structs are supported.
table = mem_db.create_table(
"test_nullable_struct",
schema=pa.schema(
[
pa.field("id", pa.string()),
pa.field(
"data",
pa.struct([pa.field("x", pa.float32())]),
nullable=True,
),
]
),
storage_options=dict(new_table_data_storage_version="2.1"),
)
# Adding a row with a non-null struct should work.
table.add([{"id": "1", "data": {"x": 1.0}}])
# Adding a row with None for the nullable struct field should also
# work — this is what used to crash.
table.add([{"id": "2", "data": None}])
result = table.to_arrow()
assert result.num_rows == 2
assert result.column("id").to_pylist() == ["1", "2"]
assert result.column("data").to_pylist() == [{"x": 1.0}, None]
def test_add_with_integer_embeddings_preserves_casting(mem_db: DBConnection):
class Schema(LanceModel):
text: str