feat(python): support dict to SQL struct conversion in table.update() (#3089)

## Summary

- Add `@value_to_sql.register(dict)` handler that converts Python dicts
to DataFusion's `named_struct()` SQL syntax
- Enables updating struct-typed columns via `table.update(values={"col":
{"field_a": 1, "field_b": "hello"}})`
- Recursively handles nested structs, lists, nulls, and all existing
scalar types

Closes #1363

## Details

The `named_struct` function was introduced in DataFusion 38 and is now
available (LanceDB uses DataFusion 52.1). The implementation follows the
existing `singledispatch` pattern in `util.py`.

**Example conversion:**
```python
value_to_sql({"field_a": 1, "field_b": "hello"})
# => "named_struct('field_a', 1, 'field_b', 'hello')"
```

## Test plan

- [x] Unit tests for flat struct, nested struct, list inside struct,
mixed types, null values, and empty dict
- [ ] CI integration tests with actual table.update() on struct columns

🔗 [DataFusion named_struct
docs](https://datafusion.apache.org/user-guide/sql/scalar_functions.html#named-struct)
This commit is contained in:
mrncstt
2026-03-03 22:36:08 +01:00
committed by GitHub
parent 52ce2c995c
commit 367abe99d2
2 changed files with 36 additions and 0 deletions

View File

@@ -324,6 +324,16 @@ def _(value: list):
return "[" + ", ".join(map(value_to_sql, value)) + "]"
@value_to_sql.register(dict)
def _(value: dict):
# https://datafusion.apache.org/user-guide/sql/scalar_functions.html#named-struct
return (
"named_struct("
+ ", ".join(f"'{k}', {value_to_sql(v)}" for k, v in value.items())
+ ")"
)
@value_to_sql.register(np.ndarray)
def _(value: np.ndarray):
return value_to_sql(value.tolist())

View File

@@ -121,6 +121,32 @@ def test_value_to_sql_string(tmp_path):
assert table.to_pandas().query("search == @value")["replace"].item() == value
def test_value_to_sql_dict():
# Simple flat struct
assert value_to_sql({"a": 1, "b": "hello"}) == "named_struct('a', 1, 'b', 'hello')"
# Nested struct
assert (
value_to_sql({"outer": {"inner": 1}})
== "named_struct('outer', named_struct('inner', 1))"
)
# List inside struct
assert value_to_sql({"a": [1, 2]}) == "named_struct('a', [1, 2])"
# Mixed types
assert (
value_to_sql({"name": "test", "count": 42, "rate": 3.14, "active": True})
== "named_struct('name', 'test', 'count', 42, 'rate', 3.14, 'active', TRUE)"
)
# Null value inside struct
assert value_to_sql({"a": None}) == "named_struct('a', NULL)"
# Empty dict
assert value_to_sql({}) == "named_struct()"
def test_append_vector_columns():
registry = EmbeddingFunctionRegistry.get_instance()
registry.register("test")(MockTextEmbeddingFunction)