@value_to_sql.register(dict)
def _(value: dict):
    """Render a dict as a DataFusion ``named_struct(...)`` SQL expression.

    Each key becomes a single-quoted field-name literal and each value is
    rendered by dispatching back through ``value_to_sql``, so nested dicts
    and other registered value types are converted recursively.

    Parameters
    ----------
    value : dict
        Mapping of struct field name to field value.

    Returns
    -------
    str
        ``named_struct('k1', <v1>, 'k2', <v2>, ...)`` with the pairs in the
        dict's iteration order. An empty dict yields ``named_struct()``.
    """
    # https://datafusion.apache.org/user-guide/sql/scalar_functions.html#named-struct
    # NOTE(review): the empty-dict result ``named_struct()`` is kept for
    # backward compatibility; confirm the SQL engine accepts a zero-argument
    # call before relying on it.
    args = []
    for name, field in value.items():
        # The field name is placed inside a quoted SQL string literal, so an
        # embedded single quote has to be doubled. Without this a key such as
        # "it's" ends the literal early and corrupts the statement.
        escaped = str(name).replace("'", "''")
        args.append(f"'{escaped}'")
        args.append(value_to_sql(field))
    return "named_struct(" + ", ".join(args) + ")"
@value_to_sql.register(np.ndarray)
def _(value: np.ndarray):
    """Render a NumPy array via ``tolist()`` and re-dispatch the result."""
    as_python = value.tolist()
    return value_to_sql(as_python)


def test_value_to_sql_dict():
    """Check the SQL text ``value_to_sql`` produces for dict inputs."""
    cases = [
        # Simple flat struct
        (
            {"a": 1, "b": "hello"},
            "named_struct('a', 1, 'b', 'hello')",
        ),
        # Nested struct
        (
            {"outer": {"inner": 1}},
            "named_struct('outer', named_struct('inner', 1))",
        ),
        # List inside struct
        (
            {"a": [1, 2]},
            "named_struct('a', [1, 2])",
        ),
        # Mixed types
        (
            {"name": "test", "count": 42, "rate": 3.14, "active": True},
            "named_struct('name', 'test', 'count', 42, 'rate', 3.14, 'active', TRUE)",
        ),
        # Null value inside struct
        (
            {"a": None},
            "named_struct('a', NULL)",
        ),
        # Empty dict
        (
            {},
            "named_struct()",
        ),
    ]

    # Cases run in the listed order; the first mismatch fails the test.
    for struct, expected_sql in cases:
        assert value_to_sql(struct) == expected_sql