fix(util): escape single quotes in struct field names in value_to_sql (#3548)

### Bug

`value_to_sql({...})` builds a DataFusion `named_struct(...)` literal
but interpolates the struct field names directly as `f"'{k}'"`. A field
name that contains a single quote therefore produces invalid SQL:

```python
>>> from lancedb.util import value_to_sql
>>> value_to_sql({"it's": 1})
"named_struct('it's', 1)"        # invalid SQL — the quote terminates the literal
```

String *values* are already escaped (single quotes doubled) by the `str`
branch of `value_to_sql`, so keys and values were handled
inconsistently. This affects `Table.update(values={...})` /
`merge_insert` when a struct column has a field name containing `'`.

### Fix

Render the key through `value_to_sql(str(k))` so field names are escaped
exactly like string values:

```python
>>> value_to_sql({"it's": 1})
"named_struct('it''s', 1)"
```

Keys that contain no single quote are rendered exactly as before (`'a'`
stays `'a'`), so existing behavior is preserved.

### Verification

```
$ pytest python/tests/test_util.py -k value_to_sql_dict
```

The new `test_value_to_sql_dict_key_escaping` covers quoted keys (including
keys of nested structs). It fails on `main`, which emits
`named_struct('it's', 1)`, and passes with this change; the existing
`test_value_to_sql_dict` still passes.

Co-authored-by: JSap0914 <JSap0914@users.noreply.github.com>
This commit is contained in:
JSap0914
2026-06-18 04:55:43 +09:00
committed by GitHub
parent 394bb34fa2
commit 9128dbcd7a
2 changed files with 22 additions and 1 deletions

View File

@@ -373,9 +373,15 @@ def _(value: list):
@value_to_sql.register(dict)
def _(value: dict):
    """Render a dict as a DataFusion ``named_struct(...)`` SQL expression.

    Each item becomes a ``<field name>, <field value>`` argument pair, in
    the dict's iteration order. Values are rendered recursively through
    ``value_to_sql``; keys are stringified and rendered through
    ``value_to_sql`` too, so they are quoted and escaped like string values.
    An empty dict renders as ``named_struct()``.
    """
    # https://datafusion.apache.org/user-guide/sql/scalar_functions.html#named-struct
    # Render the field name through value_to_sql(str(...)) as well so that keys
    # containing characters meaningful in SQL (e.g. a single quote) are escaped
    # the same way string values are. A bare f"'{k}'" would emit invalid SQL for
    # a key like "it's".
    return (
        "named_struct("
        + ", ".join(
            f"{value_to_sql(str(k))}, {value_to_sql(v)}" for k, v in value.items()
        )
        + ")"
    )

View File

@@ -149,6 +149,21 @@ def test_value_to_sql_dict():
assert value_to_sql({}) == "named_struct()"
def test_value_to_sql_dict_key_escaping():
    # A single quote inside a struct field name has to be doubled, exactly as
    # it is inside a string value; otherwise the rendered SQL is invalid
    # (e.g. named_struct('it's', 1)).
    nested_struct = {"outer": {"in'r": 1}}
    nested_sql = "named_struct('outer', named_struct('in''r', 1))"
    cases = [
        ({"it's": 1}, "named_struct('it''s', 1)"),
        ({"o'brien": "d'angelo"}, "named_struct('o''brien', 'd''angelo')"),
        # Keys of nested structs are escaped too.
        (nested_struct, nested_sql),
    ]
    for struct, expected_sql in cases:
        assert value_to_sql(struct) == expected_sql
def test_value_to_sql_numpy_scalars():
# numpy scalars (e.g. pulled from an ndarray or a pandas column) must
# convert the same way as their native Python counterparts. np.float64