mirror of
https://github.com/lancedb/lancedb.git
synced 2026-07-02 18:40:40 +00:00
fix(util): escape single quotes in struct field names in value_to_sql (#3548)
### Bug
`value_to_sql({...})` builds a DataFusion `named_struct(...)` literal
but interpolates the struct field names directly as `f"'{k}'"`. A field
name that contains a single quote therefore produces invalid SQL:
```python
>>> from lancedb.util import value_to_sql
>>> value_to_sql({"it's": 1})
"named_struct('it's', 1)" # invalid SQL — the quote terminates the literal
```
String *values* are already escaped (single quotes doubled) by the `str`
branch of `value_to_sql`, so keys and values were handled
inconsistently. This affects `Table.update(values={...})` /
`merge_insert` when a struct column has a field name containing `'`.
### Fix
Render the key through `value_to_sql(str(k))` so field names are escaped
exactly like string values:
```python
>>> value_to_sql({"it's": 1})
"named_struct('it''s', 1)"
```
Keys that contain no single quote are rendered unchanged (`'a'` stays `'a'`), so
existing behavior is preserved.
### Verification
```
$ pytest python/tests/test_util.py -k value_to_sql_dict
```
The new `test_value_to_sql_dict_key_escaping` covers quoted keys (incl.
nested structs). It fails on `main`, which emits `named_struct('it's', 1)`, and
passes with this change; the existing `test_value_to_sql_dict` still passes.
Co-authored-by: JSap0914 <JSap0914@users.noreply.github.com>
This commit is contained in:
@@ -373,9 +373,15 @@ def _(value: list):
|
||||
@value_to_sql.register(dict)
|
||||
def _(value: dict):
|
||||
# https://datafusion.apache.org/user-guide/sql/scalar_functions.html#named-struct
|
||||
# Render the field name through value_to_sql(str(...)) as well so that keys
|
||||
# containing characters meaningful in SQL (e.g. a single quote) are escaped
|
||||
# the same way string values are. A bare f"'{k}'" would emit invalid SQL for
|
||||
# a key like "it's".
|
||||
return (
|
||||
"named_struct("
|
||||
+ ", ".join(f"'{k}', {value_to_sql(v)}" for k, v in value.items())
|
||||
+ ", ".join(
|
||||
f"{value_to_sql(str(k))}, {value_to_sql(v)}" for k, v in value.items()
|
||||
)
|
||||
+ ")"
|
||||
)
|
||||
|
||||
|
||||
@@ -149,6 +149,21 @@ def test_value_to_sql_dict():
|
||||
assert value_to_sql({}) == "named_struct()"
|
||||
|
||||
|
||||
def test_value_to_sql_dict_key_escaping():
|
||||
# Struct field names that contain a single quote must be escaped (doubled)
|
||||
# the same way string values are, otherwise value_to_sql emits invalid SQL
|
||||
# such as named_struct('it's', 1).
|
||||
assert value_to_sql({"it's": 1}) == "named_struct('it''s', 1)"
|
||||
assert (
|
||||
value_to_sql({"o'brien": "d'angelo"}) == "named_struct('o''brien', 'd''angelo')"
|
||||
)
|
||||
# Escaping also applies to keys of nested structs.
|
||||
assert (
|
||||
value_to_sql({"outer": {"in'r": 1}})
|
||||
== "named_struct('outer', named_struct('in''r', 1))"
|
||||
)
|
||||
|
||||
|
||||
def test_value_to_sql_numpy_scalars():
|
||||
# numpy scalars (e.g. pulled from an ndarray or a pandas column) must
|
||||
# convert the same way as their native Python counterparts. np.float64
|
||||
|
||||
Reference in New Issue
Block a user