From 9128dbcd7a95d7237507272c4adaa2e3dc164d3d Mon Sep 17 00:00:00 2001
From: JSap0914 <116227558+JSap0914@users.noreply.github.com>
Date: Thu, 18 Jun 2026 04:55:43 +0900
Subject: [PATCH] fix(util): escape single quotes in struct field names in value_to_sql (#3548)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### Bug

`value_to_sql({...})` builds a DataFusion `named_struct(...)` literal but interpolates the struct field names directly as `f"'{k}'"`. A field name that contains a single quote therefore produces invalid SQL:

```python
>>> from lancedb.util import value_to_sql
>>> value_to_sql({"it's": 1})
"named_struct('it's', 1)"  # invalid SQL — the quote terminates the literal
```

String *values* are already escaped (single quotes doubled) by the `str` branch of `value_to_sql`, so keys and values were handled inconsistently. This affects `Table.update(values={...})` / `merge_insert` when a struct column has a field name containing `'`.

### Fix

Render the key through `value_to_sql(str(k))` so field names are escaped exactly like string values:

```python
>>> value_to_sql({"it's": 1})
"named_struct('it''s', 1)"
```

Keys without special characters are unchanged (`'a'` stays `'a'`), so existing behavior is preserved.

### Verification

```
$ pytest python/tests/test_util.py -k value_to_sql_dict
```

The new `test_value_to_sql_dict_key_escaping` covers quoted keys (incl. nested structs) and fails on `main` (`named_struct('it's', 1)`), passes with this change; the existing `test_value_to_sql_dict` still passes.
Co-authored-by: JSap0914 --- python/python/lancedb/util.py | 8 +++++++- python/python/tests/test_util.py | 15 +++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/python/python/lancedb/util.py b/python/python/lancedb/util.py index e9d01407b..cd170c221 100644 --- a/python/python/lancedb/util.py +++ b/python/python/lancedb/util.py @@ -373,9 +373,15 @@ def _(value: list): @value_to_sql.register(dict) def _(value: dict): # https://datafusion.apache.org/user-guide/sql/scalar_functions.html#named-struct + # Render the field name through value_to_sql(str(...)) as well so that keys + # containing characters meaningful in SQL (e.g. a single quote) are escaped + # the same way string values are. A bare f"'{k}'" would emit invalid SQL for + # a key like "it's". return ( "named_struct(" - + ", ".join(f"'{k}', {value_to_sql(v)}" for k, v in value.items()) + + ", ".join( + f"{value_to_sql(str(k))}, {value_to_sql(v)}" for k, v in value.items() + ) + ")" ) diff --git a/python/python/tests/test_util.py b/python/python/tests/test_util.py index f3051d45a..c96407779 100644 --- a/python/python/tests/test_util.py +++ b/python/python/tests/test_util.py @@ -149,6 +149,21 @@ def test_value_to_sql_dict(): assert value_to_sql({}) == "named_struct()" +def test_value_to_sql_dict_key_escaping(): + # Struct field names that contain a single quote must be escaped (doubled) + # the same way string values are, otherwise value_to_sql emits invalid SQL + # such as named_struct('it's', 1). + assert value_to_sql({"it's": 1}) == "named_struct('it''s', 1)" + assert ( + value_to_sql({"o'brien": "d'angelo"}) == "named_struct('o''brien', 'd''angelo')" + ) + # Escaping also applies to keys of nested structs. + assert ( + value_to_sql({"outer": {"in'r": 1}}) + == "named_struct('outer', named_struct('in''r', 1))" + ) + + def test_value_to_sql_numpy_scalars(): # numpy scalars (e.g. 
pulled from an ndarray or a pandas column) must # convert the same way as their native Python counterparts. np.float64