diff --git a/python/python/lancedb/util.py b/python/python/lancedb/util.py
index e9d01407b..cd170c221 100644
--- a/python/python/lancedb/util.py
+++ b/python/python/lancedb/util.py
@@ -373,9 +373,15 @@ def _(value: list):
 @value_to_sql.register(dict)
 def _(value: dict):
     # https://datafusion.apache.org/user-guide/sql/scalar_functions.html#named-struct
+    # Render the field name through value_to_sql(str(...)) as well so that keys
+    # containing characters meaningful in SQL (e.g. a single quote) are escaped
+    # the same way string values are. A bare f"'{k}'" would emit invalid SQL for
+    # a key like "it's".
     return (
         "named_struct("
-        + ", ".join(f"'{k}', {value_to_sql(v)}" for k, v in value.items())
+        + ", ".join(
+            f"{value_to_sql(str(k))}, {value_to_sql(v)}" for k, v in value.items()
+        )
         + ")"
     )
 
diff --git a/python/python/tests/test_util.py b/python/python/tests/test_util.py
index f3051d45a..c96407779 100644
--- a/python/python/tests/test_util.py
+++ b/python/python/tests/test_util.py
@@ -149,6 +149,21 @@ def test_value_to_sql_dict():
     assert value_to_sql({}) == "named_struct()"
 
 
+def test_value_to_sql_dict_key_escaping():
+    # Struct field names that contain a single quote must be escaped (doubled)
+    # the same way string values are, otherwise value_to_sql emits invalid SQL
+    # such as named_struct('it's', 1).
+    assert value_to_sql({"it's": 1}) == "named_struct('it''s', 1)"
+    assert (
+        value_to_sql({"o'brien": "d'angelo"}) == "named_struct('o''brien', 'd''angelo')"
+    )
+    # Escaping also applies to keys of nested structs.
+    assert (
+        value_to_sql({"outer": {"in'r": 1}})
+        == "named_struct('outer', named_struct('in''r', 1))"
+    )
+
+
 def test_value_to_sql_numpy_scalars():
     # numpy scalars (e.g. pulled from an ndarray or a pandas column) must
     # convert the same way as their native Python counterparts. np.float64