diff --git a/python/python/lancedb/util.py b/python/python/lancedb/util.py
index a3666c75c..d5b66707f 100644
--- a/python/python/lancedb/util.py
+++ b/python/python/lancedb/util.py
@@ -324,6 +324,24 @@ def _(value: list):
     return "[" + ", ".join(map(value_to_sql, value)) + "]"
 
 
+@value_to_sql.register(dict)
+def _(value: dict):
+    """Render a dict as a DataFusion ``named_struct(...)`` SQL expression.
+
+    Keys are emitted as single-quoted field names with embedded single quotes
+    doubled; values are rendered recursively through ``value_to_sql``.
+    """
+    # https://datafusion.apache.org/user-guide/sql/scalar_functions.html#named-struct
+    # NOTE(review): an empty dict renders as ``named_struct()``; confirm the
+    # query engine accepts a call with no fields.
+    fields = []
+    for key, item in value.items():
+        # Double embedded single quotes so the key stays one SQL literal.
+        name = str(key).replace("'", "''")
+        fields.append(f"'{name}', {value_to_sql(item)}")
+    return "named_struct(" + ", ".join(fields) + ")"
+
+
 @value_to_sql.register(np.ndarray)
 def _(value: np.ndarray):
     return value_to_sql(value.tolist())
diff --git a/python/python/tests/test_util.py b/python/python/tests/test_util.py
index e7ba8bf86..74296a221 100644
--- a/python/python/tests/test_util.py
+++ b/python/python/tests/test_util.py
@@ -121,6 +121,35 @@ def test_value_to_sql_string(tmp_path):
     assert table.to_pandas().query("search == @value")["replace"].item() == value
 
 
+def test_value_to_sql_dict():
+    # Simple flat struct
+    assert value_to_sql({"a": 1, "b": "hello"}) == "named_struct('a', 1, 'b', 'hello')"
+
+    # Nested struct
+    assert (
+        value_to_sql({"outer": {"inner": 1}})
+        == "named_struct('outer', named_struct('inner', 1))"
+    )
+
+    # List inside struct
+    assert value_to_sql({"a": [1, 2]}) == "named_struct('a', [1, 2])"
+
+    # Mixed types
+    assert (
+        value_to_sql({"name": "test", "count": 42, "rate": 3.14, "active": True})
+        == "named_struct('name', 'test', 'count', 42, 'rate', 3.14, 'active', TRUE)"
+    )
+
+    # Null value inside struct
+    assert value_to_sql({"a": None}) == "named_struct('a', NULL)"
+
+    # Single quote in a key is doubled
+    assert value_to_sql({"it's": 1}) == "named_struct('it''s', 1)"
+
+    # Empty dict
+    assert value_to_sql({}) == "named_struct()"
+
+
 def test_append_vector_columns():
     registry = EmbeddingFunctionRegistry.get_instance()
     registry.register("test")(MockTextEmbeddingFunction)