fix(EmbeddingFunction): modify safe_model_dump to explicitly exclude class fields with underscore (#1688)

Resolves issue #1681.

Co-authored-by: rjrobben <rjrobben123@gmail.com>
2026-05-14 10:30:40 +00:00 · 2024-09-26 02:53:49 +08:00
parent 8f0eb34109
commit e606a455df
3 changed files with 89 additions and 2 deletions
--- a/python/python/tests/test_embeddings.py
+++ b/python/python/tests/test_embeddings.py
@@ -183,3 +183,45 @@ def test_add_optional_vector(tmp_path):
    expected = LanceSchema(id="id", text="text")
    tbl.add([expected])
    assert not (np.abs(tbl.to_pandas()["vector"][0]) < 1e-6).all()
+
+
+@pytest.mark.parametrize(
+    "embedding_type",
+    [
+        "openai",
+        "sentence-transformers",
+        "huggingface",
+        "ollama",
+        "cohere",
+        "instructor",
+    ],
+)
+def test_embedding_function_safe_model_dump(embedding_type):
+    """safe_model_dump() must return a dict without underscore-prefixed keys."""
+    registry = get_registry()
+    # Some embedding types might require specific parameters; skip those.
+    try:
+        model = registry.get(embedding_type).create()
+    except Exception as e:
+        pytest.skip(f"Skipping {embedding_type} due to error: {str(e)}")
+
+    dumped_model = model.safe_model_dump()
+    # Check the type first so the key-based assertions below run on a dict.
+    assert isinstance(
+        dumped_model, dict
+    ), f"{embedding_type}: Dumped model is not a dictionary"
+
+    assert all(
+        not k.startswith("_") for k in dumped_model
+    ), f"{embedding_type}: Dumped model contains keys starting with underscore"
+
+    assert (
+        "max_retries" in dumped_model
+    ), f"{embedding_type}: Essential field 'max_retries' is missing from dumped model"
+
+    for key in model.__dict__:
+        if key.startswith("_"):
+            assert key not in dumped_model, (
+                f"{embedding_type}: Private attribute '{key}' "
+                f"is present in dumped model"
+            )