feat!: add variable store to embeddings registry (#2112)

BREAKING CHANGE: embedding function implementations in Node now need to
call `resolveVariables()` in their constructors and should **not**
implement `toJSON()`.

This tries to address the handling of secrets. In Node, they are
currently lost. In Python, they are currently leaked into the table
schema metadata.

This PR introduces an in-memory variable store on the function registry.
It also allows embedding function definitions to label certain config
values as "sensitive", and the preprocessing logic will raise an error
if users try to pass in hard-coded values for them.

Closes #2110
Closes #521

---------

Co-authored-by: Weston Pace <weston.pace@gmail.com>
This commit is contained in:
Will Jones
2025-02-24 15:52:19 -08:00
committed by GitHub
parent ecdee4d2b1
commit 7ac5f74c80
24 changed files with 699 additions and 175 deletions

View File

@@ -43,12 +43,17 @@ test("custom embedding function", async () => {
@register("my_embedding")
class MyEmbeddingFunction extends EmbeddingFunction<string> {
toJSON(): object {
return {};
constructor(optionsRaw = {}) {
super();
const options = this.resolveVariables(optionsRaw);
// Initialize using options
}
ndims() {
return 3;
}
protected getSensitiveKeys(): string[] {
return [];
}
embeddingDataType(): Float {
return new Float32();
}
@@ -94,3 +99,14 @@ test("custom embedding function", async () => {
expect(await table2.countRows()).toBe(2);
});
});
// Documentation example: stores a value under the name "api_key" in the
// registry's variable store, then creates the "openai" embedding function
// with an `apiKey` of "$var:api_key" (presumably resolved from that stored
// variable when the function is constructed — confirm against the registry).
// The test contains no assertions, so it only checks that these calls do not
// throw.
// NOTE(review): the lines between the `--8<--` markers look like a snippet
// that is included verbatim in the docs — confirm before editing them.
test("embedding function api_key", async () => {
// --8<-- [start:register_secret]
const registry = getRegistry();
registry.setVar("api_key", "sk-...");
const func = registry.get("openai")!.create({
apiKey: "$var:api_key",
});
// --8<-- [end:register_secret]
});