mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-09 05:12:58 +00:00
[python] Use pydantic for embedding function persistence (#467)
1. Support persistent embedding functions so users can search using just a query string. 2. Add fixed-size list conversion for multiple vector columns. 3. Add support for empty queries (just apply select/where/limit). 4. Refactor and simplify some of the data prep code. --------- Co-authored-by: Chang She <chang@lancedb.com> Co-authored-by: Weston Pace <weston.pace@gmail.com>
This commit is contained in:
@@ -1,7 +1,10 @@
|
||||
import os
|
||||
|
||||
import pyarrow as pa
|
||||
import pytest
|
||||
|
||||
from lancedb.embeddings import EmbeddingFunctionModel, EmbeddingFunctionRegistry
|
||||
|
||||
# import lancedb so we don't have to in every example
|
||||
|
||||
|
||||
@@ -14,3 +17,22 @@ def doctest_setup(monkeypatch, tmpdir):
|
||||
monkeypatch.setitem(os.environ, "COLUMNS", "80")
|
||||
# Work in a temporary directory
|
||||
monkeypatch.chdir(tmpdir)
|
||||
|
||||
|
||||
# Registry instance that the mock embedding function below is registered with.
registry = EmbeddingFunctionRegistry.get_instance()


@registry.register()
class MockEmbeddingFunction(EmbeddingFunctionModel):
    """Mock embedding function that derives vectors from element hashes.

    No model is involved: each row is mapped to the ``float`` of the
    built-in ``hash`` of each of its first ten elements.
    """

    def __call__(self, data):
        """Embed ``data`` and return one vector (list of floats) per row.

        A single ``str`` is treated as one row; ``pa.ChunkedArray`` and
        ``pa.Array`` inputs are converted to Python lists first. Any other
        input is iterated as-is.
        """
        if isinstance(data, str):
            # Wrap a lone string so it is embedded as a single row rather
            # than being iterated character by character.
            rows = [data]
        elif isinstance(data, pa.ChunkedArray):
            rows = data.combine_chunks().to_pylist()
        elif isinstance(data, pa.Array):
            rows = data.to_pylist()
        else:
            rows = data

        return list(map(self.embed, rows))

    def embed(self, row):
        """Return ``float(hash(c))`` for each of the first ten elements of ``row``.

        The result has at most ten entries; rows with fewer than ten
        elements produce correspondingly shorter vectors.
        """
        # NOTE: Python randomises ``str`` hashes per process unless
        # PYTHONHASHSEED is fixed, so for string rows these values are only
        # stable within a single interpreter run.
        leading = row[:10]
        return [float(hash(item)) for item in leading]
|
||||
|
||||
Reference in New Issue
Block a user