Files
lancedb/python/lancedb/conftest.py
Chang She 9a9a73a65d [python] Use pydantic for embedding function persistence (#467)
1. Support persistent embedding function so users can just search using
query string
2. Add fixed size list conversion for multiple vector columns
3. Add support for empty query (just apply select/where/limit).
4. Refactor and simplify some of the data prep code

---------

Co-authored-by: Chang She <chang@lancedb.com>
Co-authored-by: Weston Pace <weston.pace@gmail.com>
2023-09-05 21:30:45 -07:00

39 lines
1.1 KiB
Python

import os
import pyarrow as pa
import pytest
from lancedb.embeddings import EmbeddingFunctionModel, EmbeddingFunctionRegistry
# import lancedb so we don't have to in every example
@pytest.fixture(autouse=True)
def doctest_setup(monkeypatch, tmpdir):
# disable color for doctests so we don't have to include
# escape codes in docstrings
monkeypatch.setitem(os.environ, "NO_COLOR", "1")
# Explicitly set the column width
monkeypatch.setitem(os.environ, "COLUMNS", "80")
# Work in a temporary directory
monkeypatch.chdir(tmpdir)
registry = EmbeddingFunctionRegistry.get_instance()
@registry.register()
class MockEmbeddingFunction(EmbeddingFunctionModel):
def __call__(self, data):
if isinstance(data, str):
data = [data]
elif isinstance(data, pa.ChunkedArray):
data = data.combine_chunks().to_pylist()
elif isinstance(data, pa.Array):
data = data.to_pylist()
return [self.embed(row) for row in data]
def embed(self, row):
return [float(hash(c)) for c in row[:10]]