mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-09 21:32:58 +00:00
1. Support persistent embedding function so users can just search using query string 2. Add fixed size list conversion for multiple vector columns 3. Add support for empty query (just apply select/where/limit). 4. Refactor and simplify some of the data prep code --------- Co-authored-by: Chang She <chang@lancedb.com> Co-authored-by: Weston Pace <weston.pace@gmail.com>
39 lines
1.1 KiB
Python
39 lines
1.1 KiB
Python
import os
|
|
|
|
import pyarrow as pa
|
|
import pytest
|
|
|
|
from lancedb.embeddings import EmbeddingFunctionModel, EmbeddingFunctionRegistry
|
|
|
|
# import lancedb so we don't have to in every example
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def doctest_setup(monkeypatch, tmpdir):
    """Prepare a predictable environment for doctests.

    Declared with ``autouse=True``, so it does not have to be requested
    explicitly. All changes go through ``monkeypatch``, which pytest
    reverts when the test finishes.

    Args:
        monkeypatch: pytest's built-in monkeypatch fixture.
        tmpdir: pytest's built-in per-test temporary directory fixture.
    """
    doctest_env = {
        # Colour is switched off so that docstrings do not have to embed
        # escape codes in their expected output.
        "NO_COLOR": "1",
        # Pin the column width explicitly.
        "COLUMNS": "80",
    }
    for name, value in doctest_env.items():
        monkeypatch.setitem(os.environ, name, value)

    # Run from inside a temporary directory.
    monkeypatch.chdir(tmpdir)
|
|
|
|
|
|
# Embedding-function registry used by the ``@registry.register()`` decorator
# further down. NOTE(review): ``get_instance()`` presumably returns a shared
# singleton - confirm against lancedb.embeddings.
registry = EmbeddingFunctionRegistry.get_instance()
|
|
|
|
|
|
@registry.register()
class MockEmbeddingFunction(EmbeddingFunctionModel):
    """Hash-based stand-in embedding function registered with ``registry``.

    Each row is turned into a list of floats derived from ``hash()`` of its
    leading elements; no real model is involved.
    """

    def __call__(self, data):
        """Embed ``data`` and return one vector per row.

        Args:
            data: A single ``str`` (treated as one row), a
                ``pa.ChunkedArray`` or ``pa.Array`` (converted to a Python
                list), or any other iterable of rows, which is used as-is.

        Returns:
            A list holding the result of :meth:`embed` for every row.
        """
        if isinstance(data, str):
            # A bare string is one row, not an iterable of characters.
            rows = [data]
        elif isinstance(data, pa.ChunkedArray):
            rows = data.combine_chunks().to_pylist()
        elif isinstance(data, pa.Array):
            rows = data.to_pylist()
        else:
            rows = data

        return [self.embed(item) for item in rows]

    def embed(self, row):
        """Return ``float(hash(x))`` for each of the first 10 elements of ``row``.

        A row with fewer than 10 elements produces a correspondingly shorter
        vector. For ``str`` rows the elements are single characters, and
        Python salts ``str`` hashes per process unless ``PYTHONHASHSEED`` is
        fixed, so the values are only stable within one interpreter run.
        """
        leading = row[:10]
        return [float(code) for code in map(hash, leading)]
|