feat: voyageai support (#1799)

Adding VoyageAI embedding and rerank support
This commit is contained in:
fzowl
2024-11-08 20:21:20 +01:00
committed by GitHub
parent 21021f94ca
commit cbbc07d0f5
9 changed files with 423 additions and 0 deletions

View File

@@ -196,6 +196,7 @@ def test_add_optional_vector(tmp_path):
"ollama",
"cohere",
"instructor",
"voyageai",
],
)
def test_embedding_function_safe_model_dump(embedding_type):

View File

@@ -481,3 +481,22 @@ def test_ollama_embedding(tmp_path):
json.dumps(dumped_model)
except TypeError:
pytest.fail("Failed to JSON serialize the dumped model")
@pytest.mark.slow
@pytest.mark.skipif(
os.environ.get("VOYAGE_API_KEY") is None, reason="VOYAGE_API_KEY not set"
)
def test_voyageai_embedding_function():
voyageai = get_registry().get("voyageai").create(name="voyage-3", max_retries=0)
class TextModel(LanceModel):
text: str = voyageai.SourceField()
vector: Vector(voyageai.ndims()) = voyageai.VectorField()
df = pd.DataFrame({"text": ["hello world", "goodbye world"]})
db = lancedb.connect("~/lancedb")
tbl = db.create_table("test", schema=TextModel, mode="overwrite")
tbl.add(df)
assert len(tbl.to_pandas()["vector"][0]) == voyageai.ndims()

View File

@@ -16,6 +16,7 @@ from lancedb.rerankers import (
OpenaiReranker,
JinaReranker,
AnswerdotaiRerankers,
VoyageAIReranker,
)
from lancedb.table import LanceTable
@@ -344,3 +345,14 @@ def test_jina_reranker(tmp_path, use_tantivy):
table, schema = get_test_table(tmp_path, use_tantivy)
reranker = JinaReranker()
_run_test_reranker(reranker, table, "single player experience", None, schema)
@pytest.mark.skipif(
os.environ.get("VOYAGE_API_KEY") is None, reason="VOYAGE_API_KEY not set"
)
@pytest.mark.parametrize("use_tantivy", [True, False])
def test_voyageai_reranker(tmp_path, use_tantivy):
pytest.importorskip("voyageai")
reranker = VoyageAIReranker(model_name="rerank-2")
table, schema = get_test_table(tmp_path, use_tantivy)
_run_test_reranker(reranker, table, "single player experience", None, schema)