mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-26 14:49:57 +00:00
- Tried to address some of the onboarding feedback listed in https://github.com/lancedb/lancedb/issues/1224 - Improve visibility of pydantic integration and embedding API. (Based on onboarding feedback - Many ways of ingesting data, defining schema but not sure what to use in a specific use-case) - Add a guide that takes users through testing and improving retriever performance using built-in utilities like hybrid-search and reranking - Add some benchmarks for the above - Add missing cohere docs --------- Co-authored-by: Weston Pace <weston.pace@gmail.com>
28 lines
788 B
Python
28 lines
788 B
Python
import lancedb
|
|
|
|
# --8<-- [start:imports]
|
|
from lancedb.pydantic import LanceModel, Vector
|
|
from lancedb.embeddings import get_registry
|
|
|
|
# --8<-- [end:imports]
|
|
import pytest
|
|
|
|
|
|
@pytest.mark.slow
def test_embeddings_openai():
    """Run the OpenAI embedding example end to end and check its result.

    The region between the ``--8<--`` markers creates a table whose schema
    is a ``LanceModel`` with a source text field and a vector field supplied
    by the registry's "openai" embedding function, adds two rows, and runs a
    text search limited to one result.

    NOTE(review): the marked region looks like a snippet that is included
    elsewhere verbatim, so the hard-coded "/tmp/db" path and the ``print``
    are kept as they are -- confirm before changing anything between the
    markers.
    """
    # --8<-- [start:openai_embeddings]
    db = lancedb.connect("/tmp/db")
    func = get_registry().get("openai").create(name="text-embedding-ada-002")

    class Words(LanceModel):
        text: str = func.SourceField()
        vector: Vector(func.ndims()) = func.VectorField()

    table = db.create_table("words", schema=Words, mode="overwrite")
    table.add([{"text": "hello world"}, {"text": "goodbye world"}])

    query = "greetings"
    actual = table.search(query).limit(1).to_pydantic(Words)[0]
    print(actual.text)
    # --8<-- [end:openai_embeddings]

    # Without an assertion the test passes no matter what the search returns.
    # The table was created with mode="overwrite" and holds exactly the two
    # rows added above, so the single hit must be one of them; which one ranks
    # first depends on the embedding model and is deliberately not pinned.
    assert actual.text in {"hello world", "goodbye world"}