docs: add retriever guide, address minor onboarding feedback & enhancements (#1326)

- Address some of the onboarding feedback listed in
https://github.com/lancedb/lancedb/issues/1224
- Improve visibility of the pydantic integration and embedding API. (Based
on onboarding feedback: there are many ways of ingesting data and defining
a schema, but it is unclear which one to use for a specific use case.)
- Add a guide that takes users through testing and improving retriever
performance using built-in utilities like hybrid-search and reranking
- Add some benchmarks for the above
- Add missing cohere docs

---------

Co-authored-by: Weston Pace <weston.pace@gmail.com>
This commit is contained in:
Ayush Chaurasia
2024-06-08 06:25:31 +05:30
committed by GitHub
parent 007f9c1af8
commit 76fc16c7a1
10 changed files with 372 additions and 4 deletions

View File

@@ -0,0 +1,27 @@
import lancedb
# --8<-- [start:imports]
from lancedb.pydantic import LanceModel, Vector
from lancedb.embeddings import get_registry
# --8<-- [end:imports]
import pytest
@pytest.mark.slow
def test_embeddings_openai():
    """Run the OpenAI embedding example end to end.

    The statements between the ``--8<--`` markers look like a named
    documentation snippet section (``openai_embeddings``), so they are kept
    free of extra commentary; what they do is described here instead:

    1. Connect to a LanceDB database at ``/tmp/db``.
    2. Create an embedding function from the ``"openai"`` registry entry
       using the ``text-embedding-ada-002`` model.
    3. Declare a ``Words`` model whose ``text`` field is the embedding
       function's source field and whose ``vector`` field is its vector
       field, sized by ``func.ndims()``.
    4. Create (overwriting if present) the ``words`` table with that schema
       and add two text-only rows.
    5. Search the table with the plain-text query ``"greetings"``, take the
       top hit as a ``Words`` instance and print its text.

    The test is marked ``slow``; it presumably needs network access and
    OpenAI credentials to run -- TODO confirm against the project's test
    configuration.
    """
    # --8<-- [start:openai_embeddings]
    db = lancedb.connect("/tmp/db")
    func = get_registry().get("openai").create(name="text-embedding-ada-002")

    class Words(LanceModel):
        text: str = func.SourceField()
        vector: Vector(func.ndims()) = func.VectorField()

    table = db.create_table("words", schema=Words, mode="overwrite")
    table.add([{"text": "hello world"}, {"text": "goodbye world"}])

    query = "greetings"
    actual = table.search(query).limit(1).to_pydantic(Words)[0]
    print(actual.text)
    # --8<-- [end:openai_embeddings]