mirror of
https://github.com/lancedb/lancedb.git
synced 2026-05-30 10:20:40 +00:00
docs: improve pydantic integration docs (#2136)
Address usage mistakes in https://github.com/lancedb/lancedb/issues/2135. * Add example of how to use `LanceModel` and `Vector` decorator * Add test for pydantic doc * Fix the example to directly use LanceModel instead of calling `MyModel.to_arrow_schema()` in the example. * Add cross-reference link to pydantic doc site * Configure mkdocs to watch code changes in python directory.
This commit is contained in:
@@ -4,6 +4,9 @@ repo_url: https://github.com/lancedb/lancedb
|
||||
edit_uri: https://github.com/lancedb/lancedb/tree/main/docs/src
|
||||
repo_name: lancedb/lancedb
|
||||
docs_dir: src
|
||||
watch:
|
||||
- src
|
||||
- ../python/python
|
||||
|
||||
theme:
|
||||
name: "material"
|
||||
@@ -63,6 +66,7 @@ plugins:
|
||||
- https://arrow.apache.org/docs/objects.inv
|
||||
- https://pandas.pydata.org/docs/objects.inv
|
||||
- https://lancedb.github.io/lance/objects.inv
|
||||
- https://docs.pydantic.dev/latest/objects.inv
|
||||
- mkdocs-jupyter
|
||||
- render_swagger:
|
||||
allow_arbitrary_locations: true
|
||||
@@ -105,8 +109,8 @@ nav:
|
||||
- 📚 Concepts:
|
||||
- Vector search: concepts/vector_search.md
|
||||
- Indexing:
|
||||
- IVFPQ: concepts/index_ivfpq.md
|
||||
- HNSW: concepts/index_hnsw.md
|
||||
- IVFPQ: concepts/index_ivfpq.md
|
||||
- HNSW: concepts/index_hnsw.md
|
||||
- Storage: concepts/storage.md
|
||||
- Data management: concepts/data_management.md
|
||||
- 🔨 Guides:
|
||||
@@ -130,8 +134,8 @@ nav:
|
||||
- Adaptive RAG: rag/adaptive_rag.md
|
||||
- SFR RAG: rag/sfr_rag.md
|
||||
- Advanced Techniques:
|
||||
- HyDE: rag/advanced_techniques/hyde.md
|
||||
- FLARE: rag/advanced_techniques/flare.md
|
||||
- HyDE: rag/advanced_techniques/hyde.md
|
||||
- FLARE: rag/advanced_techniques/flare.md
|
||||
- Reranking:
|
||||
- Quickstart: reranking/index.md
|
||||
- Cohere Reranker: reranking/cohere.md
|
||||
@@ -146,7 +150,7 @@ nav:
|
||||
- Building Custom Rerankers: reranking/custom_reranker.md
|
||||
- Example: notebooks/lancedb_reranking.ipynb
|
||||
- Filtering: sql.md
|
||||
- Versioning & Reproducibility:
|
||||
- Versioning & Reproducibility:
|
||||
- sync API: notebooks/reproducibility.ipynb
|
||||
- async API: notebooks/reproducibility_async.ipynb
|
||||
- Configuring Storage: guides/storage.md
|
||||
@@ -240,8 +244,8 @@ nav:
|
||||
- Concepts:
|
||||
- Vector search: concepts/vector_search.md
|
||||
- Indexing:
|
||||
- IVFPQ: concepts/index_ivfpq.md
|
||||
- HNSW: concepts/index_hnsw.md
|
||||
- IVFPQ: concepts/index_ivfpq.md
|
||||
- HNSW: concepts/index_hnsw.md
|
||||
- Storage: concepts/storage.md
|
||||
- Data management: concepts/data_management.md
|
||||
- Guides:
|
||||
@@ -265,8 +269,8 @@ nav:
|
||||
- Adaptive RAG: rag/adaptive_rag.md
|
||||
- SFR RAG: rag/sfr_rag.md
|
||||
- Advanced Techniques:
|
||||
- HyDE: rag/advanced_techniques/hyde.md
|
||||
- FLARE: rag/advanced_techniques/flare.md
|
||||
- HyDE: rag/advanced_techniques/hyde.md
|
||||
- FLARE: rag/advanced_techniques/flare.md
|
||||
- Reranking:
|
||||
- Quickstart: reranking/index.md
|
||||
- Cohere Reranker: reranking/cohere.md
|
||||
@@ -280,7 +284,7 @@ nav:
|
||||
- Building Custom Rerankers: reranking/custom_reranker.md
|
||||
- Example: notebooks/lancedb_reranking.ipynb
|
||||
- Filtering: sql.md
|
||||
- Versioning & Reproducibility:
|
||||
- Versioning & Reproducibility:
|
||||
- sync API: notebooks/reproducibility.ipynb
|
||||
- async API: notebooks/reproducibility_async.ipynb
|
||||
- Configuring Storage: guides/storage.md
|
||||
@@ -349,8 +353,8 @@ nav:
|
||||
- 🦀 Rust:
|
||||
- Overview: examples/examples_rust.md
|
||||
- Studies:
|
||||
- studies/overview.md
|
||||
- ↗Improve retrievers with hybrid search and reranking: https://blog.lancedb.com/hybrid-search-and-reranking-report/
|
||||
- studies/overview.md
|
||||
- ↗Improve retrievers with hybrid search and reranking: https://blog.lancedb.com/hybrid-search-and-reranking-report/
|
||||
- API reference:
|
||||
- Overview: api_reference.md
|
||||
- Python: python/python.md
|
||||
|
||||
@@ -2,14 +2,19 @@
|
||||
|
||||
[Pydantic](https://docs.pydantic.dev/latest/) is a data validation library in Python.
|
||||
LanceDB integrates with Pydantic for schema inference, data ingestion, and query result casting.
|
||||
Using [LanceModel][lancedb.pydantic.LanceModel], users can seamlessly
|
||||
integrate Pydantic with the rest of the LanceDB APIs.
|
||||
|
||||
## Schema
|
||||
```python
|
||||
|
||||
LanceDB supports creating an Apache Arrow Schema from a
|
||||
[Pydantic BaseModel](https://docs.pydantic.dev/latest/api/main/#pydantic.main.BaseModel)
|
||||
via the [pydantic_to_schema()](python.md#lancedb.pydantic.pydantic_to_schema) function.
|
||||
--8<-- "python/python/tests/docs/test_pydantic_integration.py:imports"
|
||||
|
||||
--8<-- "python/python/tests/docs/test_pydantic_integration.py:base_model"
|
||||
|
||||
--8<-- "python/python/tests/docs/test_pydantic_integration.py:set_url"
|
||||
--8<-- "python/python/tests/docs/test_pydantic_integration.py:base_example"
|
||||
```
|
||||
|
||||
::: lancedb.pydantic.pydantic_to_schema
|
||||
|
||||
## Vector Field
|
||||
|
||||
@@ -34,3 +39,9 @@ Current supported type conversions:
|
||||
| `list` | `pyarrow.List` |
|
||||
| `BaseModel` | `pyarrow.Struct` |
|
||||
| `Vector(n)` | `pyarrow.FixedSizeList(float32, n)` |
|
||||
|
||||
LanceDB supports creating an Apache Arrow Schema from a
|
||||
[Pydantic BaseModel][pydantic.BaseModel]
|
||||
via the [pydantic_to_schema()](python.md#lancedb.pydantic.pydantic_to_schema) function.
|
||||
|
||||
::: lancedb.pydantic.pydantic_to_schema
|
||||
|
||||
@@ -15,6 +15,7 @@ excluded_globs = [
|
||||
"../src/python/duckdb.md",
|
||||
"../src/python/pandas_and_pyarrow.md",
|
||||
"../src/python/polars_arrow.md",
|
||||
"../src/python/pydantic.md",
|
||||
"../src/embeddings/*.md",
|
||||
"../src/concepts/*.md",
|
||||
"../src/ann_indexes.md",
|
||||
|
||||
@@ -259,7 +259,8 @@ def _pydantic_to_field(name: str, field: FieldInfo) -> pa.Field:
|
||||
|
||||
|
||||
def pydantic_to_schema(model: Type[pydantic.BaseModel]) -> pa.Schema:
|
||||
"""Convert a Pydantic model to a PyArrow Schema.
|
||||
"""Convert a [Pydantic Model][pydantic.BaseModel] to a
|
||||
[PyArrow Schema][pyarrow.Schema].
|
||||
|
||||
Parameters
|
||||
----------
|
||||
@@ -269,24 +270,25 @@ def pydantic_to_schema(model: Type[pydantic.BaseModel]) -> pa.Schema:
|
||||
Returns
|
||||
-------
|
||||
pyarrow.Schema
|
||||
The Arrow Schema
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
>>> from typing import List, Optional
|
||||
>>> import pydantic
|
||||
>>> from lancedb.pydantic import pydantic_to_schema
|
||||
>>> from lancedb.pydantic import pydantic_to_schema, Vector
|
||||
>>> class FooModel(pydantic.BaseModel):
|
||||
... id: int
|
||||
... s: str
|
||||
... vec: List[float]
|
||||
... vec: Vector(1536) # fixed_size_list<item: float32>[1536]
|
||||
... li: List[int]
|
||||
...
|
||||
>>> schema = pydantic_to_schema(FooModel)
|
||||
>>> assert schema == pa.schema([
|
||||
... pa.field("id", pa.int64(), False),
|
||||
... pa.field("s", pa.utf8(), False),
|
||||
... pa.field("vec", pa.list_(pa.float64()), False),
|
||||
... pa.field("vec", pa.list_(pa.float32(), 1536)),
|
||||
... pa.field("li", pa.list_(pa.int64()), False),
|
||||
... ])
|
||||
"""
|
||||
@@ -308,7 +310,7 @@ class LanceModel(pydantic.BaseModel):
|
||||
... vector: Vector(2)
|
||||
...
|
||||
>>> db = lancedb.connect("./example")
|
||||
>>> table = db.create_table("test", schema=TestModel.to_arrow_schema())
|
||||
>>> table = db.create_table("test", schema=TestModel)
|
||||
>>> table.add([
|
||||
... TestModel(name="test", vector=[1.0, 2.0])
|
||||
... ])
|
||||
|
||||
36
python/python/tests/docs/test_pydantic_integration.py
Normal file
36
python/python/tests/docs/test_pydantic_integration.py
Normal file
@@ -0,0 +1,36 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
# --8<-- [start:imports]
|
||||
import lancedb
|
||||
from lancedb.pydantic import Vector, LanceModel
|
||||
# --8<-- [end:imports]
|
||||
|
||||
|
||||
def test_pydantic_model(tmp_path):
|
||||
# --8<-- [start:base_model]
|
||||
class PersonModel(LanceModel):
|
||||
name: str
|
||||
age: int
|
||||
vector: Vector(2)
|
||||
|
||||
# --8<-- [end:base_model]
|
||||
|
||||
# --8<-- [start:set_url]
|
||||
url = "./example"
|
||||
# --8<-- [end:set_url]
|
||||
url = tmp_path
|
||||
|
||||
# --8<-- [start:base_example]
|
||||
db = lancedb.connect(url)
|
||||
table = db.create_table("person", schema=PersonModel)
|
||||
table.add(
|
||||
[
|
||||
PersonModel(name="bob", age=1, vector=[1.0, 2.0]),
|
||||
PersonModel(name="alice", age=2, vector=[3.0, 4.0]),
|
||||
]
|
||||
)
|
||||
assert table.count_rows() == 2
|
||||
person = table.search([0.0, 0.0]).limit(1).to_pydantic(PersonModel)
|
||||
assert person[0].name == "bob"
|
||||
# --8<-- [end:base_example]
|
||||
Reference in New Issue
Block a user