diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 6df3c1d7..99bcce89 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -4,6 +4,9 @@ repo_url: https://github.com/lancedb/lancedb edit_uri: https://github.com/lancedb/lancedb/tree/main/docs/src repo_name: lancedb/lancedb docs_dir: src +watch: + - src + - ../python/python theme: name: "material" @@ -63,6 +66,7 @@ plugins: - https://arrow.apache.org/docs/objects.inv - https://pandas.pydata.org/docs/objects.inv - https://lancedb.github.io/lance/objects.inv + - https://docs.pydantic.dev/latest/objects.inv - mkdocs-jupyter - render_swagger: allow_arbitrary_locations: true @@ -105,8 +109,8 @@ nav: - 📚 Concepts: - Vector search: concepts/vector_search.md - Indexing: - - IVFPQ: concepts/index_ivfpq.md - - HNSW: concepts/index_hnsw.md + - IVFPQ: concepts/index_ivfpq.md + - HNSW: concepts/index_hnsw.md - Storage: concepts/storage.md - Data management: concepts/data_management.md - 🔨 Guides: @@ -130,8 +134,8 @@ nav: - Adaptive RAG: rag/adaptive_rag.md - SFR RAG: rag/sfr_rag.md - Advanced Techniques: - - HyDE: rag/advanced_techniques/hyde.md - - FLARE: rag/advanced_techniques/flare.md + - HyDE: rag/advanced_techniques/hyde.md + - FLARE: rag/advanced_techniques/flare.md - Reranking: - Quickstart: reranking/index.md - Cohere Reranker: reranking/cohere.md @@ -146,7 +150,7 @@ nav: - Building Custom Rerankers: reranking/custom_reranker.md - Example: notebooks/lancedb_reranking.ipynb - Filtering: sql.md - - Versioning & Reproducibility: + - Versioning & Reproducibility: - sync API: notebooks/reproducibility.ipynb - async API: notebooks/reproducibility_async.ipynb - Configuring Storage: guides/storage.md @@ -240,8 +244,8 @@ nav: - Concepts: - Vector search: concepts/vector_search.md - Indexing: - - IVFPQ: concepts/index_ivfpq.md - - HNSW: concepts/index_hnsw.md + - IVFPQ: concepts/index_ivfpq.md + - HNSW: concepts/index_hnsw.md - Storage: concepts/storage.md - Data management: concepts/data_management.md - Guides: @@ -265,8 +269,8 
@@ nav: - Adaptive RAG: rag/adaptive_rag.md - SFR RAG: rag/sfr_rag.md - Advanced Techniques: - - HyDE: rag/advanced_techniques/hyde.md - - FLARE: rag/advanced_techniques/flare.md + - HyDE: rag/advanced_techniques/hyde.md + - FLARE: rag/advanced_techniques/flare.md - Reranking: - Quickstart: reranking/index.md - Cohere Reranker: reranking/cohere.md @@ -280,7 +284,7 @@ nav: - Building Custom Rerankers: reranking/custom_reranker.md - Example: notebooks/lancedb_reranking.ipynb - Filtering: sql.md - - Versioning & Reproducibility: + - Versioning & Reproducibility: - sync API: notebooks/reproducibility.ipynb - async API: notebooks/reproducibility_async.ipynb - Configuring Storage: guides/storage.md @@ -349,8 +353,8 @@ nav: - 🦀 Rust: - Overview: examples/examples_rust.md - Studies: - - studies/overview.md - - ↗Improve retrievers with hybrid search and reranking: https://blog.lancedb.com/hybrid-search-and-reranking-report/ + - studies/overview.md + - ↗Improve retrievers with hybrid search and reranking: https://blog.lancedb.com/hybrid-search-and-reranking-report/ - API reference: - Overview: api_reference.md - Python: python/python.md diff --git a/docs/src/python/pydantic.md b/docs/src/python/pydantic.md index b677c010..bdd521ea 100644 --- a/docs/src/python/pydantic.md +++ b/docs/src/python/pydantic.md @@ -2,14 +2,19 @@ [Pydantic](https://docs.pydantic.dev/latest/) is a data validation library in Python. LanceDB integrates with Pydantic for schema inference, data ingestion, and query result casting. +Using [LanceModel][lancedb.pydantic.LanceModel], users can seamlessly +integrate Pydantic with the rest of the LanceDB APIs. -## Schema +```python -LanceDB supports to create Apache Arrow Schema from a -[Pydantic BaseModel](https://docs.pydantic.dev/latest/api/main/#pydantic.main.BaseModel) -via [pydantic_to_schema()](python.md#lancedb.pydantic.pydantic_to_schema) method. 
+--8<-- "python/python/tests/docs/test_pydantic_integration.py:imports" + +--8<-- "python/python/tests/docs/test_pydantic_integration.py:base_model" + +--8<-- "python/python/tests/docs/test_pydantic_integration.py:set_url" +--8<-- "python/python/tests/docs/test_pydantic_integration.py:base_example" +``` -::: lancedb.pydantic.pydantic_to_schema ## Vector Field @@ -34,3 +39,9 @@ Current supported type conversions: | `list` | `pyarrow.List` | | `BaseModel` | `pyarrow.Struct` | | `Vector(n)` | `pyarrow.FixedSizeList(float32, n)` | + +LanceDB supports creating an Apache Arrow Schema from a +[Pydantic BaseModel][pydantic.BaseModel] +via the [pydantic_to_schema()](python.md#lancedb.pydantic.pydantic_to_schema) method. + +::: lancedb.pydantic.pydantic_to_schema diff --git a/docs/test/md_testing.py b/docs/test/md_testing.py index cb064c71..8db130c1 100755 --- a/docs/test/md_testing.py +++ b/docs/test/md_testing.py @@ -15,6 +15,7 @@ excluded_globs = [ "../src/python/duckdb.md", "../src/python/pandas_and_pyarrow.md", "../src/python/polars_arrow.md", + "../src/python/pydantic.md", "../src/embeddings/*.md", "../src/concepts/*.md", "../src/ann_indexes.md", diff --git a/python/python/lancedb/pydantic.py b/python/python/lancedb/pydantic.py index 855503f2..c665c9de 100644 --- a/python/python/lancedb/pydantic.py +++ b/python/python/lancedb/pydantic.py @@ -259,7 +259,8 @@ def _pydantic_to_field(name: str, field: FieldInfo) -> pa.Field: def pydantic_to_schema(model: Type[pydantic.BaseModel]) -> pa.Schema: - """Convert a Pydantic model to a PyArrow Schema. + """Convert a [Pydantic Model][pydantic.BaseModel] to a + [PyArrow Schema][pyarrow.Schema]. 
Parameters ---------- @@ -269,24 +270,25 @@ def pydantic_to_schema(model: Type[pydantic.BaseModel]) -> pa.Schema: Returns ------- pyarrow.Schema + The Arrow Schema Examples -------- >>> from typing import List, Optional >>> import pydantic - >>> from lancedb.pydantic import pydantic_to_schema + >>> from lancedb.pydantic import pydantic_to_schema, Vector >>> class FooModel(pydantic.BaseModel): ... id: int ... s: str - ... vec: List[float] + ... vec: Vector(1536) # fixed_size_list[1536] ... li: List[int] ... >>> schema = pydantic_to_schema(FooModel) >>> assert schema == pa.schema([ ... pa.field("id", pa.int64(), False), ... pa.field("s", pa.utf8(), False), - ... pa.field("vec", pa.list_(pa.float64()), False), + ... pa.field("vec", pa.list_(pa.float32(), 1536)), ... pa.field("li", pa.list_(pa.int64()), False), ... ]) """ @@ -308,7 +310,7 @@ class LanceModel(pydantic.BaseModel): ... vector: Vector(2) ... >>> db = lancedb.connect("./example") - >>> table = db.create_table("test", schema=TestModel.to_arrow_schema()) + >>> table = db.create_table("test", schema=TestModel) >>> table.add([ ... TestModel(name="test", vector=[1.0, 2.0]) ... 
]) diff --git a/python/python/tests/docs/test_pydantic_integration.py b/python/python/tests/docs/test_pydantic_integration.py new file mode 100644 index 00000000..d39c1903 --- /dev/null +++ b/python/python/tests/docs/test_pydantic_integration.py @@ -0,0 +1,36 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright The LanceDB Authors + +# --8<-- [start:imports] +import lancedb +from lancedb.pydantic import Vector, LanceModel +# --8<-- [end:imports] + + +def test_pydantic_model(tmp_path): + # --8<-- [start:base_model] + class PersonModel(LanceModel): + name: str + age: int + vector: Vector(2) + + # --8<-- [end:base_model] + + # --8<-- [start:set_url] + url = "./example" + # --8<-- [end:set_url] + url = tmp_path + + # --8<-- [start:base_example] + db = lancedb.connect(url) + table = db.create_table("person", schema=PersonModel) + table.add( + [ + PersonModel(name="bob", age=1, vector=[1.0, 2.0]), + PersonModel(name="alice", age=2, vector=[3.0, 4.0]), + ] + ) + assert table.count_rows() == 2 + person = table.search([0.0, 0.0]).limit(1).to_pydantic(PersonModel) + assert person[0].name == "bob" + # --8<-- [end:base_example]