feat: improve pydantic 1.x compat (#503)

This commit is contained in:
Chang She
2023-09-18 19:01:30 -07:00
committed by GitHub
parent 55207ce844
commit f20f19b804
4 changed files with 29 additions and 16 deletions

View File

@@ -94,5 +94,5 @@ jobs:
run: isort --check --diff --quiet .
- name: Run tests
run: pytest -m "not slow" -x -v --durations=30 tests
# - name: doctest
# run: pytest --doctest-modules lancedb
- name: doctest
run: pytest --doctest-modules lancedb

View File

@@ -84,7 +84,17 @@ A Table is a collection of Records in a LanceDB Database. You can follow along o
```
### From Pydantic Models
LanceDB supports to create Apache Arrow Schema from a Pydantic BaseModel via pydantic_to_schema() method.
When you create an empty table without data, you must specify the table schema.
LanceDB supports creating tables by specifying a pyarrow schema or a specialized
pydantic model called `LanceModel`.
For example, the following Content model specifies a table with 5 columns:
movie_id, vector, genres, title, and imdb_id. When you create a table, you can
pass the class as the value of the `schema` parameter to `create_table`.
The `vector` column is a `Vector` type, which is a specialized pydantic type that
can be configured with the vector dimensions. It is also important to note that
LanceDB only understands subclasses of `lancedb.pydantic.LanceModel`
(which itself derives from `pydantic.BaseModel`).
```python
from lancedb.pydantic import Vector, LanceModel

View File

@@ -26,6 +26,18 @@ pip install lancedb
## Embeddings
::: lancedb.embeddings.functions.EmbeddingFunctionRegistry
::: lancedb.embeddings.functions.EmbeddingFunction
::: lancedb.embeddings.functions.TextEmbeddingFunction
::: lancedb.embeddings.functions.SentenceTransformerEmbeddings
::: lancedb.embeddings.functions.OpenAIEmbeddings
::: lancedb.embeddings.functions.OpenClipEmbeddings
::: lancedb.embeddings.with_embeddings
## Context

View File

@@ -128,7 +128,7 @@ def Vector(
def validate(cls, v):
if not isinstance(v, (list, range, np.ndarray)) or len(v) != dim:
raise TypeError("A list of numbers or numpy.ndarray is needed")
return v
return cls(v)
if PYDANTIC_VERSION < (2, 0):
@@ -238,27 +238,18 @@ def pydantic_to_schema(model: Type[pydantic.BaseModel]) -> pa.Schema:
>>> from typing import List, Optional
>>> import pydantic
>>> from lancedb.pydantic import pydantic_to_schema
...
>>> class InnerModel(pydantic.BaseModel):
... a: str
... b: Optional[float]
>>>
>>> class FooModel(pydantic.BaseModel):
... id: int
... s: Optional[str] = None
... s: str
... vec: List[float]
... li: List[int]
... inner: InnerModel
...
>>> schema = pydantic_to_schema(FooModel)
>>> assert schema == pa.schema([
... pa.field("id", pa.int64(), False),
... pa.field("s", pa.utf8(), True),
... pa.field("s", pa.utf8(), False),
... pa.field("vec", pa.list_(pa.float64()), False),
... pa.field("li", pa.list_(pa.int64()), False),
... pa.field("inner", pa.struct([
... pa.field("a", pa.utf8(), False),
... pa.field("b", pa.float64(), True),
... ]), False),
... ])
"""
fields = _pydantic_model_to_fields(model)