diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 84ec462f..2e32d803 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -94,5 +94,5 @@ jobs: run: isort --check --diff --quiet . - name: Run tests run: pytest -m "not slow" -x -v --durations=30 tests - # - name: doctest - # run: pytest --doctest-modules lancedb \ No newline at end of file + - name: doctest + run: pytest --doctest-modules lancedb \ No newline at end of file diff --git a/docs/src/guides/tables.md b/docs/src/guides/tables.md index 85fe935e..ab49ef0d 100644 --- a/docs/src/guides/tables.md +++ b/docs/src/guides/tables.md @@ -84,7 +84,17 @@ A Table is a collection of Records in a LanceDB Database. You can follow along o ``` ### From Pydantic Models - LanceDB supports to create Apache Arrow Schema from a Pydantic BaseModel via pydantic_to_schema() method. + When you create an empty table without data, you must specify the table schema. + LanceDB supports creating tables by specifying a PyArrow schema or a specialized + Pydantic model called `LanceModel`. + + For example, the following `Content` model specifies a table with five columns: + `movie_id`, `vector`, `genres`, `title`, and `imdb_id`. When you create a table, you can + pass the class as the value of the `schema` parameter to `create_table`. + The `vector` column is a `Vector` type, which is a specialized Pydantic type that + can be configured with the vector dimensions. It is also important to note that + LanceDB only understands subclasses of `lancedb.pydantic.LanceModel` + (which itself derives from `pydantic.BaseModel`). 
```python from lancedb.pydantic import Vector, LanceModel diff --git a/docs/src/python/python.md b/docs/src/python/python.md index a72f9bd6..6d50b15c 100644 --- a/docs/src/python/python.md +++ b/docs/src/python/python.md @@ -26,6 +26,18 @@ pip install lancedb ## Embeddings +::: lancedb.embeddings.functions.EmbeddingFunctionRegistry + +::: lancedb.embeddings.functions.EmbeddingFunction + +::: lancedb.embeddings.functions.TextEmbeddingFunction + +::: lancedb.embeddings.functions.SentenceTransformerEmbeddings + +::: lancedb.embeddings.functions.OpenAIEmbeddings + +::: lancedb.embeddings.functions.OpenClipEmbeddings + ::: lancedb.embeddings.with_embeddings ## Context diff --git a/python/lancedb/pydantic.py b/python/lancedb/pydantic.py index 958b8a83..abe97068 100644 --- a/python/lancedb/pydantic.py +++ b/python/lancedb/pydantic.py @@ -128,7 +128,7 @@ def Vector( def validate(cls, v): if not isinstance(v, (list, range, np.ndarray)) or len(v) != dim: raise TypeError("A list of numbers or numpy.ndarray is needed") - return v + return cls(v) if PYDANTIC_VERSION < (2, 0): @@ -238,27 +238,18 @@ def pydantic_to_schema(model: Type[pydantic.BaseModel]) -> pa.Schema: >>> from typing import List, Optional >>> import pydantic >>> from lancedb.pydantic import pydantic_to_schema - ... - >>> class InnerModel(pydantic.BaseModel): - ... a: str - ... b: Optional[float] - >>> >>> class FooModel(pydantic.BaseModel): ... id: int - ... s: Optional[str] = None + ... s: str ... vec: List[float] ... li: List[int] - ... inner: InnerModel + ... >>> schema = pydantic_to_schema(FooModel) >>> assert schema == pa.schema([ ... pa.field("id", pa.int64(), False), - ... pa.field("s", pa.utf8(), True), + ... pa.field("s", pa.utf8(), False), ... pa.field("vec", pa.list_(pa.float64()), False), ... pa.field("li", pa.list_(pa.int64()), False), - ... pa.field("inner", pa.struct([ - ... pa.field("a", pa.utf8(), False), - ... pa.field("b", pa.float64(), True), - ... ]), False), ... 
]) """ fields = _pydantic_model_to_fields(model)