[python] Bump version: 0.1.15 → 0.1.16

make pandas an optional dependency in lancedb as well (#385)
Improve pydantic integration (#384)
2025-12-24 05:49:57 +00:00 · 2023-07-31 18:32:40 +00:00 · 2023-07-31 14:08:58 -04:00 · 2023-07-31 12:16:44 -04:00 · 2023-07-31 10:25:09 +02:00 · 2023-07-28 13:15:21 -07:00
27 changed files with 363 additions and 132 deletions
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -30,7 +30,7 @@ jobs:
        python-version: 3.${{ matrix.python-minor-version }}
    - name: Install lancedb
      run: |
-        pip install -e .
+        pip install -e .[tests]
        pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985
        pip install pytest pytest-mock black isort
    - name: Black
@@ -59,7 +59,7 @@ jobs:
        python-version: "3.11"
    - name: Install lancedb
      run: |
-        pip install -e .
+        pip install -e .[tests]
        pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985
        pip install pytest pytest-mock black
    - name: Black
--- a/docs/src/python/pydantic.md
+++ b/docs/src/python/pydantic.md
@@ -1,6 +1,8 @@
 # Pydantic

 [Pydantic](https://docs.pydantic.dev/latest/) is a data validation library in Python.
+LanceDB integrates with Pydantic for schema inference, data ingestion, and query result casting.
+

 ## Schema

--- a/node/package-lock.json
+++ b/node/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "vectordb",
-  "version": "0.1.18",
+  "version": "0.1.19",
  "lockfileVersion": 2,
  "requires": true,
  "packages": {
    "": {
      "name": "vectordb",
-      "version": "0.1.18",
+      "version": "0.1.19",
      "cpu": [
        "x64",
        "arm64"
@@ -51,11 +51,11 @@
        "typescript": "*"
      },
      "optionalDependencies": {
-        "@lancedb/vectordb-darwin-arm64": "0.1.18",
-        "@lancedb/vectordb-darwin-x64": "0.1.18",
-        "@lancedb/vectordb-linux-arm64-gnu": "0.1.18",
-        "@lancedb/vectordb-linux-x64-gnu": "0.1.18",
-        "@lancedb/vectordb-win32-x64-msvc": "0.1.18"
+        "@lancedb/vectordb-darwin-arm64": "0.1.19",
+        "@lancedb/vectordb-darwin-x64": "0.1.19",
+        "@lancedb/vectordb-linux-arm64-gnu": "0.1.19",
+        "@lancedb/vectordb-linux-x64-gnu": "0.1.19",
+        "@lancedb/vectordb-win32-x64-msvc": "0.1.19"
      }
    },
    "node_modules/@apache-arrow/ts": {
@@ -315,9 +315,9 @@
      }
    },
    "node_modules/@lancedb/vectordb-darwin-arm64": {
-      "version": "0.1.18",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.1.18.tgz",
-      "integrity": "sha512-vu8MCFgaAAGmTJF+4RaoApROMpRVVgrCk+V9my4adAfWkkXbSmtxiDgiIwwL1VqdGb8UwzGn3kVbNW7idE1ojA==",
+      "version": "0.1.19",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.1.19.tgz",
+      "integrity": "sha512-efQhJkBKvMNhjFq3Sw3/qHo9D9gb9UqiIr98n3STsbNxBQjMnWemXn91Ckl40siRG1O8qXcINW7Qs/EGmus+kg==",
      "cpu": [
        "arm64"
      ],
@@ -327,9 +327,9 @@
      ]
    },
    "node_modules/@lancedb/vectordb-darwin-x64": {
-      "version": "0.1.18",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.1.18.tgz",
-      "integrity": "sha512-ZU30bd6frRyKJ515ow972PlqO2wIiNT4Ohor9+KbUwl/VKDyAwKOKG8cWhRJXTxk0k1oqpiJ6+Q28TcYJ0sSAw==",
+      "version": "0.1.19",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.1.19.tgz",
+      "integrity": "sha512-r6OZNVyemAssABz2w7CRhe7dyREwBEfTytn+ux1zzTnzsgMgDovCQ0rQ3WZcxWvcy7SFCxiemA9IP1b/lsb4tQ==",
      "cpu": [
        "x64"
      ],
@@ -339,9 +339,9 @@
      ]
    },
    "node_modules/@lancedb/vectordb-linux-arm64-gnu": {
-      "version": "0.1.18",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.1.18.tgz",
-      "integrity": "sha512-2UroC026bUYwyciSRonYlXei0SoYbKgfWpozxYOu7GgBAV2CQQtaAPgWJTEl6ZiCNeBmBTx+j0h3+ydUfZA73Q==",
+      "version": "0.1.19",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.1.19.tgz",
+      "integrity": "sha512-mL/hRmZp6Kw7hmGJBdOZfp/tTYiCdlOcs8DA/+nr2eiXERv0gIhyiKvr2P5DwbBmut3qXEkDalMHTo95BSdL2A==",
      "cpu": [
        "arm64"
      ],
@@ -351,9 +351,9 @@
      ]
    },
    "node_modules/@lancedb/vectordb-linux-x64-gnu": {
-      "version": "0.1.18",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.1.18.tgz",
-      "integrity": "sha512-DoQBskl22JAJFZh219ZOJ6o+f1niTZp0qRYngHa/kTIpLKzHWQ0OTtMCz32VBAjAsKjSLNxHE8rrT/S6tvS7KQ==",
+      "version": "0.1.19",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.1.19.tgz",
+      "integrity": "sha512-AG0FHksbbr+cHVKPi4B8cmBtqb6T9E0uaK4kyZkXrX52/xtv9RYVZcykaB/tSSm0XNFPWWRnx9R8UqNZV/hxMA==",
      "cpu": [
        "x64"
      ],
@@ -363,9 +363,9 @@
      ]
    },
    "node_modules/@lancedb/vectordb-win32-x64-msvc": {
-      "version": "0.1.18",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.1.18.tgz",
-      "integrity": "sha512-a/kUM3V6rWuXS80pPECYxKfCUAnq56Of/GPCvnAkpk9C9ldyX10iff4aA6DiPHjEk9V2ytqDfJKl9N3QcMLKLA==",
+      "version": "0.1.19",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.1.19.tgz",
+      "integrity": "sha512-PDWZ2hvLVXH4Z4WIO1rsWY8ev3NpNm7aXlaey32P+l1Iz9Hia9+F2GBpp2UiEQKfvbk82ucAvBLRmpSsHY8Tlw==",
      "cpu": [
        "x64"
      ],
@@ -4852,33 +4852,33 @@
      }
    },
    "@lancedb/vectordb-darwin-arm64": {
-      "version": "0.1.18",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.1.18.tgz",
-      "integrity": "sha512-vu8MCFgaAAGmTJF+4RaoApROMpRVVgrCk+V9my4adAfWkkXbSmtxiDgiIwwL1VqdGb8UwzGn3kVbNW7idE1ojA==",
+      "version": "0.1.19",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.1.19.tgz",
+      "integrity": "sha512-efQhJkBKvMNhjFq3Sw3/qHo9D9gb9UqiIr98n3STsbNxBQjMnWemXn91Ckl40siRG1O8qXcINW7Qs/EGmus+kg==",
      "optional": true
    },
    "@lancedb/vectordb-darwin-x64": {
-      "version": "0.1.18",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.1.18.tgz",
-      "integrity": "sha512-ZU30bd6frRyKJ515ow972PlqO2wIiNT4Ohor9+KbUwl/VKDyAwKOKG8cWhRJXTxk0k1oqpiJ6+Q28TcYJ0sSAw==",
+      "version": "0.1.19",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.1.19.tgz",
+      "integrity": "sha512-r6OZNVyemAssABz2w7CRhe7dyREwBEfTytn+ux1zzTnzsgMgDovCQ0rQ3WZcxWvcy7SFCxiemA9IP1b/lsb4tQ==",
      "optional": true
    },
    "@lancedb/vectordb-linux-arm64-gnu": {
-      "version": "0.1.18",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.1.18.tgz",
-      "integrity": "sha512-2UroC026bUYwyciSRonYlXei0SoYbKgfWpozxYOu7GgBAV2CQQtaAPgWJTEl6ZiCNeBmBTx+j0h3+ydUfZA73Q==",
+      "version": "0.1.19",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.1.19.tgz",
+      "integrity": "sha512-mL/hRmZp6Kw7hmGJBdOZfp/tTYiCdlOcs8DA/+nr2eiXERv0gIhyiKvr2P5DwbBmut3qXEkDalMHTo95BSdL2A==",
      "optional": true
    },
    "@lancedb/vectordb-linux-x64-gnu": {
-      "version": "0.1.18",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.1.18.tgz",
-      "integrity": "sha512-DoQBskl22JAJFZh219ZOJ6o+f1niTZp0qRYngHa/kTIpLKzHWQ0OTtMCz32VBAjAsKjSLNxHE8rrT/S6tvS7KQ==",
+      "version": "0.1.19",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.1.19.tgz",
+      "integrity": "sha512-AG0FHksbbr+cHVKPi4B8cmBtqb6T9E0uaK4kyZkXrX52/xtv9RYVZcykaB/tSSm0XNFPWWRnx9R8UqNZV/hxMA==",
      "optional": true
    },
    "@lancedb/vectordb-win32-x64-msvc": {
-      "version": "0.1.18",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.1.18.tgz",
-      "integrity": "sha512-a/kUM3V6rWuXS80pPECYxKfCUAnq56Of/GPCvnAkpk9C9ldyX10iff4aA6DiPHjEk9V2ytqDfJKl9N3QcMLKLA==",
+      "version": "0.1.19",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.1.19.tgz",
+      "integrity": "sha512-PDWZ2hvLVXH4Z4WIO1rsWY8ev3NpNm7aXlaey32P+l1Iz9Hia9+F2GBpp2UiEQKfvbk82ucAvBLRmpSsHY8Tlw==",
      "optional": true
    },
    "@neon-rs/cli": {
--- a/node/src/test/test.ts
+++ b/node/src/test/test.ts
@@ -250,6 +250,14 @@ describe('LanceDB client', function () {
      const createIndex = table.createIndex({ type: 'ivf_pq', column: 'name', num_partitions: 2, max_iters: 2, num_sub_vectors: 2 })
      await expect(createIndex).to.be.rejectedWith(/VectorIndex requires the column data type to be fixed size list of float32s/)
    })
+
+    it('it should fail when the column is not a vector', async function () {
+      const uri = await createTestDB(32, 300)
+      const con = await lancedb.connect(uri)
+      const table = await con.openTable('vectors')
+      const createIndex = table.createIndex({ type: 'ivf_pq', column: 'name', num_partitions: -1, max_iters: 2, num_sub_vectors: 2 })
+      await expect(createIndex).to.be.rejectedWith('num_partitions: must be > 0')
+    })
  })

  describe('when using a custom embedding function', function () {
--- a/python/.bumpversion.cfg
+++ b/python/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.1.15
+current_version = 0.1.16
 commit = True
 message = [python] Bump version: {current_version} → {new_version}
 tag = True
--- a/python/lancedb/common.py
+++ b/python/lancedb/common.py
@@ -11,17 +11,18 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 from pathlib import Path
-from typing import List, Union
+from typing import Iterable, List, Union

 import numpy as np
-import pandas as pd
 import pyarrow as pa

+from .util import safe_import_pandas
+
+pd = safe_import_pandas()
+
+DATA = Union[List[dict], dict, "pd.DataFrame", pa.Table, Iterable[pa.RecordBatch]]
 VEC = Union[list, np.ndarray, pa.Array, pa.ChunkedArray]
 URI = Union[str, Path]
-
-# TODO support generator
-DATA = Union[List[dict], dict, pd.DataFrame]
 VECTOR_COLUMN_NAME = "vector"


--- a/python/lancedb/context.py
+++ b/python/lancedb/context.py
@@ -12,12 +12,13 @@
 #  limitations under the License.
 from __future__ import annotations

-import pandas as pd
-
 from .exceptions import MissingColumnError, MissingValueError
+from .util import safe_import_pandas
+
+pd = safe_import_pandas()


-def contextualize(raw_df: pd.DataFrame) -> Contextualizer:
+def contextualize(raw_df: "pd.DataFrame") -> Contextualizer:
    """Create a Contextualizer object for the given DataFrame.

    Used to create context windows. Context windows are rolling subsets of text
@@ -175,8 +176,12 @@ class Contextualizer:
        self._min_window_size = min_window_size
        return self

-    def to_df(self) -> pd.DataFrame:
+    def to_df(self) -> "pd.DataFrame":
        """Create the context windows and return a DataFrame."""
+        if pd is None:
+            raise ImportError(
+                "pandas is required to create context windows using lancedb"
+            )

        if self._text_col not in self._raw_df.columns.tolist():
            raise MissingColumnError(self._text_col)
--- a/python/lancedb/db.py
+++ b/python/lancedb/db.py
@@ -16,9 +16,8 @@ from __future__ import annotations
 import os
 from abc import ABC, abstractmethod
 from pathlib import Path
-from typing import Dict, Iterable, List, Optional, Tuple, Union
+from typing import Optional

-import pandas as pd
 import pyarrow as pa
 from pyarrow import fs

@@ -39,9 +38,7 @@ class DBConnection(ABC):
    def create_table(
        self,
        name: str,
-        data: Optional[
-            Union[List[dict], dict, pd.DataFrame, pa.Table, Iterable[pa.RecordBatch]],
-        ] = None,
+        data: Optional[DATA] = None,
        schema: Optional[pa.Schema] = None,
        mode: str = "create",
        on_bad_vectors: str = "error",
@@ -279,7 +276,7 @@ class LanceDBConnection(DBConnection):
    def create_table(
        self,
        name: str,
-        data: Optional[Union[List[dict], dict, pd.DataFrame]] = None,
+        data: Optional[DATA] = None,
        schema: pa.Schema = None,
        mode: str = "create",
        on_bad_vectors: str = "error",
@@ -319,14 +316,20 @@ class LanceDBConnection(DBConnection):
        """
        return LanceTable.open(self, name)

-    def drop_table(self, name: str):
+    def drop_table(self, name: str, ignore_missing: bool = False):
        """Drop a table from the database.

        Parameters
        ----------
        name: str
            The name of the table.
+        ignore_missing: bool, default False
+            If True, ignore if the table does not exist.
        """
-        filesystem, path = fs_from_uri(self.uri)
-        table_path = os.path.join(path, name + ".lance")
-        filesystem.delete_dir(table_path)
+        try:
+            filesystem, path = fs_from_uri(self.uri)
+            table_path = os.path.join(path, name + ".lance")
+            filesystem.delete_dir(table_path)
+        except FileNotFoundError:
+            if not ignore_missing:
+                raise
--- a/python/lancedb/embeddings.py
+++ b/python/lancedb/embeddings.py
@@ -16,15 +16,19 @@ import sys
 from typing import Callable, Union

 import numpy as np
-import pandas as pd
 import pyarrow as pa
 from lance.vector import vec_to_table
 from retry import retry

+from .util import safe_import_pandas
+
+pd = safe_import_pandas()
+DATA = Union[pa.Table, "pd.DataFrame"]
+

 def with_embeddings(
    func: Callable,
-    data: Union[pa.Table, pd.DataFrame],
+    data: DATA,
    column: str = "text",
    wrap_api: bool = True,
    show_progress: bool = False,
@@ -60,7 +64,7 @@ def with_embeddings(
    func = func.batch_size(batch_size)
    if show_progress:
        func = func.show_progress()
-    if isinstance(data, pd.DataFrame):
+    if pd is not None and isinstance(data, pd.DataFrame):
        data = pa.Table.from_pandas(data, preserve_index=False)
    embeddings = func(data[column].to_numpy())
    table = vec_to_table(np.array(embeddings))
--- a/python/lancedb/pydantic.py
+++ b/python/lancedb/pydantic.py
@@ -249,3 +249,36 @@ def pydantic_to_schema(model: Type[pydantic.BaseModel]) -> pa.Schema:
    """
    fields = _pydantic_model_to_fields(model)
    return pa.schema(fields)
+
+
+class LanceModel(pydantic.BaseModel):
+    """
+    A Pydantic Model base class that can be converted to a LanceDB Table.
+
+    Examples
+    --------
+    >>> import lancedb
+    >>> from lancedb.pydantic import LanceModel, vector
+    >>>
+    >>> class TestModel(LanceModel):
+    ...     name: str
+    ...     vector: vector(2)
+    ...
+    >>> db = lancedb.connect("/tmp")
+    >>> table = db.create_table("test", schema=TestModel.to_arrow_schema())
+    >>> table.add([
+    ...     TestModel(name="test", vector=[1.0, 2.0])
+    ... ])
+    >>> table.search([0., 0.]).limit(1).to_pydantic(TestModel)
+    [TestModel(name='test', vector=FixedSizeList(dim=2))]
+    """
+
+    @classmethod
+    def to_arrow_schema(cls):
+        return pydantic_to_schema(cls)
+
+    @classmethod
+    def field_names(cls) -> List[str]:
+        if PYDANTIC_VERSION.major < 2:
+            return list(cls.__fields__.keys())
+        return list(cls.model_fields.keys())
--- a/python/lancedb/query.py
+++ b/python/lancedb/query.py
@@ -13,17 +13,20 @@

 from __future__ import annotations

-from typing import List, Literal, Optional, Union
+from typing import List, Literal, Optional, Type, Union

 import numpy as np
-import pandas as pd
 import pyarrow as pa
-from pydantic import BaseModel
+import pydantic

 from .common import VECTOR_COLUMN_NAME
+from .pydantic import LanceModel
+from .util import safe_import_pandas
+
+pd = safe_import_pandas()


-class Query(BaseModel):
+class Query(pydantic.BaseModel):
    """A Query"""

    vector_column: str = VECTOR_COLUMN_NAME
@@ -198,7 +201,7 @@ class LanceQueryBuilder:
        self._refine_factor = refine_factor
        return self

-    def to_df(self) -> pd.DataFrame:
+    def to_df(self) -> "pd.DataFrame":
        """
        Execute the query and return the results as a pandas DataFrame.
        In addition to the selected columns, LanceDB also returns a vector
@@ -230,9 +233,26 @@ class LanceQueryBuilder:
        )
        return self._table._execute_query(query)

+    def to_pydantic(self, model: Type[LanceModel]) -> List[LanceModel]:
+        """Return the table as a list of pydantic models.
+
+        Parameters
+        ----------
+        model: Type[LanceModel]
+            The pydantic model to use.
+
+        Returns
+        -------
+        List[LanceModel]
+        """
+        return [
+            model(**{k: v for k, v in row.items() if k in model.field_names()})
+            for row in self.to_arrow().to_pylist()
+        ]
+

 class LanceFtsQueryBuilder(LanceQueryBuilder):
-    def to_arrow(self) -> pd.Table:
+    def to_arrow(self) -> pa.Table:
        try:
            import tantivy
        except ImportError:
--- a/python/lancedb/remote/db.py
+++ b/python/lancedb/remote/db.py
@@ -20,7 +20,6 @@ import pyarrow as pa

 from lancedb.common import DATA
 from lancedb.db import DBConnection
-from lancedb.schema import schema_to_json
 from lancedb.table import Table, _sanitize_data

 from .arrow import to_ipc_binary
--- a/python/lancedb/remote/table.py
+++ b/python/lancedb/remote/table.py
@@ -16,11 +16,11 @@ from functools import cached_property
 from typing import Union

 import pyarrow as pa
+from lance import json_to_schema

 from lancedb.common import DATA, VEC, VECTOR_COLUMN_NAME

-from ..query import LanceQueryBuilder, Query
-from ..schema import json_to_schema
+from ..query import LanceQueryBuilder
 from ..table import Query, Table, _sanitize_data
 from .arrow import to_ipc_binary
 from .client import ARROW_STREAM_CONTENT_TYPE
--- a/python/lancedb/schema.py
+++ b/python/lancedb/schema.py
@@ -12,11 +12,7 @@
 #  limitations under the License.

 """Schema related utilities."""
-
-from typing import Any, Dict, Type
-
 import pyarrow as pa
-from lance import json_to_schema, schema_to_json


 def vector(dimension: int, value_type: pa.DataType = pa.float32()) -> pa.DataType:
--- a/python/lancedb/table.py
+++ b/python/lancedb/table.py
@@ -20,26 +20,32 @@ from typing import Iterable, List, Union

 import lance
 import numpy as np
-import pandas as pd
 import pyarrow as pa
 import pyarrow.compute as pc
 from lance import LanceDataset
 from lance.vector import vec_to_table

 from .common import DATA, VEC, VECTOR_COLUMN_NAME
+from .pydantic import LanceModel
 from .query import LanceFtsQueryBuilder, LanceQueryBuilder, Query
-from .util import fs_from_uri
+from .util import fs_from_uri, safe_import_pandas
+
+pd = safe_import_pandas()


 def _sanitize_data(data, schema, on_bad_vectors, fill_value):
    if isinstance(data, list):
+        # convert to list of dict if data is a bunch of LanceModels
+        if isinstance(data[0], LanceModel):
+            schema = data[0].__class__.to_arrow_schema()
+            data = [dict(d) for d in data]
        data = pa.Table.from_pylist(data)
        data = _sanitize_schema(
            data, schema=schema, on_bad_vectors=on_bad_vectors, fill_value=fill_value
        )
    if isinstance(data, dict):
        data = vec_to_table(data)
-    if isinstance(data, pd.DataFrame):
+    if pd is not None and isinstance(data, pd.DataFrame):
        data = pa.Table.from_pandas(data)
        data = _sanitize_schema(
            data, schema=schema, on_bad_vectors=on_bad_vectors, fill_value=fill_value
@@ -94,7 +100,7 @@ class Table(ABC):
        """
        raise NotImplementedError

-    def to_pandas(self) -> pd.DataFrame:
+    def to_pandas(self):
        """Return the table as a pandas DataFrame.

        Returns
@@ -328,7 +334,7 @@ class LanceTable(Table):
        """Return the first n rows of the table."""
        return self._dataset.head(n)

-    def to_pandas(self) -> pd.DataFrame:
+    def to_pandas(self) -> "pd.DataFrame":
        """Return the table as a pandas DataFrame.

        Returns
--- a/python/lancedb/util.py
+++ b/python/lancedb/util.py
@@ -15,7 +15,6 @@ import os
 from typing import Tuple
 from urllib.parse import urlparse

-import pyarrow as pa
 import pyarrow.fs as pa_fs


@@ -76,3 +75,12 @@ def fs_from_uri(uri: str) -> Tuple[pa_fs.FileSystem, str]:
        return fs, path

    return pa_fs.FileSystem.from_uri(uri)
+
+
+def safe_import_pandas():
+    try:
+        import pandas as pd
+
+        return pd
+    except ImportError:
+        return None
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "lancedb"
-version = "0.1.15"
-dependencies = ["pylance~=0.5.8", "ratelimiter", "retry", "tqdm", "aiohttp", "pydantic", "attr", "semver"]
+version = "0.1.16"
+dependencies = ["pylance==0.5.10", "ratelimiter", "retry", "tqdm", "aiohttp", "pydantic", "attr", "semver"]
 description = "lancedb"
 authors = [
    { name = "LanceDB Devs", email = "dev@lancedb.com" },
@@ -37,7 +37,7 @@ repository = "https://github.com/lancedb/lancedb"

 [project.optional-dependencies]
 tests = [
-    "pytest", "pytest-mock", "pytest-asyncio"
+    "pandas>=1.4", "pytest", "pytest-mock", "pytest-asyncio"
 ]
 dev = [
    "ruff", "pre-commit", "black"
--- a/python/tests/test_db.py
+++ b/python/tests/test_db.py
@@ -149,6 +149,10 @@ def test_delete_table(tmp_path):
    db.create_table("test", data=data)
    assert db.table_names() == ["test"]

+    # dropping a table that does not exist should pass
+    # if ignore_missing=True
+    db.drop_table("does_not_exist", ignore_missing=True)
+

 def test_empty_or_nonexistent_table(tmp_path):
    db = lancedb.connect(tmp_path)
--- a/python/tests/test_pydantic.py
+++ b/python/tests/test_pydantic.py
@@ -20,7 +20,7 @@ import pyarrow as pa
 import pydantic
 import pytest

-from lancedb.pydantic import PYDANTIC_VERSION, pydantic_to_schema, vector
+from lancedb.pydantic import PYDANTIC_VERSION, LanceModel, pydantic_to_schema, vector


@pytest.mark.skipif(
@@ -163,3 +163,13 @@ def test_fixed_size_list_validation():
        TestModel(vec=range(7))

    TestModel(vec=range(8))
+
+
+def test_lance_model():
+    class TestModel(LanceModel):
+        vec: vector(16)
+        li: List[int]
+
+    schema = pydantic_to_schema(TestModel)
+    assert schema == TestModel.to_arrow_schema()
+    assert TestModel.field_names() == ["vec", "li"]
--- a/python/tests/test_query.py
+++ b/python/tests/test_query.py
@@ -20,6 +20,7 @@ import pyarrow as pa
 import pytest

 from lancedb.db import LanceDBConnection
+from lancedb.pydantic import LanceModel, vector
 from lancedb.query import LanceQueryBuilder, Query
 from lancedb.table import LanceTable

@@ -64,6 +65,24 @@ def table(tmp_path) -> MockTable:
    return MockTable(tmp_path)


+def test_cast(table):
+    class TestModel(LanceModel):
+        vector: vector(2)
+        id: int
+        str_field: str
+        float_field: float
+
+    q = LanceQueryBuilder(table, [0, 0], "vector").limit(1)
+    results = q.to_pydantic(TestModel)
+    assert len(results) == 1
+    r0 = results[0]
+    assert isinstance(r0, TestModel)
+    assert r0.id == 1
+    assert r0.vector == [1, 2]
+    assert r0.str_field == "a"
+    assert r0.float_field == 1.0
+
+
 def test_query_builder(table):
    df = LanceQueryBuilder(table, [0, 0], "vector").limit(1).select(["id"]).to_df()
    assert df["id"].values[0] == 1
--- a/python/tests/test_table.py
+++ b/python/tests/test_table.py
@@ -13,15 +13,16 @@

 import functools
 from pathlib import Path
+from typing import List
 from unittest.mock import PropertyMock, patch

 import numpy as np
 import pandas as pd
 import pyarrow as pa
 import pytest
-from lance.vector import vec_to_table

 from lancedb.db import LanceDBConnection
+from lancedb.pydantic import LanceModel, vector
 from lancedb.table import LanceTable


@@ -135,6 +136,17 @@ def test_add(db):
    _add(table, schema)


+def test_add_pydantic_model(db):
+    class TestModel(LanceModel):
+        vector: vector(16)
+        li: List[int]
+
+    data = TestModel(vector=list(range(16)), li=[1, 2, 3])
+    table = LanceTable.create(db, "test", data=[data])
+    assert len(table) == 1
+    assert table.schema == TestModel.to_arrow_schema()
+
+
 def _add(table, schema):
    # table = LanceTable(db, "test")
    assert len(table) == 2
--- a/rust/ffi/node/Cargo.toml
+++ b/rust/ffi/node/Cargo.toml
@@ -13,6 +13,7 @@ crate-type = ["cdylib"]
 arrow-array = { workspace = true }
 arrow-ipc = { workspace = true }
 arrow-schema = { workspace = true }
+conv = "0.3.3"
 once_cell = "1"
 futures = "0.3"
 half = { workspace = true }
--- a/rust/ffi/node/src/error.rs
+++ b/rust/ffi/node/src/error.rs
@@ -22,8 +22,15 @@ use snafu::Snafu;
 pub enum Error {
    #[snafu(display("column '{name}' is missing"))]
    MissingColumn { name: String },
+    #[snafu(display("{name}: {message}"))]
+    RangeError { name: String, message: String },
+    #[snafu(display("{index_type} is not a valid index type"))]
+    InvalidIndexType { index_type: String },
+
    #[snafu(display("{message}"))]
    LanceDB { message: String },
+    #[snafu(display("{message}"))]
+    Neon { message: String },
 }

 pub type Result<T> = std::result::Result<T, Error>;
@@ -52,6 +59,14 @@ impl From<ArrowError> for Error {
    }
 }

+impl From<neon::result::Throw> for Error {
+    fn from(value: neon::result::Throw) -> Self {
+        Self::Neon {
+            message: value.to_string(),
+        }
+    }
+}
+
 /// ResultExt is used to transform a [`Result`] into a [`NeonResult`],
 /// so it can be returned as a JavaScript error
 /// Copied from [Neon](https://github.com/neon-bindings/neon/blob/4c2e455a9e6814f1ba0178616d63caec7f4df317/crates/neon/src/result/mod.rs#L88)
--- a/rust/ffi/node/src/index/vector.rs
+++ b/rust/ffi/node/src/index/vector.rs
@@ -22,12 +22,15 @@ use neon::prelude::*;

 use vectordb::index::vector::{IvfPQIndexBuilder, VectorIndexBuilder};

+use crate::error::Error::InvalidIndexType;
+use crate::error::ResultExt;
+use crate::neon_ext::js_object_ext::JsObjectExt;
 use crate::{runtime, JsTable};

 pub(crate) fn table_create_vector_index(mut cx: FunctionContext) -> JsResult<JsPromise> {
    let js_table = cx.this().downcast_or_throw::<JsBox<JsTable>, _>(&mut cx)?;
    let index_params = cx.argument::<JsObject>(0)?;
-    let index_params_builder = get_index_params_builder(&mut cx, index_params).unwrap();
+    let index_params_builder = get_index_params_builder(&mut cx, index_params).or_throw(&mut cx)?;

    let rt = runtime(&mut cx)?;
    let channel = cx.channel();
@@ -54,27 +57,21 @@ pub(crate) fn table_create_vector_index(mut cx: FunctionContext) -> JsResult<JsP
 fn get_index_params_builder(
    cx: &mut FunctionContext,
    obj: Handle<JsObject>,
-) -> Result<impl VectorIndexBuilder, String> {
-    let idx_type = obj
-        .get::<JsString, _, _>(cx, "type")
-        .map_err(|t| t.to_string())?
-        .value(cx);
+) -> crate::error::Result<impl VectorIndexBuilder> {
+    let idx_type = obj.get::<JsString, _, _>(cx, "type")?.value(cx);

    match idx_type.as_str() {
        "ivf_pq" => {
            let mut index_builder: IvfPQIndexBuilder = IvfPQIndexBuilder::new();
            let mut pq_params = PQBuildParams::default();

-            obj.get_opt::<JsString, _, _>(cx, "column")
-                .map_err(|t| t.to_string())?
+            obj.get_opt::<JsString, _, _>(cx, "column")?
                .map(|s| index_builder.column(s.value(cx)));

-            obj.get_opt::<JsString, _, _>(cx, "index_name")
-                .map_err(|t| t.to_string())?
+            obj.get_opt::<JsString, _, _>(cx, "index_name")?
                .map(|s| index_builder.index_name(s.value(cx)));

-            obj.get_opt::<JsString, _, _>(cx, "metric_type")
-                .map_err(|t| t.to_string())?
+            obj.get_opt::<JsString, _, _>(cx, "metric_type")?
                .map(|s| MetricType::try_from(s.value(cx).as_str()))
                .map(|mt| {
                    let metric_type = mt.unwrap();
@@ -82,15 +79,8 @@ fn get_index_params_builder(
                    pq_params.metric_type = metric_type;
                });

-            let num_partitions = obj
-                .get_opt::<JsNumber, _, _>(cx, "num_partitions")
-                .map_err(|t| t.to_string())?
-                .map(|s| s.value(cx) as usize);
-
-            let max_iters = obj
-                .get_opt::<JsNumber, _, _>(cx, "max_iters")
-                .map_err(|t| t.to_string())?
-                .map(|s| s.value(cx) as usize);
+            let num_partitions = obj.get_opt_usize(cx, "num_partitions")?;
+            let max_iters = obj.get_opt_usize(cx, "max_iters")?;

            num_partitions.map(|np| {
                let max_iters = max_iters.unwrap_or(50);
@@ -102,32 +92,28 @@ fn get_index_params_builder(
                index_builder.ivf_params(ivf_params)
            });

-            obj.get_opt::<JsBoolean, _, _>(cx, "use_opq")
-                .map_err(|t| t.to_string())?
+            obj.get_opt::<JsBoolean, _, _>(cx, "use_opq")?
                .map(|s| pq_params.use_opq = s.value(cx));

-            obj.get_opt::<JsNumber, _, _>(cx, "num_sub_vectors")
-                .map_err(|t| t.to_string())?
-                .map(|s| pq_params.num_sub_vectors = s.value(cx) as usize);
+            obj.get_opt_usize(cx, "num_sub_vectors")?
+                .map(|s| pq_params.num_sub_vectors = s);

-            obj.get_opt::<JsNumber, _, _>(cx, "num_bits")
-                .map_err(|t| t.to_string())?
-                .map(|s| pq_params.num_bits = s.value(cx) as usize);
+            obj.get_opt_usize(cx, "num_bits")?
+                .map(|s| pq_params.num_bits = s);

-            obj.get_opt::<JsNumber, _, _>(cx, "max_iters")
-                .map_err(|t| t.to_string())?
-                .map(|s| pq_params.max_iters = s.value(cx) as usize);
+            obj.get_opt_usize(cx, "max_iters")?
+                .map(|s| pq_params.max_iters = s);

-            obj.get_opt::<JsNumber, _, _>(cx, "max_opq_iters")
-                .map_err(|t| t.to_string())?
-                .map(|s| pq_params.max_opq_iters = s.value(cx) as usize);
+            obj.get_opt_usize(cx, "max_opq_iters")?
+                .map(|s| pq_params.max_opq_iters = s);

-            obj.get_opt::<JsBoolean, _, _>(cx, "replace")
-                .map_err(|t| t.to_string())?
+            obj.get_opt::<JsBoolean, _, _>(cx, "replace")?
                .map(|s| index_builder.replace(s.value(cx)));

            Ok(index_builder)
        }
-        t => Err(format!("{} is not a valid index type", t).to_string()),
+        index_type => Err(InvalidIndexType {
+            index_type: index_type.into(),
+        }),
    }
 }
--- a/rust/ffi/node/src/lib.rs
+++ b/rust/ffi/node/src/lib.rs
@@ -31,16 +31,17 @@ use once_cell::sync::OnceCell;
 use tokio::runtime::Runtime;

 use vectordb::database::Database;
-use vectordb::error::Error;
 use vectordb::table::{ReadParams, Table};

 use crate::arrow::{arrow_buffer_to_record_batch, record_batch_to_buffer};
 use crate::error::ResultExt;
+use crate::neon_ext::js_object_ext::JsObjectExt;

 mod arrow;
 mod convert;
 mod error;
 mod index;
+mod neon_ext;

 struct JsDatabase {
    database: Arc<Database>,
@@ -245,12 +246,9 @@ fn table_search(mut cx: FunctionContext) -> JsResult<JsPromise> {
        .get_opt::<JsString, _, _>(&mut cx, "_filter")?
        .map(|s| s.value(&mut cx));
    let refine_factor = query_obj
-        .get_opt::<JsNumber, _, _>(&mut cx, "_refineFactor")?
-        .map(|s| s.value(&mut cx))
-        .map(|i| i as u32);
-    let nprobes = query_obj
-        .get::<JsNumber, _, _>(&mut cx, "_nprobes")?
-        .value(&mut cx) as usize;
+        .get_opt_u32(&mut cx, "_refineFactor")
+        .or_throw(&mut cx)?;
+    let nprobes = query_obj.get_usize(&mut cx, "_nprobes").or_throw(&mut cx)?;
    let metric_type = query_obj
        .get_opt::<JsString, _, _>(&mut cx, "_metricType")?
        .map(|s| s.value(&mut cx))
@@ -277,7 +275,11 @@ fn table_search(mut cx: FunctionContext) -> JsResult<JsPromise> {
            .select(select);
        let record_batch_stream = builder.execute();
        let results = record_batch_stream
-            .and_then(|stream| stream.try_collect::<Vec<_>>().map_err(Error::from))
+            .and_then(|stream| {
+                stream
+                    .try_collect::<Vec<_>>()
+                    .map_err(vectordb::error::Error::from)
+            })
            .await;

        deferred.settle_with(&channel, move |mut cx| {
--- a/rust/ffi/node/src/neon_ext.rs
+++ b/rust/ffi/node/src/neon_ext.rs
@@ -0,0 +1,15 @@
+// Copyright 2023 Lance Developers.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+pub mod js_object_ext;
--- a/rust/ffi/node/src/neon_ext/js_object_ext.rs
+++ b/rust/ffi/node/src/neon_ext/js_object_ext.rs
@@ -0,0 +1,82 @@
+// Copyright 2023 Lance Developers.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use crate::error::{Error, Result};
+use neon::prelude::*;
+
+/// Extension trait for neon's [`JsObject`]: reads numeric properties as range-checked integers.
+pub trait JsObjectExt {
+    fn get_opt_u32(&self, cx: &mut FunctionContext, key: &str) -> Result<Option<u32>>;
+    fn get_usize(&self, cx: &mut FunctionContext, key: &str) -> Result<usize>;
+    fn get_opt_usize(&self, cx: &mut FunctionContext, key: &str) -> Result<Option<usize>>;
+}
+
+impl JsObjectExt for JsObject {
+    fn get_opt_u32(&self, cx: &mut FunctionContext, key: &str) -> Result<Option<u32>> {
+        match self.get_opt::<JsNumber, _, _>(cx, key)? {
+            Some(number) => f64_to_u32_safe(number.value(cx), key).map(Some),
+            None => Ok(None), // nothing to convert
+        }
+    }
+    // Required property: a failed lookup propagates via `?` before any conversion is attempted.
+    fn get_usize(&self, cx: &mut FunctionContext, key: &str) -> Result<usize> {
+        let number = self.get::<JsNumber, _, _>(cx, key)?;
+        f64_to_usize_safe(number.value(cx), key)
+    }
+    // Optional property: same shape as `get_opt_u32`, but converting to `usize`.
+    fn get_opt_usize(&self, cx: &mut FunctionContext, key: &str) -> Result<Option<usize>> {
+        match self.get_opt::<JsNumber, _, _>(cx, key)? {
+            Some(number) => f64_to_usize_safe(number.value(cx), key).map(Some),
+            None => Ok(None), // nothing to convert
+        }
+    }
+}
+
+fn f64_to_u32_safe(n: f64, key: &str) -> Result<u32> {
+    use conv::*;
+    // Range-checked f64 -> u32; failures become a RangeError for `key`. Limits are inclusive.
+    n.approx_as::<u32>().map_err(|e| match e {
+        FloatError::NegOverflow(_) => Error::RangeError {
+            name: key.into(),
+            message: "must be >= 0".to_string(),
+        },
+        FloatError::PosOverflow(_) => Error::RangeError {
+            name: key.into(),
+            message: format!("must be <= {}", u32::MAX),
+        },
+        FloatError::NotANumber(_) => Error::RangeError {
+            name: key.into(),
+            message: "not a valid number".to_string(),
+        },
+    })
+}
+
+fn f64_to_usize_safe(n: f64, key: &str) -> Result<usize> {
+    use conv::*;
+    // Range-checked f64 -> usize; failures become a RangeError for `key`. Limits are inclusive.
+    n.approx_as::<usize>().map_err(|e| match e {
+        FloatError::NegOverflow(_) => Error::RangeError {
+            name: key.into(),
+            message: "must be >= 0".to_string(),
+        },
+        FloatError::PosOverflow(_) => Error::RangeError {
+            name: key.into(),
+            message: format!("must be <= {}", usize::MAX),
+        },
+        FloatError::NotANumber(_) => Error::RangeError {
+            name: key.into(),
+            message: "not a valid number".to_string(),
+        },
+    })
+}
Author	SHA1	Message	Date
Lance Release	b06e214d29	[python] Bump version: 0.1.15 → 0.1.16	2023-07-31 18:32:40 +00:00
Chang She	c1f8feb6ed	make pandas an optional dependency in lancedb as well (#385 )	2023-07-31 14:08:58 -04:00
Chang She	cada35d5b7	Improve pydantic integration (#384 )	2023-07-31 12:16:44 -04:00
Chang She	2d25c263e9	Implement drop table if exists (#383 )	2023-07-31 10:25:09 +02:00
gsilvestrin	bcd7f66dc7	fix(node): Handle overflows in the node bridge (#372 ) - Fixes many numeric conversions that result in hard to reproduce issues - JsObjectExt extends JsObject with safe methods to extract numeric values	2023-07-28 13:15:21 -07:00
gsilvestrin	1daecac648	fix(python): Pin pylance and add pandas as test dependency (#373 )	2023-07-27 15:21:45 -07:00
Lance Release	b8e656b2a7	Updating package-lock.json	2023-07-27 21:53:30 +00:00
Lance Release	ff7c1193a7	Updating package-lock.json	2023-07-27 21:06:32 +00:00