Compare commits

...

8 Commits

Author SHA1 Message Date
Lance Release
b06e214d29 [python] Bump version: 0.1.15 → 0.1.16 2023-07-31 18:32:40 +00:00
Chang She
c1f8feb6ed make pandas an optional dependency in lancedb as well (#385) 2023-07-31 14:08:58 -04:00
Chang She
cada35d5b7 Improve pydantic integration (#384) 2023-07-31 12:16:44 -04:00
Chang She
2d25c263e9 Implement drop table if exists (#383) 2023-07-31 10:25:09 +02:00
gsilvestrin
bcd7f66dc7 fix(node): Handle overflows in the node bridge (#372)
- Fixes many numeric conversions that result in hard-to-reproduce issues
- JsObjectExt extends JsObject with safe methods to extract numeric values
2023-07-28 13:15:21 -07:00
gsilvestrin
1daecac648 fix(python): Pin pylance and add pandas as test dependency (#373) 2023-07-27 15:21:45 -07:00
Lance Release
b8e656b2a7 Updating package-lock.json 2023-07-27 21:53:30 +00:00
Lance Release
ff7c1193a7 Updating package-lock.json 2023-07-27 21:06:32 +00:00
27 changed files with 363 additions and 132 deletions

View File

@@ -30,7 +30,7 @@ jobs:
python-version: 3.${{ matrix.python-minor-version }} python-version: 3.${{ matrix.python-minor-version }}
- name: Install lancedb - name: Install lancedb
run: | run: |
pip install -e . pip install -e .[tests]
pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985 pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985
pip install pytest pytest-mock black isort pip install pytest pytest-mock black isort
- name: Black - name: Black
@@ -59,7 +59,7 @@ jobs:
python-version: "3.11" python-version: "3.11"
- name: Install lancedb - name: Install lancedb
run: | run: |
pip install -e . pip install -e .[tests]
pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985 pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985
pip install pytest pytest-mock black pip install pytest pytest-mock black
- name: Black - name: Black

View File

@@ -1,6 +1,8 @@
# Pydantic # Pydantic
[Pydantic](https://docs.pydantic.dev/latest/) is a data validation library in Python. [Pydantic](https://docs.pydantic.dev/latest/) is a data validation library in Python.
LanceDB integrates with Pydantic for schema inference, data ingestion, and query result casting.
## Schema ## Schema

74
node/package-lock.json generated
View File

@@ -1,12 +1,12 @@
{ {
"name": "vectordb", "name": "vectordb",
"version": "0.1.18", "version": "0.1.19",
"lockfileVersion": 2, "lockfileVersion": 2,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "vectordb", "name": "vectordb",
"version": "0.1.18", "version": "0.1.19",
"cpu": [ "cpu": [
"x64", "x64",
"arm64" "arm64"
@@ -51,11 +51,11 @@
"typescript": "*" "typescript": "*"
}, },
"optionalDependencies": { "optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.1.18", "@lancedb/vectordb-darwin-arm64": "0.1.19",
"@lancedb/vectordb-darwin-x64": "0.1.18", "@lancedb/vectordb-darwin-x64": "0.1.19",
"@lancedb/vectordb-linux-arm64-gnu": "0.1.18", "@lancedb/vectordb-linux-arm64-gnu": "0.1.19",
"@lancedb/vectordb-linux-x64-gnu": "0.1.18", "@lancedb/vectordb-linux-x64-gnu": "0.1.19",
"@lancedb/vectordb-win32-x64-msvc": "0.1.18" "@lancedb/vectordb-win32-x64-msvc": "0.1.19"
} }
}, },
"node_modules/@apache-arrow/ts": { "node_modules/@apache-arrow/ts": {
@@ -315,9 +315,9 @@
} }
}, },
"node_modules/@lancedb/vectordb-darwin-arm64": { "node_modules/@lancedb/vectordb-darwin-arm64": {
"version": "0.1.18", "version": "0.1.19",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.1.18.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.1.19.tgz",
"integrity": "sha512-vu8MCFgaAAGmTJF+4RaoApROMpRVVgrCk+V9my4adAfWkkXbSmtxiDgiIwwL1VqdGb8UwzGn3kVbNW7idE1ojA==", "integrity": "sha512-efQhJkBKvMNhjFq3Sw3/qHo9D9gb9UqiIr98n3STsbNxBQjMnWemXn91Ckl40siRG1O8qXcINW7Qs/EGmus+kg==",
"cpu": [ "cpu": [
"arm64" "arm64"
], ],
@@ -327,9 +327,9 @@
] ]
}, },
"node_modules/@lancedb/vectordb-darwin-x64": { "node_modules/@lancedb/vectordb-darwin-x64": {
"version": "0.1.18", "version": "0.1.19",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.1.18.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.1.19.tgz",
"integrity": "sha512-ZU30bd6frRyKJ515ow972PlqO2wIiNT4Ohor9+KbUwl/VKDyAwKOKG8cWhRJXTxk0k1oqpiJ6+Q28TcYJ0sSAw==", "integrity": "sha512-r6OZNVyemAssABz2w7CRhe7dyREwBEfTytn+ux1zzTnzsgMgDovCQ0rQ3WZcxWvcy7SFCxiemA9IP1b/lsb4tQ==",
"cpu": [ "cpu": [
"x64" "x64"
], ],
@@ -339,9 +339,9 @@
] ]
}, },
"node_modules/@lancedb/vectordb-linux-arm64-gnu": { "node_modules/@lancedb/vectordb-linux-arm64-gnu": {
"version": "0.1.18", "version": "0.1.19",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.1.18.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.1.19.tgz",
"integrity": "sha512-2UroC026bUYwyciSRonYlXei0SoYbKgfWpozxYOu7GgBAV2CQQtaAPgWJTEl6ZiCNeBmBTx+j0h3+ydUfZA73Q==", "integrity": "sha512-mL/hRmZp6Kw7hmGJBdOZfp/tTYiCdlOcs8DA/+nr2eiXERv0gIhyiKvr2P5DwbBmut3qXEkDalMHTo95BSdL2A==",
"cpu": [ "cpu": [
"arm64" "arm64"
], ],
@@ -351,9 +351,9 @@
] ]
}, },
"node_modules/@lancedb/vectordb-linux-x64-gnu": { "node_modules/@lancedb/vectordb-linux-x64-gnu": {
"version": "0.1.18", "version": "0.1.19",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.1.18.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.1.19.tgz",
"integrity": "sha512-DoQBskl22JAJFZh219ZOJ6o+f1niTZp0qRYngHa/kTIpLKzHWQ0OTtMCz32VBAjAsKjSLNxHE8rrT/S6tvS7KQ==", "integrity": "sha512-AG0FHksbbr+cHVKPi4B8cmBtqb6T9E0uaK4kyZkXrX52/xtv9RYVZcykaB/tSSm0XNFPWWRnx9R8UqNZV/hxMA==",
"cpu": [ "cpu": [
"x64" "x64"
], ],
@@ -363,9 +363,9 @@
] ]
}, },
"node_modules/@lancedb/vectordb-win32-x64-msvc": { "node_modules/@lancedb/vectordb-win32-x64-msvc": {
"version": "0.1.18", "version": "0.1.19",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.1.18.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.1.19.tgz",
"integrity": "sha512-a/kUM3V6rWuXS80pPECYxKfCUAnq56Of/GPCvnAkpk9C9ldyX10iff4aA6DiPHjEk9V2ytqDfJKl9N3QcMLKLA==", "integrity": "sha512-PDWZ2hvLVXH4Z4WIO1rsWY8ev3NpNm7aXlaey32P+l1Iz9Hia9+F2GBpp2UiEQKfvbk82ucAvBLRmpSsHY8Tlw==",
"cpu": [ "cpu": [
"x64" "x64"
], ],
@@ -4852,33 +4852,33 @@
} }
}, },
"@lancedb/vectordb-darwin-arm64": { "@lancedb/vectordb-darwin-arm64": {
"version": "0.1.18", "version": "0.1.19",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.1.18.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.1.19.tgz",
"integrity": "sha512-vu8MCFgaAAGmTJF+4RaoApROMpRVVgrCk+V9my4adAfWkkXbSmtxiDgiIwwL1VqdGb8UwzGn3kVbNW7idE1ojA==", "integrity": "sha512-efQhJkBKvMNhjFq3Sw3/qHo9D9gb9UqiIr98n3STsbNxBQjMnWemXn91Ckl40siRG1O8qXcINW7Qs/EGmus+kg==",
"optional": true "optional": true
}, },
"@lancedb/vectordb-darwin-x64": { "@lancedb/vectordb-darwin-x64": {
"version": "0.1.18", "version": "0.1.19",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.1.18.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.1.19.tgz",
"integrity": "sha512-ZU30bd6frRyKJ515ow972PlqO2wIiNT4Ohor9+KbUwl/VKDyAwKOKG8cWhRJXTxk0k1oqpiJ6+Q28TcYJ0sSAw==", "integrity": "sha512-r6OZNVyemAssABz2w7CRhe7dyREwBEfTytn+ux1zzTnzsgMgDovCQ0rQ3WZcxWvcy7SFCxiemA9IP1b/lsb4tQ==",
"optional": true "optional": true
}, },
"@lancedb/vectordb-linux-arm64-gnu": { "@lancedb/vectordb-linux-arm64-gnu": {
"version": "0.1.18", "version": "0.1.19",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.1.18.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.1.19.tgz",
"integrity": "sha512-2UroC026bUYwyciSRonYlXei0SoYbKgfWpozxYOu7GgBAV2CQQtaAPgWJTEl6ZiCNeBmBTx+j0h3+ydUfZA73Q==", "integrity": "sha512-mL/hRmZp6Kw7hmGJBdOZfp/tTYiCdlOcs8DA/+nr2eiXERv0gIhyiKvr2P5DwbBmut3qXEkDalMHTo95BSdL2A==",
"optional": true "optional": true
}, },
"@lancedb/vectordb-linux-x64-gnu": { "@lancedb/vectordb-linux-x64-gnu": {
"version": "0.1.18", "version": "0.1.19",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.1.18.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.1.19.tgz",
"integrity": "sha512-DoQBskl22JAJFZh219ZOJ6o+f1niTZp0qRYngHa/kTIpLKzHWQ0OTtMCz32VBAjAsKjSLNxHE8rrT/S6tvS7KQ==", "integrity": "sha512-AG0FHksbbr+cHVKPi4B8cmBtqb6T9E0uaK4kyZkXrX52/xtv9RYVZcykaB/tSSm0XNFPWWRnx9R8UqNZV/hxMA==",
"optional": true "optional": true
}, },
"@lancedb/vectordb-win32-x64-msvc": { "@lancedb/vectordb-win32-x64-msvc": {
"version": "0.1.18", "version": "0.1.19",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.1.18.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.1.19.tgz",
"integrity": "sha512-a/kUM3V6rWuXS80pPECYxKfCUAnq56Of/GPCvnAkpk9C9ldyX10iff4aA6DiPHjEk9V2ytqDfJKl9N3QcMLKLA==", "integrity": "sha512-PDWZ2hvLVXH4Z4WIO1rsWY8ev3NpNm7aXlaey32P+l1Iz9Hia9+F2GBpp2UiEQKfvbk82ucAvBLRmpSsHY8Tlw==",
"optional": true "optional": true
}, },
"@neon-rs/cli": { "@neon-rs/cli": {

View File

@@ -250,6 +250,14 @@ describe('LanceDB client', function () {
const createIndex = table.createIndex({ type: 'ivf_pq', column: 'name', num_partitions: 2, max_iters: 2, num_sub_vectors: 2 }) const createIndex = table.createIndex({ type: 'ivf_pq', column: 'name', num_partitions: 2, max_iters: 2, num_sub_vectors: 2 })
await expect(createIndex).to.be.rejectedWith(/VectorIndex requires the column data type to be fixed size list of float32s/) await expect(createIndex).to.be.rejectedWith(/VectorIndex requires the column data type to be fixed size list of float32s/)
}) })
it('it should fail when num_partitions is not a positive integer', async function () {
const uri = await createTestDB(32, 300)
const con = await lancedb.connect(uri)
const table = await con.openTable('vectors')
const createIndex = table.createIndex({ type: 'ivf_pq', column: 'name', num_partitions: -1, max_iters: 2, num_sub_vectors: 2 })
await expect(createIndex).to.be.rejectedWith('num_partitions: must be > 0')
})
}) })
describe('when using a custom embedding function', function () { describe('when using a custom embedding function', function () {

View File

@@ -1,5 +1,5 @@
[bumpversion] [bumpversion]
current_version = 0.1.15 current_version = 0.1.16
commit = True commit = True
message = [python] Bump version: {current_version} → {new_version} message = [python] Bump version: {current_version} → {new_version}
tag = True tag = True

View File

@@ -11,17 +11,18 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from pathlib import Path from pathlib import Path
from typing import List, Union from typing import Iterable, List, Union
import numpy as np import numpy as np
import pandas as pd
import pyarrow as pa import pyarrow as pa
from .util import safe_import_pandas
pd = safe_import_pandas()
DATA = Union[List[dict], dict, "pd.DataFrame", pa.Table, Iterable[pa.RecordBatch]]
VEC = Union[list, np.ndarray, pa.Array, pa.ChunkedArray] VEC = Union[list, np.ndarray, pa.Array, pa.ChunkedArray]
URI = Union[str, Path] URI = Union[str, Path]
# TODO support generator
DATA = Union[List[dict], dict, pd.DataFrame]
VECTOR_COLUMN_NAME = "vector" VECTOR_COLUMN_NAME = "vector"

View File

@@ -12,12 +12,13 @@
# limitations under the License. # limitations under the License.
from __future__ import annotations from __future__ import annotations
import pandas as pd
from .exceptions import MissingColumnError, MissingValueError from .exceptions import MissingColumnError, MissingValueError
from .util import safe_import_pandas
pd = safe_import_pandas()
def contextualize(raw_df: pd.DataFrame) -> Contextualizer: def contextualize(raw_df: "pd.DataFrame") -> Contextualizer:
"""Create a Contextualizer object for the given DataFrame. """Create a Contextualizer object for the given DataFrame.
Used to create context windows. Context windows are rolling subsets of text Used to create context windows. Context windows are rolling subsets of text
@@ -175,8 +176,12 @@ class Contextualizer:
self._min_window_size = min_window_size self._min_window_size = min_window_size
return self return self
def to_df(self) -> pd.DataFrame: def to_df(self) -> "pd.DataFrame":
"""Create the context windows and return a DataFrame.""" """Create the context windows and return a DataFrame."""
if pd is None:
raise ImportError(
"pandas is required to create context windows using lancedb"
)
if self._text_col not in self._raw_df.columns.tolist(): if self._text_col not in self._raw_df.columns.tolist():
raise MissingColumnError(self._text_col) raise MissingColumnError(self._text_col)

View File

@@ -16,9 +16,8 @@ from __future__ import annotations
import os import os
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from pathlib import Path from pathlib import Path
from typing import Dict, Iterable, List, Optional, Tuple, Union from typing import Optional
import pandas as pd
import pyarrow as pa import pyarrow as pa
from pyarrow import fs from pyarrow import fs
@@ -39,9 +38,7 @@ class DBConnection(ABC):
def create_table( def create_table(
self, self,
name: str, name: str,
data: Optional[ data: Optional[DATA] = None,
Union[List[dict], dict, pd.DataFrame, pa.Table, Iterable[pa.RecordBatch]],
] = None,
schema: Optional[pa.Schema] = None, schema: Optional[pa.Schema] = None,
mode: str = "create", mode: str = "create",
on_bad_vectors: str = "error", on_bad_vectors: str = "error",
@@ -279,7 +276,7 @@ class LanceDBConnection(DBConnection):
def create_table( def create_table(
self, self,
name: str, name: str,
data: Optional[Union[List[dict], dict, pd.DataFrame]] = None, data: Optional[DATA] = None,
schema: pa.Schema = None, schema: pa.Schema = None,
mode: str = "create", mode: str = "create",
on_bad_vectors: str = "error", on_bad_vectors: str = "error",
@@ -319,14 +316,20 @@ class LanceDBConnection(DBConnection):
""" """
return LanceTable.open(self, name) return LanceTable.open(self, name)
def drop_table(self, name: str): def drop_table(self, name: str, ignore_missing: bool = False):
"""Drop a table from the database. """Drop a table from the database.
Parameters Parameters
---------- ----------
name: str name: str
The name of the table. The name of the table.
ignore_missing: bool, default False
If True, ignore if the table does not exist.
""" """
filesystem, path = fs_from_uri(self.uri) try:
table_path = os.path.join(path, name + ".lance") filesystem, path = fs_from_uri(self.uri)
filesystem.delete_dir(table_path) table_path = os.path.join(path, name + ".lance")
filesystem.delete_dir(table_path)
except FileNotFoundError:
if not ignore_missing:
raise

View File

@@ -16,15 +16,19 @@ import sys
from typing import Callable, Union from typing import Callable, Union
import numpy as np import numpy as np
import pandas as pd
import pyarrow as pa import pyarrow as pa
from lance.vector import vec_to_table from lance.vector import vec_to_table
from retry import retry from retry import retry
from .util import safe_import_pandas
pd = safe_import_pandas()
DATA = Union[pa.Table, "pd.DataFrame"]
def with_embeddings( def with_embeddings(
func: Callable, func: Callable,
data: Union[pa.Table, pd.DataFrame], data: DATA,
column: str = "text", column: str = "text",
wrap_api: bool = True, wrap_api: bool = True,
show_progress: bool = False, show_progress: bool = False,
@@ -60,7 +64,7 @@ def with_embeddings(
func = func.batch_size(batch_size) func = func.batch_size(batch_size)
if show_progress: if show_progress:
func = func.show_progress() func = func.show_progress()
if isinstance(data, pd.DataFrame): if pd is not None and isinstance(data, pd.DataFrame):
data = pa.Table.from_pandas(data, preserve_index=False) data = pa.Table.from_pandas(data, preserve_index=False)
embeddings = func(data[column].to_numpy()) embeddings = func(data[column].to_numpy())
table = vec_to_table(np.array(embeddings)) table = vec_to_table(np.array(embeddings))

View File

@@ -249,3 +249,36 @@ def pydantic_to_schema(model: Type[pydantic.BaseModel]) -> pa.Schema:
""" """
fields = _pydantic_model_to_fields(model) fields = _pydantic_model_to_fields(model)
return pa.schema(fields) return pa.schema(fields)
class LanceModel(pydantic.BaseModel):
"""
A Pydantic Model base class that can be converted to a LanceDB Table.
Examples
--------
>>> import lancedb
>>> from lancedb.pydantic import LanceModel, vector
>>>
>>> class TestModel(LanceModel):
... name: str
... vector: vector(2)
...
>>> db = lancedb.connect("/tmp")
>>> table = db.create_table("test", schema=TestModel.to_arrow_schema())
>>> table.add([
... TestModel(name="test", vector=[1.0, 2.0])
... ])
>>> table.search([0., 0.]).limit(1).to_pydantic(TestModel)
[TestModel(name='test', vector=FixedSizeList(dim=2))]
"""
@classmethod
def to_arrow_schema(cls):
return pydantic_to_schema(cls)
@classmethod
def field_names(cls) -> List[str]:
if PYDANTIC_VERSION.major < 2:
return list(cls.__fields__.keys())
return list(cls.model_fields.keys())

View File

@@ -13,17 +13,20 @@
from __future__ import annotations from __future__ import annotations
from typing import List, Literal, Optional, Union from typing import List, Literal, Optional, Type, Union
import numpy as np import numpy as np
import pandas as pd
import pyarrow as pa import pyarrow as pa
from pydantic import BaseModel import pydantic
from .common import VECTOR_COLUMN_NAME from .common import VECTOR_COLUMN_NAME
from .pydantic import LanceModel
from .util import safe_import_pandas
pd = safe_import_pandas()
class Query(BaseModel): class Query(pydantic.BaseModel):
"""A Query""" """A Query"""
vector_column: str = VECTOR_COLUMN_NAME vector_column: str = VECTOR_COLUMN_NAME
@@ -198,7 +201,7 @@ class LanceQueryBuilder:
self._refine_factor = refine_factor self._refine_factor = refine_factor
return self return self
def to_df(self) -> pd.DataFrame: def to_df(self) -> "pd.DataFrame":
""" """
Execute the query and return the results as a pandas DataFrame. Execute the query and return the results as a pandas DataFrame.
In addition to the selected columns, LanceDB also returns a vector In addition to the selected columns, LanceDB also returns a vector
@@ -230,9 +233,26 @@ class LanceQueryBuilder:
) )
return self._table._execute_query(query) return self._table._execute_query(query)
def to_pydantic(self, model: Type[LanceModel]) -> List[LanceModel]:
"""Return the table as a list of pydantic models.
Parameters
----------
model: Type[LanceModel]
The pydantic model to use.
Returns
-------
List[LanceModel]
"""
return [
model(**{k: v for k, v in row.items() if k in model.field_names()})
for row in self.to_arrow().to_pylist()
]
class LanceFtsQueryBuilder(LanceQueryBuilder): class LanceFtsQueryBuilder(LanceQueryBuilder):
def to_arrow(self) -> pd.Table: def to_arrow(self) -> pa.Table:
try: try:
import tantivy import tantivy
except ImportError: except ImportError:

View File

@@ -20,7 +20,6 @@ import pyarrow as pa
from lancedb.common import DATA from lancedb.common import DATA
from lancedb.db import DBConnection from lancedb.db import DBConnection
from lancedb.schema import schema_to_json
from lancedb.table import Table, _sanitize_data from lancedb.table import Table, _sanitize_data
from .arrow import to_ipc_binary from .arrow import to_ipc_binary

View File

@@ -16,11 +16,11 @@ from functools import cached_property
from typing import Union from typing import Union
import pyarrow as pa import pyarrow as pa
from lance import json_to_schema
from lancedb.common import DATA, VEC, VECTOR_COLUMN_NAME from lancedb.common import DATA, VEC, VECTOR_COLUMN_NAME
from ..query import LanceQueryBuilder, Query from ..query import LanceQueryBuilder
from ..schema import json_to_schema
from ..table import Query, Table, _sanitize_data from ..table import Query, Table, _sanitize_data
from .arrow import to_ipc_binary from .arrow import to_ipc_binary
from .client import ARROW_STREAM_CONTENT_TYPE from .client import ARROW_STREAM_CONTENT_TYPE

View File

@@ -12,11 +12,7 @@
# limitations under the License. # limitations under the License.
"""Schema related utilities.""" """Schema related utilities."""
from typing import Any, Dict, Type
import pyarrow as pa import pyarrow as pa
from lance import json_to_schema, schema_to_json
def vector(dimension: int, value_type: pa.DataType = pa.float32()) -> pa.DataType: def vector(dimension: int, value_type: pa.DataType = pa.float32()) -> pa.DataType:

View File

@@ -20,26 +20,32 @@ from typing import Iterable, List, Union
import lance import lance
import numpy as np import numpy as np
import pandas as pd
import pyarrow as pa import pyarrow as pa
import pyarrow.compute as pc import pyarrow.compute as pc
from lance import LanceDataset from lance import LanceDataset
from lance.vector import vec_to_table from lance.vector import vec_to_table
from .common import DATA, VEC, VECTOR_COLUMN_NAME from .common import DATA, VEC, VECTOR_COLUMN_NAME
from .pydantic import LanceModel
from .query import LanceFtsQueryBuilder, LanceQueryBuilder, Query from .query import LanceFtsQueryBuilder, LanceQueryBuilder, Query
from .util import fs_from_uri from .util import fs_from_uri, safe_import_pandas
pd = safe_import_pandas()
def _sanitize_data(data, schema, on_bad_vectors, fill_value): def _sanitize_data(data, schema, on_bad_vectors, fill_value):
if isinstance(data, list): if isinstance(data, list):
# convert to list of dict if data is a bunch of LanceModels
if isinstance(data[0], LanceModel):
schema = data[0].__class__.to_arrow_schema()
data = [dict(d) for d in data]
data = pa.Table.from_pylist(data) data = pa.Table.from_pylist(data)
data = _sanitize_schema( data = _sanitize_schema(
data, schema=schema, on_bad_vectors=on_bad_vectors, fill_value=fill_value data, schema=schema, on_bad_vectors=on_bad_vectors, fill_value=fill_value
) )
if isinstance(data, dict): if isinstance(data, dict):
data = vec_to_table(data) data = vec_to_table(data)
if isinstance(data, pd.DataFrame): if pd is not None and isinstance(data, pd.DataFrame):
data = pa.Table.from_pandas(data) data = pa.Table.from_pandas(data)
data = _sanitize_schema( data = _sanitize_schema(
data, schema=schema, on_bad_vectors=on_bad_vectors, fill_value=fill_value data, schema=schema, on_bad_vectors=on_bad_vectors, fill_value=fill_value
@@ -94,7 +100,7 @@ class Table(ABC):
""" """
raise NotImplementedError raise NotImplementedError
def to_pandas(self) -> pd.DataFrame: def to_pandas(self):
"""Return the table as a pandas DataFrame. """Return the table as a pandas DataFrame.
Returns Returns
@@ -328,7 +334,7 @@ class LanceTable(Table):
"""Return the first n rows of the table.""" """Return the first n rows of the table."""
return self._dataset.head(n) return self._dataset.head(n)
def to_pandas(self) -> pd.DataFrame: def to_pandas(self) -> "pd.DataFrame":
"""Return the table as a pandas DataFrame. """Return the table as a pandas DataFrame.
Returns Returns

View File

@@ -15,7 +15,6 @@ import os
from typing import Tuple from typing import Tuple
from urllib.parse import urlparse from urllib.parse import urlparse
import pyarrow as pa
import pyarrow.fs as pa_fs import pyarrow.fs as pa_fs
@@ -76,3 +75,12 @@ def fs_from_uri(uri: str) -> Tuple[pa_fs.FileSystem, str]:
return fs, path return fs, path
return pa_fs.FileSystem.from_uri(uri) return pa_fs.FileSystem.from_uri(uri)
def safe_import_pandas():
try:
import pandas as pd
return pd
except ImportError:
return None

View File

@@ -1,7 +1,7 @@
[project] [project]
name = "lancedb" name = "lancedb"
version = "0.1.15" version = "0.1.16"
dependencies = ["pylance~=0.5.8", "ratelimiter", "retry", "tqdm", "aiohttp", "pydantic", "attr", "semver"] dependencies = ["pylance==0.5.10", "ratelimiter", "retry", "tqdm", "aiohttp", "pydantic", "attr", "semver"]
description = "lancedb" description = "lancedb"
authors = [ authors = [
{ name = "LanceDB Devs", email = "dev@lancedb.com" }, { name = "LanceDB Devs", email = "dev@lancedb.com" },
@@ -37,7 +37,7 @@ repository = "https://github.com/lancedb/lancedb"
[project.optional-dependencies] [project.optional-dependencies]
tests = [ tests = [
"pytest", "pytest-mock", "pytest-asyncio" "pandas>=1.4", "pytest", "pytest-mock", "pytest-asyncio"
] ]
dev = [ dev = [
"ruff", "pre-commit", "black" "ruff", "pre-commit", "black"

View File

@@ -149,6 +149,10 @@ def test_delete_table(tmp_path):
db.create_table("test", data=data) db.create_table("test", data=data)
assert db.table_names() == ["test"] assert db.table_names() == ["test"]
# dropping a table that does not exist should pass
# if ignore_missing=True
db.drop_table("does_not_exist", ignore_missing=True)
def test_empty_or_nonexistent_table(tmp_path): def test_empty_or_nonexistent_table(tmp_path):
db = lancedb.connect(tmp_path) db = lancedb.connect(tmp_path)

View File

@@ -20,7 +20,7 @@ import pyarrow as pa
import pydantic import pydantic
import pytest import pytest
from lancedb.pydantic import PYDANTIC_VERSION, pydantic_to_schema, vector from lancedb.pydantic import PYDANTIC_VERSION, LanceModel, pydantic_to_schema, vector
@pytest.mark.skipif( @pytest.mark.skipif(
@@ -163,3 +163,13 @@ def test_fixed_size_list_validation():
TestModel(vec=range(7)) TestModel(vec=range(7))
TestModel(vec=range(8)) TestModel(vec=range(8))
def test_lance_model():
class TestModel(LanceModel):
vec: vector(16)
li: List[int]
schema = pydantic_to_schema(TestModel)
assert schema == TestModel.to_arrow_schema()
assert TestModel.field_names() == ["vec", "li"]

View File

@@ -20,6 +20,7 @@ import pyarrow as pa
import pytest import pytest
from lancedb.db import LanceDBConnection from lancedb.db import LanceDBConnection
from lancedb.pydantic import LanceModel, vector
from lancedb.query import LanceQueryBuilder, Query from lancedb.query import LanceQueryBuilder, Query
from lancedb.table import LanceTable from lancedb.table import LanceTable
@@ -64,6 +65,24 @@ def table(tmp_path) -> MockTable:
return MockTable(tmp_path) return MockTable(tmp_path)
def test_cast(table):
class TestModel(LanceModel):
vector: vector(2)
id: int
str_field: str
float_field: float
q = LanceQueryBuilder(table, [0, 0], "vector").limit(1)
results = q.to_pydantic(TestModel)
assert len(results) == 1
r0 = results[0]
assert isinstance(r0, TestModel)
assert r0.id == 1
assert r0.vector == [1, 2]
assert r0.str_field == "a"
assert r0.float_field == 1.0
def test_query_builder(table): def test_query_builder(table):
df = LanceQueryBuilder(table, [0, 0], "vector").limit(1).select(["id"]).to_df() df = LanceQueryBuilder(table, [0, 0], "vector").limit(1).select(["id"]).to_df()
assert df["id"].values[0] == 1 assert df["id"].values[0] == 1

View File

@@ -13,15 +13,16 @@
import functools import functools
from pathlib import Path from pathlib import Path
from typing import List
from unittest.mock import PropertyMock, patch from unittest.mock import PropertyMock, patch
import numpy as np import numpy as np
import pandas as pd import pandas as pd
import pyarrow as pa import pyarrow as pa
import pytest import pytest
from lance.vector import vec_to_table
from lancedb.db import LanceDBConnection from lancedb.db import LanceDBConnection
from lancedb.pydantic import LanceModel, vector
from lancedb.table import LanceTable from lancedb.table import LanceTable
@@ -135,6 +136,17 @@ def test_add(db):
_add(table, schema) _add(table, schema)
def test_add_pydantic_model(db):
class TestModel(LanceModel):
vector: vector(16)
li: List[int]
data = TestModel(vector=list(range(16)), li=[1, 2, 3])
table = LanceTable.create(db, "test", data=[data])
assert len(table) == 1
assert table.schema == TestModel.to_arrow_schema()
def _add(table, schema): def _add(table, schema):
# table = LanceTable(db, "test") # table = LanceTable(db, "test")
assert len(table) == 2 assert len(table) == 2

View File

@@ -13,6 +13,7 @@ crate-type = ["cdylib"]
arrow-array = { workspace = true } arrow-array = { workspace = true }
arrow-ipc = { workspace = true } arrow-ipc = { workspace = true }
arrow-schema = { workspace = true } arrow-schema = { workspace = true }
conv = "0.3.3"
once_cell = "1" once_cell = "1"
futures = "0.3" futures = "0.3"
half = { workspace = true } half = { workspace = true }

View File

@@ -22,8 +22,15 @@ use snafu::Snafu;
pub enum Error { pub enum Error {
#[snafu(display("column '{name}' is missing"))] #[snafu(display("column '{name}' is missing"))]
MissingColumn { name: String }, MissingColumn { name: String },
#[snafu(display("{name}: {message}"))]
RangeError { name: String, message: String },
#[snafu(display("{index_type} is not a valid index type"))]
InvalidIndexType { index_type: String },
#[snafu(display("{message}"))] #[snafu(display("{message}"))]
LanceDB { message: String }, LanceDB { message: String },
#[snafu(display("{message}"))]
Neon { message: String },
} }
pub type Result<T> = std::result::Result<T, Error>; pub type Result<T> = std::result::Result<T, Error>;
@@ -52,6 +59,14 @@ impl From<ArrowError> for Error {
} }
} }
impl From<neon::result::Throw> for Error {
fn from(value: neon::result::Throw) -> Self {
Self::Neon {
message: value.to_string(),
}
}
}
/// ResultExt is used to transform a [`Result`] into a [`NeonResult`], /// ResultExt is used to transform a [`Result`] into a [`NeonResult`],
/// so it can be returned as a JavaScript error /// so it can be returned as a JavaScript error
/// Copied from [Neon](https://github.com/neon-bindings/neon/blob/4c2e455a9e6814f1ba0178616d63caec7f4df317/crates/neon/src/result/mod.rs#L88) /// Copied from [Neon](https://github.com/neon-bindings/neon/blob/4c2e455a9e6814f1ba0178616d63caec7f4df317/crates/neon/src/result/mod.rs#L88)

View File

@@ -22,12 +22,15 @@ use neon::prelude::*;
use vectordb::index::vector::{IvfPQIndexBuilder, VectorIndexBuilder}; use vectordb::index::vector::{IvfPQIndexBuilder, VectorIndexBuilder};
use crate::error::Error::InvalidIndexType;
use crate::error::ResultExt;
use crate::neon_ext::js_object_ext::JsObjectExt;
use crate::{runtime, JsTable}; use crate::{runtime, JsTable};
pub(crate) fn table_create_vector_index(mut cx: FunctionContext) -> JsResult<JsPromise> { pub(crate) fn table_create_vector_index(mut cx: FunctionContext) -> JsResult<JsPromise> {
let js_table = cx.this().downcast_or_throw::<JsBox<JsTable>, _>(&mut cx)?; let js_table = cx.this().downcast_or_throw::<JsBox<JsTable>, _>(&mut cx)?;
let index_params = cx.argument::<JsObject>(0)?; let index_params = cx.argument::<JsObject>(0)?;
let index_params_builder = get_index_params_builder(&mut cx, index_params).unwrap(); let index_params_builder = get_index_params_builder(&mut cx, index_params).or_throw(&mut cx)?;
let rt = runtime(&mut cx)?; let rt = runtime(&mut cx)?;
let channel = cx.channel(); let channel = cx.channel();
@@ -54,27 +57,21 @@ pub(crate) fn table_create_vector_index(mut cx: FunctionContext) -> JsResult<JsP
fn get_index_params_builder( fn get_index_params_builder(
cx: &mut FunctionContext, cx: &mut FunctionContext,
obj: Handle<JsObject>, obj: Handle<JsObject>,
) -> Result<impl VectorIndexBuilder, String> { ) -> crate::error::Result<impl VectorIndexBuilder> {
let idx_type = obj let idx_type = obj.get::<JsString, _, _>(cx, "type")?.value(cx);
.get::<JsString, _, _>(cx, "type")
.map_err(|t| t.to_string())?
.value(cx);
match idx_type.as_str() { match idx_type.as_str() {
"ivf_pq" => { "ivf_pq" => {
let mut index_builder: IvfPQIndexBuilder = IvfPQIndexBuilder::new(); let mut index_builder: IvfPQIndexBuilder = IvfPQIndexBuilder::new();
let mut pq_params = PQBuildParams::default(); let mut pq_params = PQBuildParams::default();
obj.get_opt::<JsString, _, _>(cx, "column") obj.get_opt::<JsString, _, _>(cx, "column")?
.map_err(|t| t.to_string())?
.map(|s| index_builder.column(s.value(cx))); .map(|s| index_builder.column(s.value(cx)));
obj.get_opt::<JsString, _, _>(cx, "index_name") obj.get_opt::<JsString, _, _>(cx, "index_name")?
.map_err(|t| t.to_string())?
.map(|s| index_builder.index_name(s.value(cx))); .map(|s| index_builder.index_name(s.value(cx)));
obj.get_opt::<JsString, _, _>(cx, "metric_type") obj.get_opt::<JsString, _, _>(cx, "metric_type")?
.map_err(|t| t.to_string())?
.map(|s| MetricType::try_from(s.value(cx).as_str())) .map(|s| MetricType::try_from(s.value(cx).as_str()))
.map(|mt| { .map(|mt| {
let metric_type = mt.unwrap(); let metric_type = mt.unwrap();
@@ -82,15 +79,8 @@ fn get_index_params_builder(
pq_params.metric_type = metric_type; pq_params.metric_type = metric_type;
}); });
let num_partitions = obj let num_partitions = obj.get_opt_usize(cx, "num_partitions")?;
.get_opt::<JsNumber, _, _>(cx, "num_partitions") let max_iters = obj.get_opt_usize(cx, "max_iters")?;
.map_err(|t| t.to_string())?
.map(|s| s.value(cx) as usize);
let max_iters = obj
.get_opt::<JsNumber, _, _>(cx, "max_iters")
.map_err(|t| t.to_string())?
.map(|s| s.value(cx) as usize);
num_partitions.map(|np| { num_partitions.map(|np| {
let max_iters = max_iters.unwrap_or(50); let max_iters = max_iters.unwrap_or(50);
@@ -102,32 +92,28 @@ fn get_index_params_builder(
index_builder.ivf_params(ivf_params) index_builder.ivf_params(ivf_params)
}); });
obj.get_opt::<JsBoolean, _, _>(cx, "use_opq") obj.get_opt::<JsBoolean, _, _>(cx, "use_opq")?
.map_err(|t| t.to_string())?
.map(|s| pq_params.use_opq = s.value(cx)); .map(|s| pq_params.use_opq = s.value(cx));
obj.get_opt::<JsNumber, _, _>(cx, "num_sub_vectors") obj.get_opt_usize(cx, "num_sub_vectors")?
.map_err(|t| t.to_string())? .map(|s| pq_params.num_sub_vectors = s);
.map(|s| pq_params.num_sub_vectors = s.value(cx) as usize);
obj.get_opt::<JsNumber, _, _>(cx, "num_bits") obj.get_opt_usize(cx, "num_bits")?
.map_err(|t| t.to_string())? .map(|s| pq_params.num_bits = s);
.map(|s| pq_params.num_bits = s.value(cx) as usize);
obj.get_opt::<JsNumber, _, _>(cx, "max_iters") obj.get_opt_usize(cx, "max_iters")?
.map_err(|t| t.to_string())? .map(|s| pq_params.max_iters = s);
.map(|s| pq_params.max_iters = s.value(cx) as usize);
obj.get_opt::<JsNumber, _, _>(cx, "max_opq_iters") obj.get_opt_usize(cx, "max_opq_iters")?
.map_err(|t| t.to_string())? .map(|s| pq_params.max_opq_iters = s);
.map(|s| pq_params.max_opq_iters = s.value(cx) as usize);
obj.get_opt::<JsBoolean, _, _>(cx, "replace") obj.get_opt::<JsBoolean, _, _>(cx, "replace")?
.map_err(|t| t.to_string())?
.map(|s| index_builder.replace(s.value(cx))); .map(|s| index_builder.replace(s.value(cx)));
Ok(index_builder) Ok(index_builder)
} }
t => Err(format!("{} is not a valid index type", t).to_string()), index_type => Err(InvalidIndexType {
index_type: index_type.into(),
}),
} }
} }

View File

@@ -31,16 +31,17 @@ use once_cell::sync::OnceCell;
use tokio::runtime::Runtime; use tokio::runtime::Runtime;
use vectordb::database::Database; use vectordb::database::Database;
use vectordb::error::Error;
use vectordb::table::{ReadParams, Table}; use vectordb::table::{ReadParams, Table};
use crate::arrow::{arrow_buffer_to_record_batch, record_batch_to_buffer}; use crate::arrow::{arrow_buffer_to_record_batch, record_batch_to_buffer};
use crate::error::ResultExt; use crate::error::ResultExt;
use crate::neon_ext::js_object_ext::JsObjectExt;
mod arrow; mod arrow;
mod convert; mod convert;
mod error; mod error;
mod index; mod index;
mod neon_ext;
struct JsDatabase { struct JsDatabase {
database: Arc<Database>, database: Arc<Database>,
@@ -245,12 +246,9 @@ fn table_search(mut cx: FunctionContext) -> JsResult<JsPromise> {
.get_opt::<JsString, _, _>(&mut cx, "_filter")? .get_opt::<JsString, _, _>(&mut cx, "_filter")?
.map(|s| s.value(&mut cx)); .map(|s| s.value(&mut cx));
let refine_factor = query_obj let refine_factor = query_obj
.get_opt::<JsNumber, _, _>(&mut cx, "_refineFactor")? .get_opt_u32(&mut cx, "_refineFactor")
.map(|s| s.value(&mut cx)) .or_throw(&mut cx)?;
.map(|i| i as u32); let nprobes = query_obj.get_usize(&mut cx, "_nprobes").or_throw(&mut cx)?;
let nprobes = query_obj
.get::<JsNumber, _, _>(&mut cx, "_nprobes")?
.value(&mut cx) as usize;
let metric_type = query_obj let metric_type = query_obj
.get_opt::<JsString, _, _>(&mut cx, "_metricType")? .get_opt::<JsString, _, _>(&mut cx, "_metricType")?
.map(|s| s.value(&mut cx)) .map(|s| s.value(&mut cx))
@@ -277,7 +275,11 @@ fn table_search(mut cx: FunctionContext) -> JsResult<JsPromise> {
.select(select); .select(select);
let record_batch_stream = builder.execute(); let record_batch_stream = builder.execute();
let results = record_batch_stream let results = record_batch_stream
.and_then(|stream| stream.try_collect::<Vec<_>>().map_err(Error::from)) .and_then(|stream| {
stream
.try_collect::<Vec<_>>()
.map_err(vectordb::error::Error::from)
})
.await; .await;
deferred.settle_with(&channel, move |mut cx| { deferred.settle_with(&channel, move |mut cx| {

View File

@@ -0,0 +1,15 @@
// Copyright 2023 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
pub mod js_object_ext;

View File

@@ -0,0 +1,82 @@
// Copyright 2023 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::error::{Error, Result};
use neon::prelude::*;
// extends neon's [JsObject] with helper functions to extract properties
/// Extension methods for neon's [JsObject] that extract numeric properties
/// with explicit range checking instead of silent `as` truncation.
pub trait JsObjectExt {
    /// Reads the optional property `key` as a `u32`.
    /// Returns `Ok(None)` when the property is absent; returns an error when
    /// the JS number is negative, too large for `u32`, or NaN.
    fn get_opt_u32(&self, cx: &mut FunctionContext, key: &str) -> Result<Option<u32>>;
    /// Reads the required property `key` as a `usize`; errors if the property
    /// is missing or the value is out of range for `usize`.
    fn get_usize(&self, cx: &mut FunctionContext, key: &str) -> Result<usize>;
    /// Reads the optional property `key` as a `usize`.
    /// Returns `Ok(None)` when the property is absent; returns an error when
    /// the value is out of range for `usize`.
    fn get_opt_usize(&self, cx: &mut FunctionContext, key: &str) -> Result<Option<usize>>;
}
impl JsObjectExt for JsObject {
    /// Optional `u32` property: an absent key yields `Ok(None)`; a present
    /// value is range-checked before conversion.
    fn get_opt_u32(&self, cx: &mut FunctionContext, key: &str) -> Result<Option<u32>> {
        match self.get_opt::<JsNumber, _, _>(cx, key)? {
            None => Ok(None),
            Some(num) => f64_to_u32_safe(num.value(cx), key).map(Some),
        }
    }

    /// Required `usize` property: a missing key surfaces as a neon lookup
    /// error; an out-of-range value becomes a range error naming `key`.
    fn get_usize(&self, cx: &mut FunctionContext, key: &str) -> Result<usize> {
        let raw = self.get::<JsNumber, _, _>(cx, key)?;
        f64_to_usize_safe(raw.value(cx), key)
    }

    /// Optional `usize` property: an absent key yields `Ok(None)`; a present
    /// value is range-checked before conversion.
    fn get_opt_usize(&self, cx: &mut FunctionContext, key: &str) -> Result<Option<usize>> {
        match self.get_opt::<JsNumber, _, _>(cx, key)? {
            None => Ok(None),
            Some(num) => f64_to_usize_safe(num.value(cx), key).map(Some),
        }
    }
}
/// Converts a JS number (`f64`) to a `u32`, mapping each conversion failure
/// to an [`Error::RangeError`] that names the offending property `key`.
///
/// Fix: the boundary messages were previously wrong — `0` is a valid `u32`
/// (so the lower bound is ">= 0", not "> 0") and `u32::MAX` is exactly
/// representable (so the upper bound is "<=", not "<").
fn f64_to_u32_safe(n: f64, key: &str) -> Result<u32> {
    use conv::*;
    n.approx_as::<u32>().map_err(|e| {
        // Translate each failure mode into a user-facing message; building the
        // message first avoids repeating the RangeError literal three times.
        let message = match e {
            FloatError::NegOverflow(_) => "must be >= 0".to_string(),
            FloatError::PosOverflow(_) => format!("must be <= {}", u32::MAX),
            FloatError::NotANumber(_) => "not a valid number".to_string(),
        };
        Error::RangeError {
            name: key.into(),
            message,
        }
    })
}
/// Converts a JS number (`f64`) to a `usize`, mapping each conversion failure
/// to an [`Error::RangeError`] that names the offending property `key`.
///
/// Fix: the boundary messages were previously wrong — `0` is a valid `usize`
/// (so the lower bound is ">= 0", not "> 0") and the upper bound is inclusive
/// ("<=", not "<").
fn f64_to_usize_safe(n: f64, key: &str) -> Result<usize> {
    use conv::*;
    n.approx_as::<usize>().map_err(|e| {
        // Translate each failure mode into a user-facing message; building the
        // message first avoids repeating the RangeError literal three times.
        let message = match e {
            FloatError::NegOverflow(_) => "must be >= 0".to_string(),
            FloatError::PosOverflow(_) => format!("must be <= {}", usize::MAX),
            FloatError::NotANumber(_) => "not a valid number".to_string(),
        };
        Error::RangeError {
            name: key.into(),
            message,
        }
    })
}