Compare commits

...

37 Commits

Author SHA1 Message Date
Lance Release
b06e214d29 [python] Bump version: 0.1.15 → 0.1.16 2023-07-31 18:32:40 +00:00
Chang She
c1f8feb6ed make pandas an optional dependency in lancedb as well (#385) 2023-07-31 14:08:58 -04:00
Chang She
cada35d5b7 Improve pydantic integration (#384) 2023-07-31 12:16:44 -04:00
Chang She
2d25c263e9 Implement drop table if exists (#383) 2023-07-31 10:25:09 +02:00
gsilvestrin
bcd7f66dc7 fix(node): Handle overflows in the node bridge (#372)
- Fixes many numeric conversions that result in hard-to-reproduce issues
- JsObjectExt extends JsObject with safe methods to extract numeric values
2023-07-28 13:15:21 -07:00
gsilvestrin
1daecac648 fix(python): Pin pylance and add pandas as test dependency (#373) 2023-07-27 15:21:45 -07:00
Lance Release
b8e656b2a7 Updating package-lock.json 2023-07-27 21:53:30 +00:00
Lance Release
ff7c1193a7 Updating package-lock.json 2023-07-27 21:06:32 +00:00
Lance Release
6d70e7c29b Bump version: 0.1.18 → 0.1.19 2023-07-27 21:06:17 +00:00
gsilvestrin
73cc12ecc5 fix(node): Relax EmbeddingFunction type guard (#370) 2023-07-27 12:51:59 -07:00
gsilvestrin
6036cf48a7 fix(node) Replace panic errors with friendlier ones (#366)
- Implement Result/Error in the node FFI
- Implement a trait (ResultExt) to make error handling less verbose
- Refactor some parts of the code that touch arrow into arrow.rs
2023-07-26 13:44:58 -07:00
Ayush Chaurasia
15f4787cc8 [Docs]: Add badges, CTA and updates examples (#358)
Screenshot: https://github.com/lancedb/lancedb/assets/15766192/a263a17e-66d0-4591-adc7-b520aa5b23f6
Is this a problem? Are we using metadata to track usage or something?
2023-07-26 16:35:46 +05:30
Lance Release
0e4050e706 [python] Bump version: 0.1.14 → 0.1.15 2023-07-25 18:58:44 +00:00
Rob Meng
147796ffcd bump lance version for vectordb, fix minor bugs in lancedb remote client (#365) 2023-07-24 21:30:57 -04:00
Lance Release
6fd465ceef Updating package-lock.json 2023-07-24 20:02:35 +00:00
Lance Release
e2e5a0fb83 Updating package-lock.json 2023-07-24 19:27:32 +00:00
Lance Release
ff8d5a6d51 Bump version: 0.1.17 → 0.1.18 2023-07-24 19:27:17 +00:00
Will Jones
8829988ada ci: build node in manylinux docker container (#350)
Closes #359

TODO:
 * [x] test in a sample of Linux distro docker containers
2023-07-24 11:31:47 -07:00
gsilvestrin
80a32be121 bugfix(node): make WriteMode optional when specifying embeddings (#336) 2023-07-24 11:26:43 -07:00
Rob Meng
8325979bb8 dont print apikey in remote client toString, add hostoverride to python client (#353) 2023-07-23 18:44:00 -04:00
lindt
ed5ff5a482 [docs] typo fix (#352)
Co-authored-by: Stefan Rohe <think@eduroam152-169.nbk.vse.cz>
2023-07-22 11:18:58 +02:00
Lance Release
2c9371dcc4 Updating package-lock.json 2023-07-21 23:18:22 +00:00
Lance Release
6d5621da4a Updating package-lock.json 2023-07-21 22:39:21 +00:00
Lance Release
380c1572f3 Bump version: 0.1.16 → 0.1.17 2023-07-21 22:39:06 +00:00
gsilvestrin
4383848d53 feat(node): Add Linux ARM build (#348) 2023-07-21 15:33:02 -07:00
gsilvestrin
473c43860c bugfix: Set Github token when pushing changes (#351) 2023-07-21 15:31:44 -07:00
gsilvestrin
17cf244e53 Updating package-lock.json (#347) 2023-07-20 14:44:10 -07:00
Leon Yee
0b60694df4 [docs] typo fix (#346) 2023-07-20 14:33:56 -07:00
Lance Release
600da476e8 Updating package-lock.json 2023-07-20 20:24:54 +00:00
Lance Release
458217783c Bump version: 0.1.15 → 0.1.16 2023-07-20 20:24:37 +00:00
gsilvestrin
21b1a71a6b bugfix(node): Don't persist credentials on make-release-commit.yml (#345) 2023-07-20 13:24:06 -07:00
gsilvestrin
2d899675e8 bugfix(node): Make release task can't push to repo (#344) 2023-07-20 13:15:29 -07:00
Lance Release
1cbfc1bbf4 [python] Bump version: 0.1.13 → 0.1.14 2023-07-20 20:06:15 +00:00
gsilvestrin
a2bb497135 feat(node) Move native packages to @lancedb NPM org (#341)
- Move native packages to @lancedb org
- Move package-lock.json update to a reusable action and created a target to run it manually.
2023-07-20 12:54:39 -07:00
Will Jones
0cf40c8da3 fix: only use util function to build filesystem (#339) 2023-07-20 10:41:50 -07:00
Rob Meng
8233c689c3 fix remote SDK (#342) 2023-07-20 02:01:13 -04:00
gsilvestrin
6e24e731b8 Updating package-lock.json (#338) 2023-07-18 21:10:18 -07:00
59 changed files with 1291 additions and 414 deletions


@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.1.15
current_version = 0.1.19
commit = True
message = Bump version: {current_version} → {new_version}
tag = True


@@ -25,38 +25,35 @@ jobs:
bump-version:
runs-on: ubuntu-latest
steps:
- name: Check out main
uses: actions/checkout@v3
with:
ref: main
persist-credentials: false
fetch-depth: 0
lfs: true
- name: Set git configs for bumpversion
shell: bash
run: |
git config user.name 'Lance Release'
git config user.email 'lance-dev@lancedb.com'
- name: Set up Python 3.10
uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Bump version, create tag and commit
run: |
pip install bump2version
bumpversion --verbose ${{ inputs.part }}
- name: Update package-lock.json file
run: |
npm install
git add package-lock.json
# Add this change to the commit created by bumpversion
git commit --amend --no-edit
working-directory: node
- name: Push new version and tag
if: ${{ inputs.dry_run }} == "false"
uses: ad-m/github-push-action@master
with:
github_token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}
branch: main
tags: true
- name: Check out main
uses: actions/checkout@v3
with:
ref: main
persist-credentials: false
fetch-depth: 0
lfs: true
- name: Set git configs for bumpversion
shell: bash
run: |
git config user.name 'Lance Release'
git config user.email 'lance-dev@lancedb.com'
- name: Set up Python 3.10
uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Bump version, create tag and commit
run: |
pip install bump2version
bumpversion --verbose ${{ inputs.part }}
- name: Push new version and tag
if: ${{ inputs.dry_run }} == "false"
uses: ad-m/github-push-action@master
with:
github_token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}
branch: main
tags: true
- uses: ./.github/workflows/update_package_lock
if: ${{ inputs.dry_run }} == "false"
with:
github_token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}


@@ -70,7 +70,7 @@ jobs:
npm run tsc
npm run build
npm run pack-build
npm install --no-save ./dist/vectordb-*.tgz
npm install --no-save ./dist/lancedb-vectordb-*.tgz
# Remove index.node to test with dependency installed
rm index.node
- name: Test
@@ -101,7 +101,7 @@ jobs:
npm run tsc
npm run build
npm run pack-build
npm install --no-save ./dist/vectordb-*.tgz
npm install --no-save ./dist/lancedb-vectordb-*.tgz
# Remove index.node to test with dependency installed
rm index.node
- name: Test


@@ -46,75 +46,51 @@ jobs:
matrix:
target: [x86_64-apple-darwin, aarch64-apple-darwin]
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Install system dependencies
run: brew install protobuf
- name: Install npm dependencies
run: |
cd node
npm ci
- name: Install rustup target
if: ${{ matrix.target == 'aarch64-apple-darwin' }}
run: rustup target add aarch64-apple-darwin
- name: Build MacOS native node modules
run: bash ci/build_macos_artifacts.sh ${{ matrix.target }}
- name: Upload Darwin Artifacts
uses: actions/upload-artifact@v3
with:
name: darwin-native
path: |
node/dist/vectordb-darwin*.tgz
- name: Checkout
uses: actions/checkout@v3
- name: Install system dependencies
run: brew install protobuf
- name: Install npm dependencies
run: |
cd node
npm ci
- name: Install rustup target
if: ${{ matrix.target == 'aarch64-apple-darwin' }}
run: rustup target add aarch64-apple-darwin
- name: Build MacOS native node modules
run: bash ci/build_macos_artifacts.sh ${{ matrix.target }}
- name: Upload Darwin Artifacts
uses: actions/upload-artifact@v3
with:
name: native-darwin
path: |
node/dist/lancedb-vectordb-darwin*.tgz
node-linux:
name: node-linux (${{ matrix.arch}}-unknown-linux-${{ matrix.libc }})
runs-on: ubuntu-latest
name: node-linux (${{ matrix.config.arch}}-unknown-linux-gnu
runs-on: ${{ matrix.config.runner }}
# Only runs on tags that matches the make-release action
if: startsWith(github.ref, 'refs/tags/v')
strategy:
fail-fast: false
matrix:
libc:
- gnu
# TODO: re-enable musl once we have refactored to pre-built containers
# Right now we have to build node from source which is too expensive.
# - musl
arch:
- x86_64
# Building on aarch64 is too slow for now
# - aarch64
config:
- arch: x86_64
runner: ubuntu-latest
- arch: aarch64
runner: buildjet-4vcpu-ubuntu-2204-arm
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Change owner to root (for npm)
# The docker container is run as root, so we need the files to be owned by root
# Otherwise npm is a nightmare: https://github.com/npm/cli/issues/3773
run: sudo chown -R root:root .
- name: Set up QEMU
if: ${{ matrix.arch == 'aarch64' }}
uses: docker/setup-qemu-action@v2
with:
platforms: arm64
- name: Build Linux GNU native node modules
if: ${{ matrix.libc == 'gnu' }}
run: |
docker run \
-v $(pwd):/io -w /io \
rust:1.70-bookworm \
bash ci/build_linux_artifacts.sh ${{ matrix.arch }}-unknown-linux-gnu
- name: Build musl Linux native node modules
if: ${{ matrix.libc == 'musl' }}
run: |
docker run --platform linux/arm64/v8 \
-v $(pwd):/io -w /io \
quay.io/pypa/musllinux_1_1_${{ matrix.arch }} \
bash ci/build_linux_artifacts.sh ${{ matrix.arch }}-unknown-linux-musl
- name: Upload Linux Artifacts
uses: actions/upload-artifact@v3
with:
name: linux-native
path: |
node/dist/vectordb-linux*.tgz
- name: Checkout
uses: actions/checkout@v3
- name: Build Linux Artifacts
run: |
bash ci/build_linux_artifacts.sh ${{ matrix.config.arch }}
- name: Upload Linux Artifacts
uses: actions/upload-artifact@v3
with:
name: native-linux
path: |
node/dist/lancedb-vectordb-linux*.tgz
node-windows:
runs-on: windows-2022
@@ -145,12 +121,12 @@ jobs:
- name: Upload Windows Artifacts
uses: actions/upload-artifact@v3
with:
name: windows-native
name: native-windows
path: |
node/dist/vectordb-win32*.tgz
node/dist/lancedb-vectordb-win32*.tgz
release:
needs: [node, node-macos, node-linux]
needs: [node, node-macos, node-linux, node-windows]
runs-on: ubuntu-latest
# Only runs on tags that matches the make-release action
if: startsWith(github.ref, 'refs/tags/v')
@@ -170,3 +146,18 @@ jobs:
for filename in *.tgz; do
npm publish $filename
done
update-package-lock:
needs: [release]
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3
with:
ref: main
persist-credentials: false
fetch-depth: 0
lfs: true
- uses: ./.github/workflows/update_package_lock
with:
github_token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}


@@ -30,7 +30,7 @@ jobs:
python-version: 3.${{ matrix.python-minor-version }}
- name: Install lancedb
run: |
pip install -e .
pip install -e .[tests]
pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985
pip install pytest pytest-mock black isort
- name: Black
@@ -59,7 +59,7 @@ jobs:
python-version: "3.11"
- name: Install lancedb
run: |
pip install -e .
pip install -e .[tests]
pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985
pip install pytest pytest-mock black
- name: Black


@@ -0,0 +1,33 @@
name: update_package_lock
description: "Update node's package.lock"
inputs:
github_token:
required: true
description: "github token for the repo"
runs:
using: "composite"
steps:
- uses: actions/setup-node@v3
with:
node-version: 20
- name: Set git configs
shell: bash
run: |
git config user.name 'Lance Release'
git config user.email 'lance-dev@lancedb.com'
- name: Update package-lock.json file
working-directory: ./node
run: |
npm install
git add package-lock.json
git commit -m "Updating package-lock.json"
shell: bash
- name: Push changes
if: ${{ inputs.dry_run }} == "false"
uses: ad-m/github-push-action@master
with:
github_token: ${{ inputs.github_token }}
branch: main
tags: true


@@ -0,0 +1,19 @@
name: Update package-lock.json
on:
workflow_dispatch:
jobs:
publish:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3
with:
ref: main
persist-credentials: false
fetch-depth: 0
lfs: true
- uses: ./.github/workflows/update_package_lock
with:
github_token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}


@@ -6,11 +6,12 @@ members = [
resolver = "2"
[workspace.dependencies]
lance = "=0.5.8"
lance = "=0.5.9"
arrow-array = "42.0"
arrow-data = "42.0"
arrow-schema = "42.0"
arrow-ipc = "42.0"
half = { "version" = "=2.2.1", default-features = false }
object_store = "0.6.1"
snafu = "0.7.4"

ci/build_linux_artifacts.sh Normal file → Executable file

@@ -1,72 +1,19 @@
#!/bin/bash
# Builds the Linux artifacts (node binaries).
# Usage: ./build_linux_artifacts.sh [target]
# Targets supported:
# - x86_64-unknown-linux-gnu:centos
# - aarch64-unknown-linux-gnu:centos
# - aarch64-unknown-linux-musl
# - x86_64-unknown-linux-musl
# TODO: refactor this into a Docker container we can pull
set -e
ARCH=${1:-x86_64}
setup_dependencies() {
echo "Installing system dependencies..."
if [[ $1 == *musl ]]; then
# musllinux
apk add openssl-dev
else
# rust / debian
apt update
apt install -y libssl-dev protobuf-compiler
fi
}
# We pass down the current user so that when we later mount the local files
# into the container, the files are accessible by the current user.
pushd ci/manylinux_node
docker build \
-t lancedb-node-manylinux \
--build-arg="ARCH=$ARCH" \
--build-arg="DOCKER_USER=$(id -u)" \
--progress=plain \
.
popd
install_node() {
echo "Installing node..."
curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.34.0/install.sh | bash
source "$HOME"/.bashrc
if [[ $1 == *musl ]]; then
# This node version is 15, we need 16 or higher:
# apk add nodejs-current npm
# So instead we install from source (nvm doesn't provide binaries for musl):
nvm install -s --no-progress 17
else
nvm install --no-progress 17 # latest that supports glibc 2.17
fi
}
build_node_binary() {
echo "Building node library for $1..."
pushd node
npm ci
if [[ $1 == *musl ]]; then
# This is needed for cargo to allow build cdylibs with musl
export RUSTFLAGS="-C target-feature=-crt-static"
fi
# Cargo can run out of memory while pulling dependencies, especially when running
# in QEMU. This is a workaround for that.
export CARGO_NET_GIT_FETCH_WITH_CLI=true
# We don't pass in target, since the native target here already matches
# We need to pass OPENSSL_LIB_DIR and OPENSSL_INCLUDE_DIR for static build to work https://github.com/sfackler/rust-openssl/issues/877
OPENSSL_STATIC=1 OPENSSL_LIB_DIR=/usr/lib/x86_64-linux-gnu OPENSSL_INCLUDE_DIR=/usr/include/openssl/ npm run build-release
npm run pack-build
popd
}
TARGET=${1:-x86_64-unknown-linux-gnu}
# Others:
# aarch64-unknown-linux-gnu
# x86_64-unknown-linux-musl
# aarch64-unknown-linux-musl
setup_dependencies $TARGET
install_node $TARGET
build_node_binary $TARGET
docker run \
-v $(pwd):/io -w /io \
lancedb-node-manylinux \
bash ci/manylinux_node/build.sh $ARCH


@@ -0,0 +1,31 @@
# Many linux dockerfile with Rust, Node, and Lance dependencies installed.
# This container allows building the node modules native libraries in an
# environment with a very old glibc, so that we are compatible with a wide
# range of linux distributions.
ARG ARCH=x86_64
FROM quay.io/pypa/manylinux2014_${ARCH}
ARG ARCH=x86_64
ARG DOCKER_USER=default_user
# Install static openssl
COPY install_openssl.sh install_openssl.sh
RUN ./install_openssl.sh ${ARCH} > /dev/null
# Protobuf is also installed as root.
COPY install_protobuf.sh install_protobuf.sh
RUN ./install_protobuf.sh ${ARCH}
ENV DOCKER_USER=${DOCKER_USER}
# Create a group and user
RUN echo ${ARCH} && adduser --user-group --create-home --uid ${DOCKER_USER} build_user
# We switch to the user to install Rust and Node, since those like to be
# installed at the user level.
USER ${DOCKER_USER}
COPY prepare_manylinux_node.sh prepare_manylinux_node.sh
RUN cp /prepare_manylinux_node.sh $HOME/ && \
cd $HOME && \
./prepare_manylinux_node.sh ${ARCH}

ci/manylinux_node/build.sh Executable file

@@ -0,0 +1,19 @@
#!/bin/bash
# Builds the node module for manylinux. Invoked by ci/build_linux_artifacts.sh.
set -e
ARCH=${1:-x86_64}
if [ "$ARCH" = "x86_64" ]; then
export OPENSSL_LIB_DIR=/usr/local/lib64/
else
export OPENSSL_LIB_DIR=/usr/local/lib/
fi
export OPENSSL_STATIC=1
export OPENSSL_INCLUDE_DIR=/usr/local/include/openssl
source $HOME/.bashrc
cd node
npm ci
npm run build-release
npm run pack-build


@@ -0,0 +1,26 @@
#!/bin/bash
# Builds openssl from source so we can statically link to it
# this is to avoid the error we get with the system installation:
# /usr/bin/ld: <library>: version node not found for symbol SSLeay@@OPENSSL_1.0.1
# /usr/bin/ld: failed to set dynamic section sizes: Bad value
set -e
git clone -b OpenSSL_1_1_1u \
--single-branch \
https://github.com/openssl/openssl.git
pushd openssl
if [[ $1 == x86_64* ]]; then
ARCH=linux-x86_64
else
# gnu target
ARCH=linux-aarch64
fi
./Configure no-shared $ARCH
make
make install


@@ -0,0 +1,15 @@
#!/bin/bash
# Installs protobuf compiler. Should be run as root.
set -e
if [[ $1 == x86_64* ]]; then
ARCH=x86_64
else
# gnu target
ARCH=aarch_64
fi
PB_REL=https://github.com/protocolbuffers/protobuf/releases
PB_VERSION=23.1
curl -LO $PB_REL/download/v$PB_VERSION/protoc-$PB_VERSION-linux-$ARCH.zip
unzip protoc-$PB_VERSION-linux-$ARCH.zip -d /usr/local


@@ -0,0 +1,21 @@
#!/bin/bash
set -e
install_node() {
echo "Installing node..."
curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.34.0/install.sh | bash
source "$HOME"/.bashrc
nvm install --no-progress 16
}
install_rust() {
echo "Installing rust..."
curl https://sh.rustup.rs -sSf | bash -s -- -y
export PATH="$PATH:/root/.cargo/bin"
}
install_node
install_rust


@@ -57,12 +57,14 @@ nav:
- Basics: basic.md
- Embeddings: embedding.md
- Python full-text search: fts.md
- Python integrations:
- Integrations:
- Pandas and PyArrow: python/arrow.md
- DuckDB: python/duckdb.md
- LangChain 🦜️🔗: https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/lancedb.html
- LangChain JS/TS 🦜️🔗: https://js.langchain.com/docs/modules/data_connection/vectorstores/integrations/lancedb
- LlamaIndex 🦙: https://gpt-index.readthedocs.io/en/latest/examples/vector_stores/LanceDBIndexDemo.html
- Pydantic: python/pydantic.md
- Voxel51: integrations/voxel51.md
- Python examples:
- YouTube Transcript Search: notebooks/youtube_transcript_search.ipynb
- Documentation QA Bot using LangChain: notebooks/code_qa_bot.ipynb
@@ -72,6 +74,7 @@ nav:
- Javascript examples:
- YouTube Transcript Search: examples/youtube_transcript_bot_with_nodejs.md
- TransformersJS Embedding Search: examples/transformerjs_embedding_search_nodejs.md
- References:
- Vector Search: search.md
- SQL filters: sql.md

docs/src/assets/voxel.gif (new binary file, 953 KiB; not shown)


@@ -1,6 +1,6 @@
# Vector embedding search using TransformersJS
## Embed and query data from LacneDB using TransformersJS
## Embed and query data from LanceDB using TransformersJS
<img id="splash" width="400" alt="transformersjs" src="https://github.com/lancedb/lancedb/assets/43097991/88a31e30-3d6f-4eef-9216-4b7c688f1b4f">


@@ -4,4 +4,10 @@
<img id="splash" width="400" alt="youtube transcript search" src="https://user-images.githubusercontent.com/917119/236965568-def7394d-171c-45f2-939d-8edfeaadd88c.png">
<a href="https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/examples/youtube_bot/main.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab">
Scripts - [![Python](https://img.shields.io/badge/python-3670A0?style=for-the-badge&logo=python&logoColor=ffdd54)](./examples/youtube_bot/main.py) [![JavaScript](https://img.shields.io/badge/javascript-%23323330.svg?style=for-the-badge&logo=javascript&logoColor=%23F7DF1E)](./examples/youtube_bot/index.js)
This example is in a [notebook](https://github.com/lancedb/lancedb/blob/main/docs/src/notebooks/youtube_transcript_search.ipynb)


@@ -0,0 +1,71 @@
![example](/assets/voxel.gif)
Basic recipe
____________
The basic workflow to use LanceDB to create a similarity index on your FiftyOne
datasets and use this to query your data is as follows:
1) Load a dataset into FiftyOne
2) Compute embedding vectors for samples or patches in your dataset, or select
a model to use to generate embeddings
3) Use the `compute_similarity()`
method to generate a LanceDB table for the samples or object
patches embeddings in a dataset by setting the parameter `backend="lancedb"` and
specifying a `brain_key` of your choice
4) Use this LanceDB table to query your data with
`sort_by_similarity()`
5) If desired, delete the table
The example below demonstrates this workflow.
!!! Note
You must install the LanceDB Python client to run this
```
pip install lancedb
```
```python
import fiftyone as fo
import fiftyone.brain as fob
import fiftyone.zoo as foz
# Step 1: Load your data into FiftyOne
dataset = foz.load_zoo_dataset("quickstart")
# Steps 2 and 3: Compute embeddings and create a similarity index
lancedb_index = fob.compute_similarity(
dataset,
model="clip-vit-base32-torch",
brain_key="lancedb_index",
backend="lancedb",
)
```
Once the similarity index has been generated, we can query our data in FiftyOne
by specifying the `brain_key`:
```python
# Step 4: Query your data
query = dataset.first().id # query by sample ID
view = dataset.sort_by_similarity(
query,
brain_key="lancedb_index",
k=10, # limit to 10 most similar samples
)
# Step 5 (optional): Cleanup
# Delete the LanceDB table
lancedb_index.cleanup()
# Delete run record from FiftyOne
dataset.delete_brain_run("lancedb_index")
```
For a more in-depth walkthrough of the integration, visit the LanceDB guide on Voxel51: [LanceDB x Voxel51](https://docs.voxel51.com/integrations/lancedb.html)


@@ -10,7 +10,11 @@
"\n",
"This Q&A bot will allow you to query your own documentation easily using questions. We'll also demonstrate the use of LangChain and LanceDB using the OpenAI API. \n",
"\n",
"In this example we'll use Pandas 2.0 documentation, but, this could be replaced for your own docs as well"
"In this example we'll use Pandas 2.0 documentation, but, this could be replaced for your own docs as well\n",
"\n",
"<a href=\"https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/examples/Code-Documentation-QA-Bot/main.ipynb\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"></a>\n",
"\n",
"Scripts - [![Python](https://img.shields.io/badge/python-3670A0?style=for-the-badge&logo=python&logoColor=ffdd54)](./examples/Code-Documentation-QA-Bot/main.py) [![JavaScript](https://img.shields.io/badge/javascript-%23323330.svg?style=for-the-badge&logo=javascript&logoColor=%23F7DF1E)](./examples/Code-Documentation-QA-Bot/index.js)"
]
},
{
@@ -181,7 +185,7 @@
"id": "c3852dd3",
"metadata": {},
"source": [
"# Generating emebeddings from our docs\n",
"# Generating embeddings from our docs\n",
"\n",
"Now that we have our raw documents loaded, we need to pre-process them to generate embeddings:"
]


@@ -1,5 +1,14 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"![example](https://github.com/lancedb/vectordb-recipes/assets/15766192/799f94a1-a01d-4a5b-a627-2a733bbb4227)\n",
"\n",
" <a href=\"https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/examples/multimodal_clip/main.ipynb\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"></a>| [![Python](https://img.shields.io/badge/python-3670A0?style=for-the-badge&logo=python&logoColor=ffdd54)](./examples/multimodal_clip/main.py) |"
]
},
{
"cell_type": "code",
"execution_count": 2,
@@ -42,6 +51,19 @@
"## First run setup: Download data and pre-process"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"### Get dataset\n",
"\n",
"!wget https://eto-public.s3.us-west-2.amazonaws.com/datasets/diffusiondb_lance.tar.gz\n",
"!tar -xvf diffusiondb_lance.tar.gz\n",
"!mv diffusiondb_test rawdata.lance\n"
]
},
{
"cell_type": "code",
"execution_count": 30,
@@ -247,7 +269,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "Python 3.11.4 64-bit",
"language": "python",
"name": "python3"
},
@@ -261,7 +283,12 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
"version": "3.11.4"
},
"vscode": {
"interpreter": {
"hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
}
}
},
"nbformat": 4,


@@ -8,7 +8,12 @@
"source": [
"# Youtube Transcript Search QA Bot\n",
"\n",
"This Q&A bot will allow you to search through youtube transcripts using natural language! By going through this notebook, we'll introduce how you can use LanceDB to store and manage your data easily."
"This Q&A bot will allow you to search through youtube transcripts using natural language! By going through this notebook, we'll introduce how you can use LanceDB to store and manage your data easily.\n",
"\n",
"\n",
"<a href=\"https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/examples/youtube_bot/main.ipynb\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\">\n",
"\n",
"Scripts - [![Python](https://img.shields.io/badge/python-3670A0?style=for-the-badge&logo=python&logoColor=ffdd54)](./examples/youtube_bot/main.py) [![JavaScript](https://img.shields.io/badge/javascript-%23323330.svg?style=for-the-badge&logo=javascript&logoColor=%23F7DF1E)](./examples/youtube_bot/index.js)\n"
]
},
{


@@ -1,6 +1,8 @@
# Pydantic
[Pydantic](https://docs.pydantic.dev/latest/) is a data validation library in Python.
LanceDB integrates with Pydantic for schema inference, data ingestion, and query result casting.
## Schema


@@ -7,7 +7,8 @@ excluded_files = [
"../src/embedding.md",
"../src/examples/serverless_lancedb_with_s3_and_lambda.md",
"../src/examples/serverless_qa_bot_with_modal_and_langchain.md",
"../src/examples/youtube_transcript_bot_with_nodejs.md"
"../src/examples/youtube_transcript_bot_with_nodejs.md",
"../src/integrations/voxel51.md",
]
python_prefix = "py"


@@ -17,7 +17,7 @@ const { currentTarget } = require('@neon-rs/load');
let nativeLib;
try {
nativeLib = require(`vectordb-${currentTarget()}`);
nativeLib = require(`@lancedb/vectordb-${currentTarget()}`);
} catch (e) {
try {
// Might be developing locally, so try that. But don't expose that error
@@ -25,12 +25,12 @@ try {
nativeLib = require("./index.node");
} catch {
throw new Error(`vectordb: failed to load native library.
You may need to run \`npm install vectordb-${currentTarget()}\`.
You may need to run \`npm install @lancedb/vectordb-${currentTarget()}\`.
If that does not work, please file a bug report at https://github.com/lancedb/lancedb/issues
Source error: ${e}`);
}
}
}
// Dynamic require for runtime.

node/package-lock.json generated

@@ -1,12 +1,12 @@
{
"name": "vectordb",
"version": "0.1.15",
"version": "0.1.19",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "vectordb",
"version": "0.1.15",
"version": "0.1.19",
"cpu": [
"x64",
"arm64"
@@ -24,7 +24,7 @@
"axios": "^1.4.0"
},
"devDependencies": {
"@neon-rs/cli": "^0.0.74",
"@neon-rs/cli": "^0.0.160",
"@types/chai": "^4.3.4",
"@types/chai-as-promised": "^7.1.5",
"@types/mocha": "^10.0.1",
@@ -51,11 +51,11 @@
"typescript": "*"
},
"optionalDependencies": {
"vectordb-darwin-arm64": "0.1.15",
"vectordb-darwin-x64": "0.1.15",
"vectordb-linux-arm64-gnu": "0.1.15",
"vectordb-linux-x64-gnu": "0.1.15",
"vectordb-win32-x64-msvc": "0.1.15"
"@lancedb/vectordb-darwin-arm64": "0.1.19",
"@lancedb/vectordb-darwin-x64": "0.1.19",
"@lancedb/vectordb-linux-arm64-gnu": "0.1.19",
"@lancedb/vectordb-linux-x64-gnu": "0.1.19",
"@lancedb/vectordb-win32-x64-msvc": "0.1.19"
}
},
"node_modules/@apache-arrow/ts": {
@@ -85,6 +85,97 @@
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.0.tgz",
"integrity": "sha512-336iVw3rtn2BUK7ORdIAHTyxHGRIHVReokCR3XjbckJMK7ms8FysBfhLR8IXnAgy7T0PTPNBWKiH514FOW/WSg=="
},
"node_modules/@cargo-messages/android-arm-eabi": {
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@cargo-messages/android-arm-eabi/-/android-arm-eabi-0.0.160.tgz",
"integrity": "sha512-PTgCEmBHEPKJbxwlHVXB3aGES+NqpeBvn6hJNYWIkET3ZQCSJnScMlIDQXEkWndK7J+hW3Or3H32a93B/MbbfQ==",
"cpu": [
"arm"
],
"dev": true,
"optional": true,
"os": [
"android"
]
},
"node_modules/@cargo-messages/darwin-arm64": {
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@cargo-messages/darwin-arm64/-/darwin-arm64-0.0.160.tgz",
"integrity": "sha512-YSVUuc8TUTi/XmZVg9KrH0bDywKLqC1zeTyZYAYDDmqVDZW9KeTnbBUECKRs56iyHeO+kuEkVW7MKf7j2zb/FA==",
"cpu": [
"arm64"
],
"dev": true,
"optional": true,
"os": [
"darwin"
]
},
"node_modules/@cargo-messages/darwin-x64": {
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@cargo-messages/darwin-x64/-/darwin-x64-0.0.160.tgz",
"integrity": "sha512-U+YlAR+9tKpBljnNPWMop5YhvtwfIPQSAaUYN2llteC7ZNU5/cv8CGT1vm7uFNxr2LeGuAtRbzIh2gUmTV8mng==",
"cpu": [
"x64"
],
"dev": true,
"optional": true,
"os": [
"darwin"
]
},
"node_modules/@cargo-messages/linux-arm-gnueabihf": {
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@cargo-messages/linux-arm-gnueabihf/-/linux-arm-gnueabihf-0.0.160.tgz",
"integrity": "sha512-wqAelTzVv1E7Ls4aviqUbem5xjzCaJQxQtVnLhv6pf1k0UyEHCS2WdufFFmWcojGe7QglI4uve3KTe01MKYj0A==",
"cpu": [
"arm"
],
"dev": true,
"optional": true,
"os": [
"linux"
]
},
"node_modules/@cargo-messages/linux-x64-gnu": {
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@cargo-messages/linux-x64-gnu/-/linux-x64-gnu-0.0.160.tgz",
"integrity": "sha512-LQ6e7O7YYkWfDNIi/53q2QG/+lZok72LOG+NKDVCrrY4TYUcrTqWAybOV6IlkVntKPnpx8YB95umSQGeVuvhpQ==",
"cpu": [
"x64"
],
"dev": true,
"optional": true,
"os": [
"linux"
]
},
"node_modules/@cargo-messages/win32-arm64-msvc": {
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@cargo-messages/win32-arm64-msvc/-/win32-arm64-msvc-0.0.160.tgz",
"integrity": "sha512-VDMBhyun02gIDwmEhkYP1W9Z0tYqn4drgY5Iua1qV2tYOU58RVkWhzUYxM9rzYbnwKZlltgM46J/j5QZ3VaFrA==",
"cpu": [
"arm64"
],
"dev": true,
"optional": true,
"os": [
"win32"
]
},
"node_modules/@cargo-messages/win32-x64-msvc": {
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@cargo-messages/win32-x64-msvc/-/win32-x64-msvc-0.0.160.tgz",
"integrity": "sha512-vnoglDxF6zj0W/Co9D0H/bgnrhUuO5EumIf9v3ujLtBH94rAX11JsXh/FgC/8wQnQSsLyWSq70YxNS2wdETxjA==",
"cpu": [
"x64"
],
"dev": true,
"optional": true,
"os": [
"win32"
]
},
"node_modules/@cspotcode/source-map-support": {
"version": "0.8.1",
"resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz",
@@ -223,13 +314,82 @@
"@jridgewell/sourcemap-codec": "^1.4.10"
}
},
"node_modules/@lancedb/vectordb-darwin-arm64": {
"version": "0.1.19",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.1.19.tgz",
"integrity": "sha512-efQhJkBKvMNhjFq3Sw3/qHo9D9gb9UqiIr98n3STsbNxBQjMnWemXn91Ckl40siRG1O8qXcINW7Qs/EGmus+kg==",
"cpu": [
"arm64"
],
"optional": true,
"os": [
"darwin"
]
},
"node_modules/@lancedb/vectordb-darwin-x64": {
"version": "0.1.19",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.1.19.tgz",
"integrity": "sha512-r6OZNVyemAssABz2w7CRhe7dyREwBEfTytn+ux1zzTnzsgMgDovCQ0rQ3WZcxWvcy7SFCxiemA9IP1b/lsb4tQ==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"darwin"
]
},
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
"version": "0.1.19",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.1.19.tgz",
"integrity": "sha512-mL/hRmZp6Kw7hmGJBdOZfp/tTYiCdlOcs8DA/+nr2eiXERv0gIhyiKvr2P5DwbBmut3qXEkDalMHTo95BSdL2A==",
"cpu": [
"arm64"
],
"optional": true,
"os": [
"linux"
]
},
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
"version": "0.1.19",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.1.19.tgz",
"integrity": "sha512-AG0FHksbbr+cHVKPi4B8cmBtqb6T9E0uaK4kyZkXrX52/xtv9RYVZcykaB/tSSm0XNFPWWRnx9R8UqNZV/hxMA==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"linux"
]
},
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
"version": "0.1.19",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.1.19.tgz",
"integrity": "sha512-PDWZ2hvLVXH4Z4WIO1rsWY8ev3NpNm7aXlaey32P+l1Iz9Hia9+F2GBpp2UiEQKfvbk82ucAvBLRmpSsHY8Tlw==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"win32"
]
},
"node_modules/@neon-rs/cli": {
"version": "0.0.74",
"resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.74.tgz",
"integrity": "sha512-9lPmNmjej5iKKOTMPryOMubwkgMRyTWRuaq1yokASvI5mPhr2kzPN7UVjdCOjQvpunNPngR9yAHoirpjiWhUHw==",
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz",
"integrity": "sha512-GQjzHPJVTOARbX3nP/fAWqBq7JlQ8XgfYlCa+iwzIXf0LC1EyfJTX+vqGD/36b9lKoyY01Z/aDUB9o/qF6ztHA==",
"dev": true,
"bin": {
"neon": "index.js"
},
"optionalDependencies": {
"@cargo-messages/android-arm-eabi": "0.0.160",
"@cargo-messages/darwin-arm64": "0.0.160",
"@cargo-messages/darwin-x64": "0.0.160",
"@cargo-messages/linux-arm-gnueabihf": "0.0.160",
"@cargo-messages/linux-x64-gnu": "0.0.160",
"@cargo-messages/win32-arm64-msvc": "0.0.160",
"@cargo-messages/win32-x64-msvc": "0.0.160"
}
},
"node_modules/@neon-rs/load": {
@@ -4542,6 +4702,55 @@
}
}
},
"@cargo-messages/android-arm-eabi": {
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@cargo-messages/android-arm-eabi/-/android-arm-eabi-0.0.160.tgz",
"integrity": "sha512-PTgCEmBHEPKJbxwlHVXB3aGES+NqpeBvn6hJNYWIkET3ZQCSJnScMlIDQXEkWndK7J+hW3Or3H32a93B/MbbfQ==",
"dev": true,
"optional": true
},
"@cargo-messages/darwin-arm64": {
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@cargo-messages/darwin-arm64/-/darwin-arm64-0.0.160.tgz",
"integrity": "sha512-YSVUuc8TUTi/XmZVg9KrH0bDywKLqC1zeTyZYAYDDmqVDZW9KeTnbBUECKRs56iyHeO+kuEkVW7MKf7j2zb/FA==",
"dev": true,
"optional": true
},
"@cargo-messages/darwin-x64": {
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@cargo-messages/darwin-x64/-/darwin-x64-0.0.160.tgz",
"integrity": "sha512-U+YlAR+9tKpBljnNPWMop5YhvtwfIPQSAaUYN2llteC7ZNU5/cv8CGT1vm7uFNxr2LeGuAtRbzIh2gUmTV8mng==",
"dev": true,
"optional": true
},
"@cargo-messages/linux-arm-gnueabihf": {
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@cargo-messages/linux-arm-gnueabihf/-/linux-arm-gnueabihf-0.0.160.tgz",
"integrity": "sha512-wqAelTzVv1E7Ls4aviqUbem5xjzCaJQxQtVnLhv6pf1k0UyEHCS2WdufFFmWcojGe7QglI4uve3KTe01MKYj0A==",
"dev": true,
"optional": true
},
"@cargo-messages/linux-x64-gnu": {
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@cargo-messages/linux-x64-gnu/-/linux-x64-gnu-0.0.160.tgz",
"integrity": "sha512-LQ6e7O7YYkWfDNIi/53q2QG/+lZok72LOG+NKDVCrrY4TYUcrTqWAybOV6IlkVntKPnpx8YB95umSQGeVuvhpQ==",
"dev": true,
"optional": true
},
"@cargo-messages/win32-arm64-msvc": {
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@cargo-messages/win32-arm64-msvc/-/win32-arm64-msvc-0.0.160.tgz",
"integrity": "sha512-VDMBhyun02gIDwmEhkYP1W9Z0tYqn4drgY5Iua1qV2tYOU58RVkWhzUYxM9rzYbnwKZlltgM46J/j5QZ3VaFrA==",
"dev": true,
"optional": true
},
"@cargo-messages/win32-x64-msvc": {
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@cargo-messages/win32-x64-msvc/-/win32-x64-msvc-0.0.160.tgz",
"integrity": "sha512-vnoglDxF6zj0W/Co9D0H/bgnrhUuO5EumIf9v3ujLtBH94rAX11JsXh/FgC/8wQnQSsLyWSq70YxNS2wdETxjA==",
"dev": true,
"optional": true
},
"@cspotcode/source-map-support": {
"version": "0.8.1",
"resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz",
@@ -4642,11 +4851,50 @@
"@jridgewell/sourcemap-codec": "^1.4.10"
}
},
"@lancedb/vectordb-darwin-arm64": {
"version": "0.1.19",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.1.19.tgz",
"integrity": "sha512-efQhJkBKvMNhjFq3Sw3/qHo9D9gb9UqiIr98n3STsbNxBQjMnWemXn91Ckl40siRG1O8qXcINW7Qs/EGmus+kg==",
"optional": true
},
"@lancedb/vectordb-darwin-x64": {
"version": "0.1.19",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.1.19.tgz",
"integrity": "sha512-r6OZNVyemAssABz2w7CRhe7dyREwBEfTytn+ux1zzTnzsgMgDovCQ0rQ3WZcxWvcy7SFCxiemA9IP1b/lsb4tQ==",
"optional": true
},
"@lancedb/vectordb-linux-arm64-gnu": {
"version": "0.1.19",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.1.19.tgz",
"integrity": "sha512-mL/hRmZp6Kw7hmGJBdOZfp/tTYiCdlOcs8DA/+nr2eiXERv0gIhyiKvr2P5DwbBmut3qXEkDalMHTo95BSdL2A==",
"optional": true
},
"@lancedb/vectordb-linux-x64-gnu": {
"version": "0.1.19",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.1.19.tgz",
"integrity": "sha512-AG0FHksbbr+cHVKPi4B8cmBtqb6T9E0uaK4kyZkXrX52/xtv9RYVZcykaB/tSSm0XNFPWWRnx9R8UqNZV/hxMA==",
"optional": true
},
"@lancedb/vectordb-win32-x64-msvc": {
"version": "0.1.19",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.1.19.tgz",
"integrity": "sha512-PDWZ2hvLVXH4Z4WIO1rsWY8ev3NpNm7aXlaey32P+l1Iz9Hia9+F2GBpp2UiEQKfvbk82ucAvBLRmpSsHY8Tlw==",
"optional": true
},
"@neon-rs/cli": {
"version": "0.0.74",
"resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.74.tgz",
"integrity": "sha512-9lPmNmjej5iKKOTMPryOMubwkgMRyTWRuaq1yokASvI5mPhr2kzPN7UVjdCOjQvpunNPngR9yAHoirpjiWhUHw==",
"dev": true
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz",
"integrity": "sha512-GQjzHPJVTOARbX3nP/fAWqBq7JlQ8XgfYlCa+iwzIXf0LC1EyfJTX+vqGD/36b9lKoyY01Z/aDUB9o/qF6ztHA==",
"dev": true,
"requires": {
"@cargo-messages/android-arm-eabi": "0.0.160",
"@cargo-messages/darwin-arm64": "0.0.160",
"@cargo-messages/darwin-x64": "0.0.160",
"@cargo-messages/linux-arm-gnueabihf": "0.0.160",
"@cargo-messages/linux-x64-gnu": "0.0.160",
"@cargo-messages/win32-arm64-msvc": "0.0.160",
"@cargo-messages/win32-x64-msvc": "0.0.160"
}
},
"@neon-rs/load": {
"version": "0.0.74",


@@ -1,6 +1,6 @@
{
"name": "vectordb",
"version": "0.1.15",
"version": "0.1.19",
"description": " Serverless, low-latency vector database for AI applications",
"main": "dist/index.js",
"types": "dist/index.d.ts",
@@ -27,7 +27,7 @@
"author": "Lance Devs",
"license": "Apache-2.0",
"devDependencies": {
"@neon-rs/cli": "^0.0.74",
"@neon-rs/cli": "^0.0.160",
"@types/chai": "^4.3.4",
"@types/chai-as-promised": "^7.1.5",
"@types/mocha": "^10.0.1",
@@ -70,18 +70,18 @@
],
"neon": {
"targets": {
"x86_64-apple-darwin": "vectordb-darwin-x64",
"aarch64-apple-darwin": "vectordb-darwin-arm64",
"x86_64-unknown-linux-gnu": "vectordb-linux-x64-gnu",
"aarch64-unknown-linux-gnu": "vectordb-linux-arm64-gnu",
"x86_64-pc-windows-msvc": "vectordb-win32-x64-msvc"
"x86_64-apple-darwin": "@lancedb/vectordb-darwin-x64",
"aarch64-apple-darwin": "@lancedb/vectordb-darwin-arm64",
"x86_64-unknown-linux-gnu": "@lancedb/vectordb-linux-x64-gnu",
"aarch64-unknown-linux-gnu": "@lancedb/vectordb-linux-arm64-gnu",
"x86_64-pc-windows-msvc": "@lancedb/vectordb-win32-x64-msvc"
}
},
"optionalDependencies": {
"vectordb-darwin-arm64": "0.1.15",
"vectordb-darwin-x64": "0.1.15",
"vectordb-linux-arm64-gnu": "0.1.15",
"vectordb-linux-x64-gnu": "0.1.15",
"vectordb-win32-x64-msvc": "0.1.15"
"@lancedb/vectordb-darwin-arm64": "0.1.19",
"@lancedb/vectordb-darwin-x64": "0.1.19",
"@lancedb/vectordb-linux-arm64-gnu": "0.1.19",
"@lancedb/vectordb-linux-x64-gnu": "0.1.19",
"@lancedb/vectordb-win32-x64-msvc": "0.1.19"
}
}


@@ -26,3 +26,8 @@ export interface EmbeddingFunction<T> {
*/
embed: (data: T[]) => Promise<number[][]>
}
export function isEmbeddingFunction<T> (value: any): value is EmbeddingFunction<T> {
return typeof value.sourceColumn === 'string' &&
typeof value.embed === 'function'
}


@@ -20,10 +20,12 @@ import { fromRecordsToBuffer } from './arrow'
import type { EmbeddingFunction } from './embedding/embedding_function'
import { RemoteConnection } from './remote'
import { Query } from './query'
import { isEmbeddingFunction } from './embedding/embedding_function'
// eslint-disable-next-line @typescript-eslint/no-var-requires
const { databaseNew, databaseTableNames, databaseOpenTable, databaseDropTable, tableCreate, tableAdd, tableCreateVectorIndex, tableCountRows, tableDelete } = require('../native.js')
export { Query }
export type { EmbeddingFunction }
export { OpenAIEmbeddingFunction } from './embedding/openai'
@@ -100,10 +102,35 @@ export interface Connection {
*
* @param {string} name - The name of the table.
* @param data - Non-empty Array of Records to be inserted into the table
* @param {WriteMode} mode - The write mode to use when creating the table.
*/
createTable (name: string, data: Array<Record<string, unknown>>): Promise<Table>
/**
* Creates a new Table and initialize it with new data.
*
* @param {string} name - The name of the table.
* @param data - Non-empty Array of Records to be inserted into the table
* @param {WriteOptions} options - The write options to use when creating the table.
*/
createTable (name: string, data: Array<Record<string, unknown>>, options: WriteOptions): Promise<Table>
/**
* Creates a new Table and initialize it with new data.
*
* @param {string} name - The name of the table.
* @param data - Non-empty Array of Records to be inserted into the table
* @param {EmbeddingFunction} embeddings - An embedding function to use on this table
*/
createTable<T>(name: string, data: Array<Record<string, unknown>>, mode?: WriteMode, embeddings?: EmbeddingFunction<T>): Promise<Table<T>>
createTable<T> (name: string, data: Array<Record<string, unknown>>, embeddings: EmbeddingFunction<T>): Promise<Table<T>>
/**
* Creates a new Table and initialize it with new data.
*
* @param {string} name - The name of the table.
* @param data - Non-empty Array of Records to be inserted into the table
* @param {EmbeddingFunction} embeddings - An embedding function to use on this table
* @param {WriteOptions} options - The write options to use when creating the table.
*/
createTable<T> (name: string, data: Array<Record<string, unknown>>, embeddings: EmbeddingFunction<T>, options: WriteOptions): Promise<Table<T>>
createTableArrow(name: string, table: ArrowTable): Promise<Table>
@@ -237,32 +264,19 @@ export class LocalConnection implements Connection {
}
}
/**
* Creates a new Table and initialize it with new data.
*
* @param name The name of the table.
* @param data Non-empty Array of Records to be inserted into the Table
* @param mode The write mode to use when creating the table.
*/
async createTable (name: string, data: Array<Record<string, unknown>>, mode?: WriteMode): Promise<Table>
async createTable (name: string, data: Array<Record<string, unknown>>, mode: WriteMode): Promise<Table>
/**
* Creates a new Table and initialize it with new data.
*
* @param name The name of the table.
* @param data Non-empty Array of Records to be inserted into the Table
* @param mode The write mode to use when creating the table.
* @param embeddings An embedding function to use on this Table
*/
async createTable<T> (name: string, data: Array<Record<string, unknown>>, mode: WriteMode, embeddings: EmbeddingFunction<T>): Promise<Table<T>>
async createTable<T> (name: string, data: Array<Record<string, unknown>>, mode: WriteMode, embeddings?: EmbeddingFunction<T>): Promise<Table<T>>
async createTable<T> (name: string, data: Array<Record<string, unknown>>, mode: WriteMode, embeddings?: EmbeddingFunction<T>): Promise<Table<T>> {
if (mode === undefined) {
mode = WriteMode.Create
async createTable<T> (name: string, data: Array<Record<string, unknown>>, optsOrEmbedding?: WriteOptions | EmbeddingFunction<T>, opt?: WriteOptions): Promise<Table<T>> {
let writeOptions: WriteOptions = new DefaultWriteOptions()
if (opt !== undefined && isWriteOptions(opt)) {
writeOptions = opt
} else if (optsOrEmbedding !== undefined && isWriteOptions(optsOrEmbedding)) {
writeOptions = optsOrEmbedding
}
const createArgs = [this._db, name, await fromRecordsToBuffer(data, embeddings), mode.toLowerCase()]
let embeddings: undefined | EmbeddingFunction<T>
if (optsOrEmbedding !== undefined && isEmbeddingFunction(optsOrEmbedding)) {
embeddings = optsOrEmbedding
}
const createArgs = [this._db, name, await fromRecordsToBuffer(data, embeddings), writeOptions.writeMode?.toString()]
if (this._options.awsCredentials !== undefined) {
createArgs.push(this._options.awsCredentials.accessKeyId)
createArgs.push(this._options.awsCredentials.secretKey)
@@ -459,6 +473,23 @@ export enum WriteMode {
Append = 'append'
}
/**
* Write options when creating a Table.
*/
export interface WriteOptions {
/** A {@link WriteMode} to use on this operation */
writeMode?: WriteMode
}
export class DefaultWriteOptions implements WriteOptions {
writeMode = WriteMode.Create
}
export function isWriteOptions (value: any): value is WriteOptions {
return Object.keys(value).length === 1 &&
(value.writeMode === undefined || typeof value.writeMode === 'string')
}
/**
* Distance metrics type.
*/


@@ -18,13 +18,15 @@ import { tableFromIPC, type Table as ArrowTable } from 'apache-arrow'
export class HttpLancedbClient {
private readonly _url: string
private readonly _apiKey: () => string
public constructor (
url: string,
private readonly _apiKey: string,
apiKey: string,
private readonly _dbName?: string
) {
this._url = url
this._apiKey = () => apiKey
}
get uri (): string {
@@ -41,7 +43,7 @@ export class HttpLancedbClient {
filter?: string
): Promise<ArrowTable<any>> {
const response = await axios.post(
`${this._url}/v1/table/${tableName}`,
`${this._url}/v1/table/${tableName}/query/`,
{
vector,
k,
@@ -53,7 +55,7 @@ export class HttpLancedbClient {
{
headers: {
'Content-Type': 'application/json',
'x-api-key': this._apiKey,
'x-api-key': this._apiKey(),
...(this._dbName !== undefined ? { 'x-lancedb-database': this._dbName } : {})
},
responseType: 'arraybuffer',
@@ -84,7 +86,7 @@ export class HttpLancedbClient {
{
headers: {
'Content-Type': 'application/json',
'x-api-key': this._apiKey
'x-api-key': this._apiKey()
},
params,
timeout: 10000


@@ -16,6 +16,7 @@ import { describe } from 'mocha'
import { assert } from 'chai'
import { OpenAIEmbeddingFunction } from '../../embedding/openai'
import { isEmbeddingFunction } from '../../embedding/embedding_function'
// eslint-disable-next-line @typescript-eslint/no-var-requires
const { OpenAIApi } = require('openai')
@@ -47,4 +48,10 @@ describe('OpenAPIEmbeddings', function () {
assert.deepEqual(vectors[1], stubValue.data.data[1].embedding)
})
})
describe('isEmbeddingFunction', function () {
it('should match the isEmbeddingFunction guard', function () {
assert.isTrue(isEmbeddingFunction(new OpenAIEmbeddingFunction('text', 'sk-key')))
})
})
})


@@ -18,8 +18,7 @@ import * as chai from 'chai'
import * as chaiAsPromised from 'chai-as-promised'
import * as lancedb from '../index'
import { type AwsCredentials, type EmbeddingFunction, MetricType, WriteMode } from '../index'
import { Query } from '../query'
import { type AwsCredentials, type EmbeddingFunction, MetricType, Query, WriteMode, DefaultWriteOptions, isWriteOptions } from '../index'
const expect = chai.expect
const assert = chai.assert
@@ -135,6 +134,18 @@ describe('LanceDB client', function () {
assert.equal(await table.countRows(), 2)
})
it('fails to create a new table when the vector column is missing', async function () {
const dir = await track().mkdir('lancejs')
const con = await lancedb.connect(dir)
const data = [
{ id: 1, price: 10 }
]
const create = con.createTable('missing_vector', data)
await expect(create).to.be.rejectedWith(Error, 'column \'vector\' is missing')
})
it('use overwrite flag to overwrite existing table', async function () {
const dir = await track().mkdir('lancejs')
const con = await lancedb.connect(dir)
@@ -145,7 +156,7 @@ describe('LanceDB client', function () {
]
const tableName = 'overwrite'
await con.createTable(tableName, data, WriteMode.Create)
await con.createTable(tableName, data, { writeMode: WriteMode.Create })
const newData = [
{ id: 1, vector: [0.1, 0.2], price: 10 },
@@ -155,7 +166,7 @@ describe('LanceDB client', function () {
await expect(con.createTable(tableName, newData)).to.be.rejectedWith(Error, 'already exists')
const table = await con.createTable(tableName, newData, WriteMode.Overwrite)
const table = await con.createTable(tableName, newData, { writeMode: WriteMode.Overwrite })
assert.equal(table.name, tableName)
assert.equal(await table.countRows(), 3)
})
@@ -231,6 +242,22 @@ describe('LanceDB client', function () {
// Default replace = true
await table.createIndex({ type: 'ivf_pq', column: 'vector', num_partitions: 2, max_iters: 2, num_sub_vectors: 2 })
}).timeout(50_000)
it('it should fail when the column is not a vector', async function () {
const uri = await createTestDB(32, 300)
const con = await lancedb.connect(uri)
const table = await con.openTable('vectors')
const createIndex = table.createIndex({ type: 'ivf_pq', column: 'name', num_partitions: 2, max_iters: 2, num_sub_vectors: 2 })
await expect(createIndex).to.be.rejectedWith(/VectorIndex requires the column data type to be fixed size list of float32s/)
})
it('it should fail when the column is not a vector', async function () {
const uri = await createTestDB(32, 300)
const con = await lancedb.connect(uri)
const table = await con.openTable('vectors')
const createIndex = table.createIndex({ type: 'ivf_pq', column: 'name', num_partitions: -1, max_iters: 2, num_sub_vectors: 2 })
await expect(createIndex).to.be.rejectedWith('num_partitions: must be > 0')
})
})
describe('when using a custom embedding function', function () {
@@ -260,7 +287,7 @@ describe('LanceDB client', function () {
{ price: 10, name: 'foo' },
{ price: 50, name: 'bar' }
]
const table = await con.createTable('vectors', data, WriteMode.Create, embeddings)
const table = await con.createTable('vectors', data, embeddings, { writeMode: WriteMode.Create })
const results = await table.search('foo').execute()
assert.equal(results.length, 2)
})
@@ -318,3 +345,20 @@ describe('Drop table', function () {
assert.deepEqual(await con.tableNames(), ['t2'])
})
})
describe('WriteOptions', function () {
context('#isWriteOptions', function () {
it('should not match empty object', function () {
assert.equal(isWriteOptions({}), false)
})
it('should match write options', function () {
assert.equal(isWriteOptions({ writeMode: WriteMode.Create }), true)
})
it('should match undefined write mode', function () {
assert.equal(isWriteOptions({ writeMode: undefined }), true)
})
it('should match default write options', function () {
assert.equal(isWriteOptions(new DefaultWriteOptions()), true)
})
})
})


@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.1.13
current_version = 0.1.16
commit = True
message = [python] Bump version: {current_version} → {new_version}
tag = True


@@ -19,7 +19,11 @@ from .schema import vector
def connect(
uri: URI, *, api_key: Optional[str] = None, region: str = "us-west-2"
uri: URI,
*,
api_key: Optional[str] = None,
region: str = "us-west-2",
host_override: Optional[str] = None,
) -> DBConnection:
"""Connect to a LanceDB database.
@@ -55,5 +59,5 @@ def connect(
if isinstance(uri, str) and uri.startswith("db://"):
if api_key is None:
raise ValueError(f"api_key is required to connected LanceDB cloud: {uri}")
return RemoteDBConnection(uri, api_key, region)
return RemoteDBConnection(uri, api_key, region, host_override)
return LanceDBConnection(uri)
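The new `host_override` parameter flows through to `RemoteDBConnection` and its HTTP client (see the client diff further below). A minimal usage sketch; the database name, key, and endpoint are placeholders, not real values:

```python
import lancedb

# host_override redirects the remote client away from the default
# https://{db}.{region}.api.lancedb.com endpoint. All values here are
# illustrative, not real credentials.
db = lancedb.connect(
    "db://my-database",
    api_key="sk-...",
    region="us-west-2",
    host_override="https://lancedb.internal.example.com",
)
```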


@@ -11,17 +11,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from pathlib import Path
from typing import List, Union
from typing import Iterable, List, Union
import numpy as np
import pandas as pd
import pyarrow as pa
from .util import safe_import_pandas
pd = safe_import_pandas()
DATA = Union[List[dict], dict, "pd.DataFrame", pa.Table, Iterable[pa.RecordBatch]]
VEC = Union[list, np.ndarray, pa.Array, pa.ChunkedArray]
URI = Union[str, Path]
# TODO support generator
DATA = Union[List[dict], dict, pd.DataFrame]
VECTOR_COLUMN_NAME = "vector"
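`safe_import_pandas` comes from `lancedb.util`, which is not part of this compare view. A plausible sketch of the helper, inferred from the `pd is None` checks elsewhere in this changeset:

```python
# Hypothetical reconstruction of lancedb.util.safe_import_pandas; the real
# implementation is not shown in this diff.
def safe_import_pandas():
    try:
        import pandas as pd
        return pd
    except ImportError:
        return None
```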


@@ -12,12 +12,13 @@
# limitations under the License.
from __future__ import annotations
import pandas as pd
from .exceptions import MissingColumnError, MissingValueError
from .util import safe_import_pandas
pd = safe_import_pandas()
def contextualize(raw_df: pd.DataFrame) -> Contextualizer:
def contextualize(raw_df: "pd.DataFrame") -> Contextualizer:
"""Create a Contextualizer object for the given DataFrame.
Used to create context windows. Context windows are rolling subsets of text
@@ -175,8 +176,12 @@ class Contextualizer:
self._min_window_size = min_window_size
return self
def to_df(self) -> pd.DataFrame:
def to_df(self) -> "pd.DataFrame":
"""Create the context windows and return a DataFrame."""
if pd is None:
raise ImportError(
"pandas is required to create context windows using lancedb"
)
if self._text_col not in self._raw_df.columns.tolist():
raise MissingColumnError(self._text_col)


@@ -16,9 +16,8 @@ from __future__ import annotations
import os
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Dict, Iterable, List, Optional, Tuple, Union
from typing import Optional
import pandas as pd
import pyarrow as pa
from pyarrow import fs
@@ -39,9 +38,7 @@ class DBConnection(ABC):
def create_table(
self,
name: str,
data: Optional[
Union[List[dict], dict, pd.DataFrame, pa.Table, Iterable[pa.RecordBatch]],
] = None,
data: Optional[DATA] = None,
schema: Optional[pa.Schema] = None,
mode: str = "create",
on_bad_vectors: str = "error",
@@ -279,7 +276,7 @@ class LanceDBConnection(DBConnection):
def create_table(
self,
name: str,
data: Optional[Union[List[dict], dict, pd.DataFrame]] = None,
data: Optional[DATA] = None,
schema: pa.Schema = None,
mode: str = "create",
on_bad_vectors: str = "error",
@@ -319,14 +316,20 @@ class LanceDBConnection(DBConnection):
"""
return LanceTable.open(self, name)
def drop_table(self, name: str):
def drop_table(self, name: str, ignore_missing: bool = False):
"""Drop a table from the database.
Parameters
----------
name: str
The name of the table.
ignore_missing: bool, default False
If True, ignore if the table does not exist.
"""
filesystem, path = pa.fs.FileSystem.from_uri(self.uri)
table_path = os.path.join(path, name + ".lance")
filesystem.delete_dir(table_path)
try:
filesystem, path = fs_from_uri(self.uri)
table_path = os.path.join(path, name + ".lance")
filesystem.delete_dir(table_path)
except FileNotFoundError:
if not ignore_missing:
raise
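Together with #383 ("Implement drop table if exists"), the new `ignore_missing` flag makes cleanup idempotent. A short sketch (the path and table name are illustrative):

```python
import lancedb

db = lancedb.connect("/tmp/lancedb")  # illustrative local path
# Previously a missing table raised FileNotFoundError; the error can now
# be suppressed explicitly:
db.drop_table("scratch", ignore_missing=True)
```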


@@ -16,15 +16,19 @@ import sys
from typing import Callable, Union
import numpy as np
import pandas as pd
import pyarrow as pa
from lance.vector import vec_to_table
from retry import retry
from .util import safe_import_pandas
pd = safe_import_pandas()
DATA = Union[pa.Table, "pd.DataFrame"]
def with_embeddings(
func: Callable,
data: Union[pa.Table, pd.DataFrame],
data: DATA,
column: str = "text",
wrap_api: bool = True,
show_progress: bool = False,
@@ -60,7 +64,7 @@ def with_embeddings(
func = func.batch_size(batch_size)
if show_progress:
func = func.show_progress()
if isinstance(data, pd.DataFrame):
if pd is not None and isinstance(data, pd.DataFrame):
data = pa.Table.from_pandas(data, preserve_index=False)
embeddings = func(data[column].to_numpy())
table = vec_to_table(np.array(embeddings))
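With pandas now optional, `with_embeddings` accepts a `pa.Table` directly and only takes the pandas branch when the import succeeded. A sketch, assuming `with_embeddings` is importable from `lancedb.embeddings` (the file name is not shown in this view):

```python
import pyarrow as pa
from lancedb.embeddings import with_embeddings  # module path assumed

def embed_batch(texts):
    # Toy stand-in for a real embedding model: fixed 2-dim vectors.
    return [[float(len(t)), 0.0] for t in texts]

data = pa.table({"text": ["hello", "world"]})
# wrap_api=False skips the API retry wrapper since embed_batch is local.
data = with_embeddings(embed_batch, data, column="text", wrap_api=False)
```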


@@ -249,3 +249,36 @@ def pydantic_to_schema(model: Type[pydantic.BaseModel]) -> pa.Schema:
"""
fields = _pydantic_model_to_fields(model)
return pa.schema(fields)
class LanceModel(pydantic.BaseModel):
"""
A Pydantic Model base class that can be converted to a LanceDB Table.
Examples
--------
>>> import lancedb
>>> from lancedb.pydantic import LanceModel, vector
>>>
>>> class TestModel(LanceModel):
... name: str
... vector: vector(2)
...
>>> db = lancedb.connect("/tmp")
>>> table = db.create_table("test", schema=TestModel.to_arrow_schema())
>>> table.add([
... TestModel(name="test", vector=[1.0, 2.0])
... ])
>>> table.search([0., 0.]).limit(1).to_pydantic(TestModel)
[TestModel(name='test', vector=FixedSizeList(dim=2))]
"""
@classmethod
def to_arrow_schema(cls):
return pydantic_to_schema(cls)
@classmethod
def field_names(cls) -> List[str]:
if PYDANTIC_VERSION.major < 2:
return list(cls.__fields__.keys())
return list(cls.model_fields.keys())


@@ -13,17 +13,20 @@
from __future__ import annotations
from typing import List, Literal, Optional, Union
from typing import List, Literal, Optional, Type, Union
import numpy as np
import pandas as pd
import pyarrow as pa
from pydantic import BaseModel
import pydantic
from .common import VECTOR_COLUMN_NAME
from .pydantic import LanceModel
from .util import safe_import_pandas
pd = safe_import_pandas()
class Query(BaseModel):
class Query(pydantic.BaseModel):
"""A Query"""
vector_column: str = VECTOR_COLUMN_NAME
@@ -198,7 +201,7 @@ class LanceQueryBuilder:
self._refine_factor = refine_factor
return self
def to_df(self) -> pd.DataFrame:
def to_df(self) -> "pd.DataFrame":
"""
Execute the query and return the results as a pandas DataFrame.
In addition to the selected columns, LanceDB also returns a vector
@@ -230,9 +233,26 @@ class LanceQueryBuilder:
)
return self._table._execute_query(query)
def to_pydantic(self, model: Type[LanceModel]) -> List[LanceModel]:
"""Return the table as a list of pydantic models.
Parameters
----------
model: Type[LanceModel]
The pydantic model to use.
Returns
-------
List[LanceModel]
"""
return [
model(**{k: v for k, v in row.items() if k in model.field_names()})
for row in self.to_arrow().to_pylist()
]
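A hedged sketch of calling the new method, assuming a table tbl whose schema matches the Item model sketched earlier; note that extra result columns (such as a returned score) are filtered out by the field_names() check in the dict comprehension:

hits = tbl.search([0.0] * 4).limit(5).to_pydantic(Item)
for item in hits:
    print(item.name)  # each row comes back as a validated Item instance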
class LanceFtsQueryBuilder(LanceQueryBuilder):
def to_arrow(self) -> pd.Table:
def to_arrow(self) -> pa.Table:
try:
import tantivy
except ImportError:

View File

@@ -48,11 +48,16 @@ class RestfulLanceDBClient:
db_name: str
region: str
api_key: Credential
host_override: Optional[str] = attr.field(default=None)
closed: bool = attr.field(default=False, init=False)
@functools.cached_property
def session(self) -> aiohttp.ClientSession:
url = f"https://{self.db_name}.{self.region}.api.lancedb.com"
url = (
self.host_override
or f"https://{self.db_name}.{self.region}.api.lancedb.com"
)
return aiohttp.ClientSession(url)
async def close(self):
@@ -66,6 +71,8 @@ class RestfulLanceDBClient:
}
if self.region == "local": # Local test mode
headers["Host"] = f"{self.db_name}.{self.region}.api.lancedb.com"
if self.host_override:
headers["x-lancedb-database"] = self.db_name
return headers
@staticmethod
@@ -98,7 +105,7 @@ class RestfulLanceDBClient:
async def post(
self,
uri: str,
data: Union[Dict[str, Any], BaseModel, bytes],
data: Optional[Union[Dict[str, Any], BaseModel, bytes]] = None,
params: Optional[Dict[str, Any]] = None,
content_type: Optional[str] = None,
deserialize: Callable = lambda resp: resp.json(),
@@ -141,5 +148,7 @@ class RestfulLanceDBClient:
@_check_not_closed
async def query(self, table_name: str, query: VectorQuery) -> VectorQueryResult:
"""Query a table."""
tbl = await self.post(f"/v1/table/{table_name}/", query, deserialize=_read_ipc)
tbl = await self.post(
f"/v1/table/{table_name}/query/", query, deserialize=_read_ipc
)
return VectorQueryResult(tbl)
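The host_override fallback is just an or-expression over the managed endpoint; a standalone sketch of the same pattern (the function name base_url is illustrative):

from typing import Optional

def base_url(db_name: str, region: str, host_override: Optional[str] = None) -> str:
    # Use the managed endpoint only when no override is supplied
    return host_override or f"https://{db_name}.{region}.api.lancedb.com"

assert base_url("demo", "us-east-1") == "https://demo.us-east-1.api.lancedb.com"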

View File

@@ -13,14 +13,13 @@
import asyncio
import uuid
from typing import List
from typing import List, Optional
from urllib.parse import urlparse
import pyarrow as pa
from lancedb.common import DATA
from lancedb.db import DBConnection
from lancedb.schema import schema_to_json
from lancedb.table import Table, _sanitize_data
from .arrow import to_ipc_binary
@@ -30,14 +29,22 @@ from .client import ARROW_STREAM_CONTENT_TYPE, RestfulLanceDBClient
class RemoteDBConnection(DBConnection):
"""A connection to a remote LanceDB database."""
def __init__(self, db_url: str, api_key: str, region: str):
def __init__(
self,
db_url: str,
api_key: str,
region: str,
host_override: Optional[str] = None,
):
"""Connect to a remote LanceDB database."""
parsed = urlparse(db_url)
if parsed.scheme != "db":
raise ValueError(f"Invalid scheme: {parsed.scheme}, only accepts db://")
self.db_name = parsed.netloc
self.api_key = api_key
self._client = RestfulLanceDBClient(self.db_name, region, api_key)
self._client = RestfulLanceDBClient(
self.db_name, region, api_key, host_override
)
try:
self._loop = asyncio.get_running_loop()
except RuntimeError:
@@ -95,7 +102,7 @@ class RemoteDBConnection(DBConnection):
self._loop.run_until_complete(
self._client.post(
f"/v1/table/{name}/create",
f"/v1/table/{name}/create/",
data=data,
params={"request_id": request_id},
content_type=ARROW_STREAM_CONTENT_TYPE,
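A sketch of constructing the connection with the new parameter, e.g. for a self-hosted deployment; the module path, credential, and endpoint URL here are assumptions:

from lancedb.remote.db import RemoteDBConnection  # module path assumed

conn = RemoteDBConnection(
    "db://my-project",                # scheme must be db://
    api_key="sk-...",                 # placeholder credential
    region="us-east-1",
    host_override="https://lancedb.internal.example.com",
)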

View File

@@ -16,11 +16,11 @@ from functools import cached_property
from typing import Union
import pyarrow as pa
from lance import json_to_schema
from lancedb.common import DATA, VEC, VECTOR_COLUMN_NAME
from ..query import LanceQueryBuilder, Query
from ..schema import json_to_schema
from ..query import LanceQueryBuilder
from ..table import Query, Table, _sanitize_data
from .arrow import to_ipc_binary
from .client import ARROW_STREAM_CONTENT_TYPE
@@ -33,13 +33,13 @@ class RemoteTable(Table):
self._name = name
def __repr__(self) -> str:
return f"RemoteTable({self._conn.db_name}.{self.name})"
return f"RemoteTable({self._conn.db_name}.{self._name})"
@cached_property
def schema(self) -> pa.Schema:
"""Return the schema of the table."""
resp = self._conn._loop.run_until_complete(
self._conn._client.get(f"/v1/table/{self._name}/describe")
self._conn._client.post(f"/v1/table/{self._name}/describe/")
)
schema = json_to_schema(resp["schema"])
return schema
@@ -73,7 +73,7 @@ class RemoteTable(Table):
self._conn._loop.run_until_complete(
self._conn._client.post(
f"/v1/table/{self._name}/insert",
f"/v1/table/{self._name}/insert/",
data=payload,
params={"request_id": request_id, "mode": mode},
content_type=ARROW_STREAM_CONTENT_TYPE,

View File

@@ -12,11 +12,7 @@
# limitations under the License.
"""Schema related utilities."""
from typing import Any, Dict, Type
import pyarrow as pa
from lance import json_to_schema, schema_to_json
def vector(dimension: int, value_type: pa.DataType = pa.float32()) -> pa.DataType:
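vector() builds a fixed-size-list Arrow type; a small sketch of using it in a schema (field names are illustrative, and the float16 variant assumes value_type is passed through unchanged):

import pyarrow as pa
from lancedb.schema import vector

schema = pa.schema([
    pa.field("id", pa.int64()),
    pa.field("vector", vector(128)),                 # float32 items by default
    pa.field("half_vec", vector(64, pa.float16())),  # custom item type
])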

View File

@@ -20,26 +20,32 @@ from typing import Iterable, List, Union
import lance
import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.compute as pc
import pyarrow.fs
from lance import LanceDataset
from lance.vector import vec_to_table
from .common import DATA, VEC, VECTOR_COLUMN_NAME
from .pydantic import LanceModel
from .query import LanceFtsQueryBuilder, LanceQueryBuilder, Query
from .util import fs_from_uri, safe_import_pandas
pd = safe_import_pandas()
def _sanitize_data(data, schema, on_bad_vectors, fill_value):
if isinstance(data, list):
# convert to list of dict if data is a bunch of LanceModels
if isinstance(data[0], LanceModel):
schema = data[0].__class__.to_arrow_schema()
data = [dict(d) for d in data]
data = pa.Table.from_pylist(data)
data = _sanitize_schema(
data, schema=schema, on_bad_vectors=on_bad_vectors, fill_value=fill_value
)
if isinstance(data, dict):
data = vec_to_table(data)
if isinstance(data, pd.DataFrame):
if pd is not None and isinstance(data, pd.DataFrame):
data = pa.Table.from_pandas(data)
data = _sanitize_schema(
data, schema=schema, on_bad_vectors=on_bad_vectors, fill_value=fill_value
@@ -94,7 +100,7 @@ class Table(ABC):
"""
raise NotImplementedError
def to_pandas(self) -> pd.DataFrame:
def to_pandas(self):
"""Return the table as a pandas DataFrame.
Returns
@@ -328,7 +334,7 @@ class LanceTable(Table):
"""Return the first n rows of the table."""
return self._dataset.head(n)
def to_pandas(self) -> pd.DataFrame:
def to_pandas(self) -> "pd.DataFrame":
"""Return the table as a pandas DataFrame.
Returns
@@ -527,7 +533,7 @@ class LanceTable(Table):
@classmethod
def open(cls, db, name):
tbl = cls(db, name)
fs, path = pa.fs.FileSystem.from_uri(tbl._dataset_uri)
fs, path = fs_from_uri(tbl._dataset_uri)
file_info = fs.get_file_info(path)
if file_info.type != pa.fs.FileType.Directory:
raise FileNotFoundError(
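With the LanceModel branch in _sanitize_data, a list of models can be passed straight to create_table; a sketch mirroring the test added below (the path and model are illustrative):

import lancedb
from lancedb.pydantic import LanceModel, vector

class Doc(LanceModel):
    text: str
    vector: vector(2)

db = lancedb.connect("/tmp/sample-db")
# The schema is taken from Doc.to_arrow_schema() via the first element
tbl = db.create_table("docs", data=[Doc(text="hi", vector=[0.1, 0.2])])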

View File

@@ -15,7 +15,6 @@ import os
from typing import Tuple
from urllib.parse import urlparse
import pyarrow as pa
import pyarrow.fs as pa_fs
@@ -71,7 +70,17 @@ def fs_from_uri(uri: str) -> Tuple[pa_fs.FileSystem, str]:
Get a PyArrow FileSystem from a URI, handling extra environment variables.
"""
if get_uri_scheme(uri) == "s3":
if os.environ["AWS_ENDPOINT"]:
uri += "?endpoint_override=" + os.environ["AWS_ENDPOINT"]
fs = pa_fs.S3FileSystem(endpoint_override=os.environ.get("AWS_ENDPOINT"))
path = get_uri_location(uri)
return fs, path
return pa_fs.FileSystem.from_uri(uri)
def safe_import_pandas():
try:
import pandas as pd
return pd
except ImportError:
return None
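A hedged sketch of exercising the rewritten S3 branch, e.g. against a local MinIO server (the endpoint URL and bucket are illustrative):

import os
from lancedb.util import fs_from_uri

os.environ["AWS_ENDPOINT"] = "http://localhost:9000"  # e.g. a local MinIO
fs, path = fs_from_uri("s3://my-bucket/tables")
# fs is an S3FileSystem whose endpoint_override points at AWS_ENDPOINT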

View File

@@ -1,7 +1,7 @@
[project]
name = "lancedb"
version = "0.1.13"
dependencies = ["pylance~=0.5.8", "ratelimiter", "retry", "tqdm", "aiohttp", "pydantic", "attr", "semver"]
version = "0.1.16"
dependencies = ["pylance==0.5.10", "ratelimiter", "retry", "tqdm", "aiohttp", "pydantic", "attr", "semver"]
description = "lancedb"
authors = [
{ name = "LanceDB Devs", email = "dev@lancedb.com" },
@@ -37,7 +37,7 @@ repository = "https://github.com/lancedb/lancedb"
[project.optional-dependencies]
tests = [
"pytest", "pytest-mock", "pytest-asyncio"
"pandas>=1.4", "pytest", "pytest-mock", "pytest-asyncio"
]
dev = [
"ruff", "pre-commit", "black"

View File

@@ -149,6 +149,10 @@ def test_delete_table(tmp_path):
db.create_table("test", data=data)
assert db.table_names() == ["test"]
# dropping a table that does not exist should pass
# if ignore_missing=True
db.drop_table("does_not_exist", ignore_missing=True)
def test_empty_or_nonexistent_table(tmp_path):
db = lancedb.connect(tmp_path)

View File

@@ -20,7 +20,7 @@ import pyarrow as pa
import pydantic
import pytest
from lancedb.pydantic import PYDANTIC_VERSION, pydantic_to_schema, vector
from lancedb.pydantic import PYDANTIC_VERSION, LanceModel, pydantic_to_schema, vector
@pytest.mark.skipif(
@@ -163,3 +163,13 @@ def test_fixed_size_list_validation():
TestModel(vec=range(7))
TestModel(vec=range(8))
def test_lance_model():
class TestModel(LanceModel):
vec: vector(16)
li: List[int]
schema = pydantic_to_schema(TestModel)
assert schema == TestModel.to_arrow_schema()
assert TestModel.field_names() == ["vec", "li"]

View File

@@ -20,6 +20,7 @@ import pyarrow as pa
import pytest
from lancedb.db import LanceDBConnection
from lancedb.pydantic import LanceModel, vector
from lancedb.query import LanceQueryBuilder, Query
from lancedb.table import LanceTable
@@ -64,6 +65,24 @@ def table(tmp_path) -> MockTable:
return MockTable(tmp_path)
def test_cast(table):
class TestModel(LanceModel):
vector: vector(2)
id: int
str_field: str
float_field: float
q = LanceQueryBuilder(table, [0, 0], "vector").limit(1)
results = q.to_pydantic(TestModel)
assert len(results) == 1
r0 = results[0]
assert isinstance(r0, TestModel)
assert r0.id == 1
assert r0.vector == [1, 2]
assert r0.str_field == "a"
assert r0.float_field == 1.0
def test_query_builder(table):
df = LanceQueryBuilder(table, [0, 0], "vector").limit(1).select(["id"]).to_df()
assert df["id"].values[0] == 1

View File

@@ -13,15 +13,16 @@
import functools
from pathlib import Path
from typing import List
from unittest.mock import PropertyMock, patch
import numpy as np
import pandas as pd
import pyarrow as pa
import pytest
from lance.vector import vec_to_table
from lancedb.db import LanceDBConnection
from lancedb.pydantic import LanceModel, vector
from lancedb.table import LanceTable
@@ -135,6 +136,17 @@ def test_add(db):
_add(table, schema)
def test_add_pydantic_model(db):
class TestModel(LanceModel):
vector: vector(16)
li: List[int]
data = TestModel(vector=list(range(16)), li=[1, 2, 3])
table = LanceTable.create(db, "test", data=[data])
assert len(table) == 1
assert table.schema == TestModel.to_arrow_schema()
def _add(table, schema):
# table = LanceTable(db, "test")
assert len(table) == 2

View File

@@ -1,6 +1,6 @@
[package]
name = "vectordb-node"
version = "0.1.15"
version = "0.1.19"
description = "Serverless, low-latency vector database for AI applications"
license = "Apache-2.0"
edition = "2018"
@@ -13,6 +13,7 @@ crate-type = ["cdylib"]
arrow-array = { workspace = true }
arrow-ipc = { workspace = true }
arrow-schema = { workspace = true }
conv = "0.3.3"
once_cell = "1"
futures = "0.3"
half = { workspace = true }
@@ -21,5 +22,6 @@ vectordb = { path = "../../vectordb" }
tokio = { version = "1.23", features = ["rt-multi-thread"] }
neon = {version = "0.10.1", default-features = false, features = ["channel-api", "napi-6", "promise-api", "task-api"] }
object_store = { workspace = true, features = ["aws"] }
snafu = { workspace = true }
async-trait = "0"
env_logger = "0"

View File

@@ -13,27 +13,30 @@
// limitations under the License.
use std::io::Cursor;
use std::ops::Deref;
use std::sync::Arc;
use arrow_array::cast::as_list_array;
use arrow_array::{Array, FixedSizeListArray, RecordBatch};
use arrow_array::{Array, ArrayRef, FixedSizeListArray, RecordBatch};
use arrow_ipc::reader::FileReader;
use arrow_ipc::writer::FileWriter;
use arrow_schema::{DataType, Field, Schema};
use lance::arrow::{FixedSizeListArrayExt, RecordBatchExt};
use vectordb::table::VECTOR_COLUMN_NAME;
pub(crate) fn convert_record_batch(record_batch: RecordBatch) -> RecordBatch {
let column = record_batch
.column_by_name("vector")
.cloned()
.expect("vector column is missing");
// TODO: we should just consume the underlaying js buffer in the future instead of this arrow around a bunch of times
use crate::error::{MissingColumnSnafu, Result};
use snafu::prelude::*;
pub(crate) fn convert_record_batch(record_batch: RecordBatch) -> Result<RecordBatch> {
let column = get_column(VECTOR_COLUMN_NAME, &record_batch)?;
// TODO: we should just consume the underlying js buffer in the future instead of copying the arrow data around a bunch of times
let arr = as_list_array(column.as_ref());
let list_size = arr.values().len() / record_batch.num_rows();
let r =
FixedSizeListArray::try_new_from_values(arr.values().to_owned(), list_size as i32).unwrap();
let r = FixedSizeListArray::try_new_from_values(arr.values().to_owned(), list_size as i32)?;
let schema = Arc::new(Schema::new(vec![Field::new(
"vector",
VECTOR_COLUMN_NAME,
DataType::FixedSizeList(
Arc::new(Field::new("item", DataType::Float32, true)),
list_size as i32,
@@ -41,22 +44,42 @@ pub(crate) fn convert_record_batch(record_batch: RecordBatch) -> RecordBatch {
true,
)]));
let mut new_batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(r)]).unwrap();
let mut new_batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(r)])?;
if record_batch.num_columns() > 1 {
let rb = record_batch.drop_column("vector").unwrap();
new_batch = new_batch.merge(&rb).unwrap();
let rb = record_batch.drop_column(VECTOR_COLUMN_NAME)?;
new_batch = new_batch.merge(&rb)?;
}
new_batch
Ok(new_batch)
}
pub(crate) fn arrow_buffer_to_record_batch(slice: &[u8]) -> Vec<RecordBatch> {
fn get_column(column_name: &str, record_batch: &RecordBatch) -> Result<ArrayRef> {
record_batch
.column_by_name(column_name)
.cloned()
.context(MissingColumnSnafu { name: column_name })
}
pub(crate) fn arrow_buffer_to_record_batch(slice: &[u8]) -> Result<Vec<RecordBatch>> {
let mut batches: Vec<RecordBatch> = Vec::new();
let fr = FileReader::try_new(Cursor::new(slice), None);
let file_reader = fr.unwrap();
let file_reader = FileReader::try_new(Cursor::new(slice), None)?;
for b in file_reader {
let record_batch = convert_record_batch(b.unwrap());
let record_batch = convert_record_batch(b?)?;
batches.push(record_batch);
}
batches
Ok(batches)
}
pub(crate) fn record_batch_to_buffer(batches: Vec<RecordBatch>) -> Result<Vec<u8>> {
if batches.is_empty() {
return Ok(Vec::new());
}
let schema = batches.get(0).unwrap().schema();
let mut fr = FileWriter::try_new(Vec::new(), schema.deref())?;
for batch in batches.iter() {
fr.write(batch)?
}
fr.finish()?;
Ok(fr.into_inner()?)
}

View File

@@ -0,0 +1,88 @@
// Copyright 2023 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use arrow_schema::ArrowError;
use neon::context::Context;
use neon::prelude::NeonResult;
use snafu::Snafu;
#[derive(Debug, Snafu)]
#[snafu(visibility(pub(crate)))]
pub enum Error {
#[snafu(display("column '{name}' is missing"))]
MissingColumn { name: String },
#[snafu(display("{name}: {message}"))]
RangeError { name: String, message: String },
#[snafu(display("{index_type} is not a valid index type"))]
InvalidIndexType { index_type: String },
#[snafu(display("{message}"))]
LanceDB { message: String },
#[snafu(display("{message}"))]
Neon { message: String },
}
pub type Result<T> = std::result::Result<T, Error>;
impl From<vectordb::error::Error> for Error {
fn from(e: vectordb::error::Error) -> Self {
Self::LanceDB {
message: e.to_string(),
}
}
}
impl From<lance::Error> for Error {
fn from(e: lance::Error) -> Self {
Self::LanceDB {
message: e.to_string(),
}
}
}
impl From<ArrowError> for Error {
fn from(value: ArrowError) -> Self {
Self::LanceDB {
message: value.to_string(),
}
}
}
impl From<neon::result::Throw> for Error {
fn from(value: neon::result::Throw) -> Self {
Self::Neon {
message: value.to_string(),
}
}
}
/// ResultExt is used to transform a [`Result`] into a [`NeonResult`],
/// so it can be returned as a JavaScript error
/// Copied from [Neon](https://github.com/neon-bindings/neon/blob/4c2e455a9e6814f1ba0178616d63caec7f4df317/crates/neon/src/result/mod.rs#L88)
pub trait ResultExt<T> {
fn or_throw<'a, C: Context<'a>>(self, cx: &mut C) -> NeonResult<T>;
}
/// Implement ResultExt for the std Result so it can be used with any Result type
impl<T, E> ResultExt<T> for std::result::Result<T, E>
where
E: std::fmt::Display,
{
fn or_throw<'a, C: Context<'a>>(self, cx: &mut C) -> NeonResult<T> {
match self {
Ok(value) => Ok(value),
Err(error) => cx.throw_error(error.to_string()),
}
}
}

View File

@@ -22,12 +22,15 @@ use neon::prelude::*;
use vectordb::index::vector::{IvfPQIndexBuilder, VectorIndexBuilder};
use crate::error::Error::InvalidIndexType;
use crate::error::ResultExt;
use crate::neon_ext::js_object_ext::JsObjectExt;
use crate::{runtime, JsTable};
pub(crate) fn table_create_vector_index(mut cx: FunctionContext) -> JsResult<JsPromise> {
let js_table = cx.this().downcast_or_throw::<JsBox<JsTable>, _>(&mut cx)?;
let index_params = cx.argument::<JsObject>(0)?;
let index_params_builder = get_index_params_builder(&mut cx, index_params).unwrap();
let index_params_builder = get_index_params_builder(&mut cx, index_params).or_throw(&mut cx)?;
let rt = runtime(&mut cx)?;
let channel = cx.channel();
@@ -54,27 +57,21 @@ pub(crate) fn table_create_vector_index(mut cx: FunctionContext) -> JsResult<JsP
fn get_index_params_builder(
cx: &mut FunctionContext,
obj: Handle<JsObject>,
) -> Result<impl VectorIndexBuilder, String> {
let idx_type = obj
.get::<JsString, _, _>(cx, "type")
.map_err(|t| t.to_string())?
.value(cx);
) -> crate::error::Result<impl VectorIndexBuilder> {
let idx_type = obj.get::<JsString, _, _>(cx, "type")?.value(cx);
match idx_type.as_str() {
"ivf_pq" => {
let mut index_builder: IvfPQIndexBuilder = IvfPQIndexBuilder::new();
let mut pq_params = PQBuildParams::default();
obj.get_opt::<JsString, _, _>(cx, "column")
.map_err(|t| t.to_string())?
obj.get_opt::<JsString, _, _>(cx, "column")?
.map(|s| index_builder.column(s.value(cx)));
obj.get_opt::<JsString, _, _>(cx, "index_name")
.map_err(|t| t.to_string())?
obj.get_opt::<JsString, _, _>(cx, "index_name")?
.map(|s| index_builder.index_name(s.value(cx)));
obj.get_opt::<JsString, _, _>(cx, "metric_type")
.map_err(|t| t.to_string())?
obj.get_opt::<JsString, _, _>(cx, "metric_type")?
.map(|s| MetricType::try_from(s.value(cx).as_str()))
.map(|mt| {
let metric_type = mt.unwrap();
@@ -82,15 +79,8 @@ fn get_index_params_builder(
pq_params.metric_type = metric_type;
});
let num_partitions = obj
.get_opt::<JsNumber, _, _>(cx, "num_partitions")
.map_err(|t| t.to_string())?
.map(|s| s.value(cx) as usize);
let max_iters = obj
.get_opt::<JsNumber, _, _>(cx, "max_iters")
.map_err(|t| t.to_string())?
.map(|s| s.value(cx) as usize);
let num_partitions = obj.get_opt_usize(cx, "num_partitions")?;
let max_iters = obj.get_opt_usize(cx, "max_iters")?;
num_partitions.map(|np| {
let max_iters = max_iters.unwrap_or(50);
@@ -102,32 +92,28 @@ fn get_index_params_builder(
index_builder.ivf_params(ivf_params)
});
obj.get_opt::<JsBoolean, _, _>(cx, "use_opq")
.map_err(|t| t.to_string())?
obj.get_opt::<JsBoolean, _, _>(cx, "use_opq")?
.map(|s| pq_params.use_opq = s.value(cx));
obj.get_opt::<JsNumber, _, _>(cx, "num_sub_vectors")
.map_err(|t| t.to_string())?
.map(|s| pq_params.num_sub_vectors = s.value(cx) as usize);
obj.get_opt_usize(cx, "num_sub_vectors")?
.map(|s| pq_params.num_sub_vectors = s);
obj.get_opt::<JsNumber, _, _>(cx, "num_bits")
.map_err(|t| t.to_string())?
.map(|s| pq_params.num_bits = s.value(cx) as usize);
obj.get_opt_usize(cx, "num_bits")?
.map(|s| pq_params.num_bits = s);
obj.get_opt::<JsNumber, _, _>(cx, "max_iters")
.map_err(|t| t.to_string())?
.map(|s| pq_params.max_iters = s.value(cx) as usize);
obj.get_opt_usize(cx, "max_iters")?
.map(|s| pq_params.max_iters = s);
obj.get_opt::<JsNumber, _, _>(cx, "max_opq_iters")
.map_err(|t| t.to_string())?
.map(|s| pq_params.max_opq_iters = s.value(cx) as usize);
obj.get_opt_usize(cx, "max_opq_iters")?
.map(|s| pq_params.max_opq_iters = s);
obj.get_opt::<JsBoolean, _, _>(cx, "replace")
.map_err(|t| t.to_string())?
obj.get_opt::<JsBoolean, _, _>(cx, "replace")?
.map(|s| index_builder.replace(s.value(cx)));
Ok(index_builder)
}
t => Err(format!("{} is not a valid index type", t).to_string()),
index_type => Err(InvalidIndexType {
index_type: index_type.into(),
}),
}
}

View File

@@ -18,7 +18,6 @@ use std::ops::Deref;
use std::sync::{Arc, Mutex};
use arrow_array::{Float32Array, RecordBatchIterator};
use arrow_ipc::writer::FileWriter;
use async_trait::async_trait;
use futures::{TryFutureExt, TryStreamExt};
use lance::dataset::{WriteMode, WriteParams};
@@ -32,14 +31,17 @@ use once_cell::sync::OnceCell;
use tokio::runtime::Runtime;
use vectordb::database::Database;
use vectordb::error::Error;
use vectordb::table::{ReadParams, Table};
use crate::arrow::arrow_buffer_to_record_batch;
use crate::arrow::{arrow_buffer_to_record_batch, record_batch_to_buffer};
use crate::error::ResultExt;
use crate::neon_ext::js_object_ext::JsObjectExt;
mod arrow;
mod convert;
mod error;
mod index;
mod neon_ext;
struct JsDatabase {
database: Arc<Database>,
@@ -54,7 +56,7 @@ struct JsTable {
impl Finalize for JsTable {}
// TODO: object_store didn't export this type so I copied it.
// Make a requiest to object_store to export this type
// Make a request to object_store to export this type
#[derive(Debug)]
pub struct StaticCredentialProvider<T> {
credential: Arc<T>,
@@ -86,7 +88,7 @@ fn runtime<'a, C: Context<'a>>(cx: &mut C) -> NeonResult<&'static Runtime> {
LOG.get_or_init(|| env_logger::init());
RUNTIME.get_or_try_init(|| Runtime::new().or_else(|err| cx.throw_error(err.to_string())))
RUNTIME.get_or_try_init(|| Runtime::new().or_throw(cx))
}
fn database_new(mut cx: FunctionContext) -> JsResult<JsPromise> {
@@ -101,7 +103,7 @@ fn database_new(mut cx: FunctionContext) -> JsResult<JsPromise> {
deferred.settle_with(&channel, move |mut cx| {
let db = JsDatabase {
database: Arc::new(database.or_else(|err| cx.throw_error(err.to_string()))?),
database: Arc::new(database.or_throw(&mut cx)?),
};
Ok(cx.boxed(db))
});
@@ -123,7 +125,7 @@ fn database_table_names(mut cx: FunctionContext) -> JsResult<JsPromise> {
let tables_rst = database.table_names().await;
deferred.settle_with(&channel, move |mut cx| {
let tables = tables_rst.or_else(|err| cx.throw_error(err.to_string()))?;
let tables = tables_rst.or_throw(&mut cx)?;
let table_names = convert::vec_str_to_array(&tables, &mut cx);
table_names
});
@@ -194,9 +196,7 @@ fn database_open_table(mut cx: FunctionContext) -> JsResult<JsPromise> {
let table_rst = database.open_table_with_params(&table_name, &params).await;
deferred.settle_with(&channel, move |mut cx| {
let table = Arc::new(Mutex::new(
table_rst.or_else(|err| cx.throw_error(err.to_string()))?,
));
let table = Arc::new(Mutex::new(table_rst.or_throw(&mut cx)?));
Ok(cx.boxed(JsTable { table }))
});
});
@@ -217,7 +217,7 @@ fn database_drop_table(mut cx: FunctionContext) -> JsResult<JsPromise> {
rt.spawn(async move {
let result = database.drop_table(&table_name).await;
deferred.settle_with(&channel, move |mut cx| {
result.or_else(|err| cx.throw_error(err.to_string()))?;
result.or_throw(&mut cx)?;
Ok(cx.null())
});
});
@@ -246,12 +246,9 @@ fn table_search(mut cx: FunctionContext) -> JsResult<JsPromise> {
.get_opt::<JsString, _, _>(&mut cx, "_filter")?
.map(|s| s.value(&mut cx));
let refine_factor = query_obj
.get_opt::<JsNumber, _, _>(&mut cx, "_refineFactor")?
.map(|s| s.value(&mut cx))
.map(|i| i as u32);
let nprobes = query_obj
.get::<JsNumber, _, _>(&mut cx, "_nprobes")?
.value(&mut cx) as usize;
.get_opt_u32(&mut cx, "_refineFactor")
.or_throw(&mut cx)?;
let nprobes = query_obj.get_usize(&mut cx, "_nprobes").or_throw(&mut cx)?;
let metric_type = query_obj
.get_opt::<JsString, _, _>(&mut cx, "_metricType")?
.map(|s| s.value(&mut cx))
@@ -278,30 +275,17 @@ fn table_search(mut cx: FunctionContext) -> JsResult<JsPromise> {
.select(select);
let record_batch_stream = builder.execute();
let results = record_batch_stream
.and_then(|stream| stream.try_collect::<Vec<_>>().map_err(Error::from))
.and_then(|stream| {
stream
.try_collect::<Vec<_>>()
.map_err(vectordb::error::Error::from)
})
.await;
deferred.settle_with(&channel, move |mut cx| {
let results = results.or_else(|err| cx.throw_error(err.to_string()))?;
let vector: Vec<u8> = Vec::new();
if results.is_empty() {
return cx.buffer(0);
}
let schema = results.get(0).unwrap().schema();
let mut fr = FileWriter::try_new(vector, schema.deref())
.or_else(|err| cx.throw_error(err.to_string()))?;
for batch in results.iter() {
fr.write(batch)
.or_else(|err| cx.throw_error(err.to_string()))?;
}
fr.finish().or_else(|err| cx.throw_error(err.to_string()))?;
let buf = fr
.into_inner()
.or_else(|err| cx.throw_error(err.to_string()))?;
Ok(JsBuffer::external(&mut cx, buf))
let results = results.or_throw(&mut cx)?;
let buffer = record_batch_to_buffer(results).or_throw(&mut cx)?;
Ok(JsBuffer::external(&mut cx, buffer))
});
});
Ok(promise)
@@ -313,7 +297,7 @@ fn table_create(mut cx: FunctionContext) -> JsResult<JsPromise> {
.downcast_or_throw::<JsBox<JsDatabase>, _>(&mut cx)?;
let table_name = cx.argument::<JsString>(0)?.value(&mut cx);
let buffer = cx.argument::<JsBuffer>(1)?;
let batches = arrow_buffer_to_record_batch(buffer.as_slice(&mut cx));
let batches = arrow_buffer_to_record_batch(buffer.as_slice(&mut cx)).or_throw(&mut cx)?;
let schema = batches[0].schema();
// Write mode
@@ -351,9 +335,7 @@ fn table_create(mut cx: FunctionContext) -> JsResult<JsPromise> {
.await;
deferred.settle_with(&channel, move |mut cx| {
let table = Arc::new(Mutex::new(
table_rst.or_else(|err| cx.throw_error(err.to_string()))?,
));
let table = Arc::new(Mutex::new(table_rst.or_throw(&mut cx)?));
Ok(cx.boxed(JsTable { table }))
});
});
@@ -370,7 +352,8 @@ fn table_add(mut cx: FunctionContext) -> JsResult<JsPromise> {
let js_table = cx.this().downcast_or_throw::<JsBox<JsTable>, _>(&mut cx)?;
let buffer = cx.argument::<JsBuffer>(0)?;
let write_mode = cx.argument::<JsString>(1)?.value(&mut cx);
let batches = arrow_buffer_to_record_batch(buffer.as_slice(&mut cx));
let batches = arrow_buffer_to_record_batch(buffer.as_slice(&mut cx)).or_throw(&mut cx)?;
let schema = batches[0].schema();
let rt = runtime(&mut cx)?;
@@ -399,7 +382,7 @@ fn table_add(mut cx: FunctionContext) -> JsResult<JsPromise> {
let add_result = table.lock().unwrap().add(batch_reader, Some(params)).await;
deferred.settle_with(&channel, move |mut cx| {
let _added = add_result.or_else(|err| cx.throw_error(err.to_string()))?;
let _added = add_result.or_throw(&mut cx)?;
Ok(cx.boolean(true))
});
});
@@ -418,7 +401,7 @@ fn table_count_rows(mut cx: FunctionContext) -> JsResult<JsPromise> {
let num_rows_result = table.lock().unwrap().count_rows().await;
deferred.settle_with(&channel, move |mut cx| {
let num_rows = num_rows_result.or_else(|err| cx.throw_error(err.to_string()))?;
let num_rows = num_rows_result.or_throw(&mut cx)?;
Ok(cx.number(num_rows as f64))
});
});
@@ -438,7 +421,7 @@ fn table_delete(mut cx: FunctionContext) -> JsResult<JsPromise> {
let delete_result = rt.block_on(async move { table.lock().unwrap().delete(&predicate).await });
deferred.settle_with(&channel, move |mut cx| {
delete_result.or_else(|err| cx.throw_error(err.to_string()))?;
delete_result.or_throw(&mut cx)?;
Ok(cx.undefined())
});

View File

@@ -0,0 +1,15 @@
// Copyright 2023 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
pub mod js_object_ext;

View File

@@ -0,0 +1,82 @@
// Copyright 2023 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::error::{Error, Result};
use neon::prelude::*;
// extends neon's [JsObject] with helper functions to extract properties
pub trait JsObjectExt {
fn get_opt_u32(&self, cx: &mut FunctionContext, key: &str) -> Result<Option<u32>>;
fn get_usize(&self, cx: &mut FunctionContext, key: &str) -> Result<usize>;
fn get_opt_usize(&self, cx: &mut FunctionContext, key: &str) -> Result<Option<usize>>;
}
impl JsObjectExt for JsObject {
fn get_opt_u32(&self, cx: &mut FunctionContext, key: &str) -> Result<Option<u32>> {
let val_opt = self
.get_opt::<JsNumber, _, _>(cx, key)?
.map(|s| f64_to_u32_safe(s.value(cx), key));
val_opt.transpose()
}
fn get_usize(&self, cx: &mut FunctionContext, key: &str) -> Result<usize> {
let val = self.get::<JsNumber, _, _>(cx, key)?.value(cx);
f64_to_usize_safe(val, key)
}
fn get_opt_usize(&self, cx: &mut FunctionContext, key: &str) -> Result<Option<usize>> {
let val_opt = self
.get_opt::<JsNumber, _, _>(cx, key)?
.map(|s| f64_to_usize_safe(s.value(cx), key));
val_opt.transpose()
}
}
fn f64_to_u32_safe(n: f64, key: &str) -> Result<u32> {
use conv::*;
n.approx_as::<u32>().map_err(|e| match e {
FloatError::NegOverflow(_) => Error::RangeError {
name: key.into(),
message: "must be > 0".to_string(),
},
FloatError::PosOverflow(_) => Error::RangeError {
name: key.into(),
message: format!("must be < {}", u32::MAX),
},
FloatError::NotANumber(_) => Error::RangeError {
name: key.into(),
message: "not a valid number".to_string(),
},
})
}
fn f64_to_usize_safe(n: f64, key: &str) -> Result<usize> {
use conv::*;
n.approx_as::<usize>().map_err(|e| match e {
FloatError::NegOverflow(_) => Error::RangeError {
name: key.into(),
message: "must be > 0".to_string(),
},
FloatError::PosOverflow(_) => Error::RangeError {
name: key.into(),
message: format!("must be < {}", usize::MAX),
},
FloatError::NotANumber(_) => Error::RangeError {
name: key.into(),
message: "not a valid number".to_string(),
},
})
}

View File

@@ -1,6 +1,6 @@
[package]
name = "vectordb"
version = "0.1.15"
version = "0.1.19"
edition = "2021"
description = "Serverless, low-latency vector database for AI applications"
license = "Apache-2.0"
@@ -12,7 +12,7 @@ arrow-array = { workspace = true }
arrow-data = { workspace = true }
arrow-schema = { workspace = true }
object_store = { workspace = true }
snafu = "0.7.4"
snafu = { workspace = true }
half = { workspace = true }
lance = { workspace = true }
tokio = { version = "1.23", features = ["rt-multi-thread"] }