Bump version: 0.16.0 → 0.16.1-beta.0

feat: bump lance to 0.20.0b2 (#1865)
Bump lance version. Upstream change log: https://github.com/lancedb/lance/releases/tag/v0.20.0-beta.2
2025-12-25 14:29:56 +00:00 · 2024-11-21 21:52:39 +00:00 · 2024-11-21 13:16:59 -08:00 · 2024-11-21 10:50:50 -08:00 · 2024-11-21 13:35:14 -05:00 · 2024-11-21 09:02:49 -08:00
104 changed files with 9856 additions and 2610 deletions
--- a/.bumpversion.toml
+++ b/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.13.0-beta.1"
+current_version = "0.13.0"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
@@ -87,6 +87,16 @@ glob = "node/package.json"
 replace = "\"@lancedb/vectordb-linux-x64-gnu\": \"{new_version}\""
 search = "\"@lancedb/vectordb-linux-x64-gnu\": \"{current_version}\""

+[[tool.bumpversion.files]]
+glob = "node/package.json"
+replace = "\"@lancedb/vectordb-linux-arm64-musl\": \"{new_version}\""
+search = "\"@lancedb/vectordb-linux-arm64-musl\": \"{current_version}\""
+
+[[tool.bumpversion.files]]
+glob = "node/package.json"
+replace = "\"@lancedb/vectordb-linux-x64-musl\": \"{new_version}\""
+search = "\"@lancedb/vectordb-linux-x64-musl\": \"{current_version}\""
+
 [[tool.bumpversion.files]]
 glob = "node/package.json"
 replace = "\"@lancedb/vectordb-win32-x64-msvc\": \"{new_version}\""
--- a/.cargo/config.toml
+++ b/.cargo/config.toml
@@ -31,6 +31,9 @@ rustflags = [
 [target.x86_64-unknown-linux-gnu]
 rustflags = ["-C", "target-cpu=haswell", "-C", "target-feature=+avx2,+fma,+f16c"]

+[target.x86_64-unknown-linux-musl]
+rustflags = ["-C", "target-cpu=haswell", "-C", "target-feature=-crt-static,+avx2,+fma,+f16c"]
+
 [target.aarch64-apple-darwin]
 rustflags = ["-C", "target-cpu=apple-m1", "-C", "target-feature=+neon,+fp16,+fhm,+dotprod"]

--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -31,7 +31,7 @@ jobs:
      - name: Install dependecies needed for ubuntu
        run: |
          sudo apt install -y protobuf-compiler libssl-dev
-          rustup update && rustup default        
+          rustup update && rustup default
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
@@ -41,8 +41,8 @@ jobs:
      - name: Build Python
        working-directory: python
        run: |
-          python -m pip install -e .
-          python -m pip install -r ../docs/requirements.txt
+          python -m pip install --extra-index-url https://pypi.fury.io/lancedb/ -e .
+          python -m pip install --extra-index-url https://pypi.fury.io/lancedb/ -r ../docs/requirements.txt
      - name: Set up node
        uses: actions/setup-node@v3
        with:
--- a/.github/workflows/docs_test.yml
+++ b/.github/workflows/docs_test.yml
@@ -49,7 +49,7 @@ jobs:
    - name: Build Python
      working-directory: docs/test
      run:
-        python -m pip install -r requirements.txt
+        python -m pip install --extra-index-url https://pypi.fury.io/lancedb/ -r requirements.txt
    - name: Create test files
      run: |
        cd docs/test
--- a/.github/workflows/nodejs.yml
+++ b/.github/workflows/nodejs.yml
@@ -53,6 +53,9 @@ jobs:
        cargo clippy --all --all-features -- -D warnings
        npm ci
        npm run lint-ci
+    - name: Lint examples
+      working-directory: nodejs/examples
+      run: npm ci && npm run lint-ci
  linux:
    name: Linux (NodeJS ${{ matrix.node-version }})
    timeout-minutes: 30
@@ -91,6 +94,18 @@ jobs:
      env:
        S3_TEST: "1"
      run: npm run test
+    - name: Setup examples
+      working-directory: nodejs/examples
+      run: npm ci
+    - name: Test examples
+      working-directory: ./
+      env:
+        OPENAI_API_KEY: test
+        OPENAI_BASE_URL: http://0.0.0.0:8000
+      run: |
+        python ci/mock_openai.py &
+        cd nodejs/examples
+        npm test
  macos:
    timeout-minutes: 30
    runs-on: "macos-14"
--- a/.github/workflows/npm-publish.yml
+++ b/.github/workflows/npm-publish.yml
@@ -101,7 +101,7 @@ jobs:
          path: |
            nodejs/dist/*.node

-  node-linux:
+  node-linux-gnu:
    name: vectordb (${{ matrix.config.arch}}-unknown-linux-gnu)
    runs-on: ${{ matrix.config.runner }}
    # Only runs on tags that matches the make-release action
@@ -137,11 +137,63 @@ jobs:
      - name: Upload Linux Artifacts
        uses: actions/upload-artifact@v4
        with:
-          name: node-native-linux-${{ matrix.config.arch }}
+          name: node-native-linux-${{ matrix.config.arch }}-gnu
          path: |
            node/dist/lancedb-vectordb-linux*.tgz

-  nodejs-linux:
+  node-linux-musl:
+    name: vectordb (${{ matrix.config.arch}}-unknown-linux-musl)
+    runs-on: ubuntu-latest
+    container: alpine:edge
+    # Only runs on tags that match the make-release action
+    if: startsWith(github.ref, 'refs/tags/v')
+    strategy:
+      fail-fast: false
+      matrix:
+        config:
+          - arch: x86_64
+          - arch: aarch64
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Install common dependencies
+        run: |
+          apk add protobuf-dev curl clang mold grep npm bash
+          curl --proto '=https' --tlsv1.3 -sSf https://raw.githubusercontent.com/rust-lang/rustup/refs/heads/master/rustup-init.sh | sh -s -- -y --default-toolchain 1.80.0
+          echo "source $HOME/.cargo/env" >> saved_env
+          echo "export CC=clang" >> saved_env
+          echo "export RUSTFLAGS='-Ctarget-cpu=haswell -Ctarget-feature=-crt-static,+avx2,+fma,+f16c -Clinker=clang -Clink-arg=-fuse-ld=mold'" >> saved_env
+      - name: Configure aarch64 build
+        if: ${{ matrix.config.arch == 'aarch64' }}
+        run: |
+          source "$HOME/.cargo/env"
+          rustup target add aarch64-unknown-linux-musl --toolchain 1.80.0
+          crt=$(realpath $(dirname $(rustup which rustc))/../lib/rustlib/aarch64-unknown-linux-musl/lib/self-contained)
+          sysroot_lib=/usr/aarch64-unknown-linux-musl/usr/lib
+          apk_url=https://dl-cdn.alpinelinux.org/alpine/latest-stable/main/aarch64/
+          curl -sSf $apk_url > apk_list
+          for pkg in gcc libgcc musl; do curl -sSf $apk_url$(cat apk_list | grep -oP '(?<=")'$pkg'-\d.*?(?=")') | tar zxf -; done
+          mkdir -p $sysroot_lib
+          echo 'GROUP ( libgcc_s.so.1 -lgcc )' > $sysroot_lib/libgcc_s.so
+          cp usr/lib/libgcc_s.so.1 $sysroot_lib
+          cp usr/lib/gcc/aarch64-alpine-linux-musl/*/libgcc.a $sysroot_lib
+          cp lib/ld-musl-aarch64.so.1 $sysroot_lib/libc.so
+          echo '!<arch>' > $sysroot_lib/libdl.a
+          (cd $crt && cp crti.o crtbeginS.o crtendS.o crtn.o -t $sysroot_lib)
+          echo "export CARGO_BUILD_TARGET=aarch64-unknown-linux-musl" >> saved_env
+          echo "export RUSTFLAGS='-Ctarget-cpu=apple-m1 -Ctarget-feature=-crt-static,+neon,+fp16,+fhm,+dotprod -Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=--target=aarch64-unknown-linux-musl -Clink-arg=--sysroot=/usr/aarch64-unknown-linux-musl -Clink-arg=-lc'" >> saved_env
+      - name: Build Linux Artifacts
+        run: |
+          source ./saved_env
+          bash ci/manylinux_node/build_vectordb.sh ${{ matrix.config.arch }}
+      - name: Upload Linux Artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: node-native-linux-${{ matrix.config.arch }}-musl
+          path: |
+            node/dist/lancedb-vectordb-linux*.tgz
+
+  nodejs-linux-gnu:
    name: lancedb (${{ matrix.config.arch}}-unknown-linux-gnu
    runs-on: ${{ matrix.config.runner }}
    # Only runs on tags that matches the make-release action
@@ -178,7 +230,7 @@ jobs:
      - name: Upload Linux Artifacts
        uses: actions/upload-artifact@v4
        with:
-          name: nodejs-native-linux-${{ matrix.config.arch }}
+          name: nodejs-native-linux-${{ matrix.config.arch }}-gnu
          path: |
            nodejs/dist/*.node
      # The generic files are the same in all distros so we just pick
@@ -192,6 +244,62 @@ jobs:
            nodejs/dist/*
            !nodejs/dist/*.node

+  nodejs-linux-musl:
+    name: lancedb (${{ matrix.config.arch}}-unknown-linux-musl
+    runs-on: ubuntu-latest
+    container: alpine:edge
+    # Only runs on tags that match the make-release action
+    if: startsWith(github.ref, 'refs/tags/v')
+    strategy:
+      fail-fast: false
+      matrix:
+        config:
+          - arch: x86_64
+          - arch: aarch64
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Install common dependencies
+        run: |
+          apk add protobuf-dev curl clang mold grep npm bash openssl-dev openssl-libs-static
+          curl --proto '=https' --tlsv1.3 -sSf https://raw.githubusercontent.com/rust-lang/rustup/refs/heads/master/rustup-init.sh | sh -s -- -y --default-toolchain 1.80.0
+          echo "source $HOME/.cargo/env" >> saved_env
+          echo "export CC=clang" >> saved_env
+          echo "export RUSTFLAGS='-Ctarget-cpu=haswell -Ctarget-feature=-crt-static,+avx2,+fma,+f16c -Clinker=clang -Clink-arg=-fuse-ld=mold'" >> saved_env
+          echo "export X86_64_UNKNOWN_LINUX_MUSL_OPENSSL_INCLUDE_DIR=/usr/include" >> saved_env
+          echo "export X86_64_UNKNOWN_LINUX_MUSL_OPENSSL_LIB_DIR=/usr/lib" >> saved_env
+      - name: Configure aarch64 build
+        if: ${{ matrix.config.arch == 'aarch64' }}
+        run: |
+          source "$HOME/.cargo/env"
+          rustup target add aarch64-unknown-linux-musl --toolchain 1.80.0
+          crt=$(realpath $(dirname $(rustup which rustc))/../lib/rustlib/aarch64-unknown-linux-musl/lib/self-contained)
+          sysroot_lib=/usr/aarch64-unknown-linux-musl/usr/lib
+          apk_url=https://dl-cdn.alpinelinux.org/alpine/latest-stable/main/aarch64/
+          curl -sSf $apk_url > apk_list
+          for pkg in gcc libgcc musl openssl-dev openssl-libs-static; do curl -sSf $apk_url$(cat apk_list | grep -oP '(?<=")'$pkg'-\d.*?(?=")') | tar zxf -; done
+          mkdir -p $sysroot_lib
+          echo 'GROUP ( libgcc_s.so.1 -lgcc )' > $sysroot_lib/libgcc_s.so
+          cp usr/lib/libgcc_s.so.1 $sysroot_lib
+          cp usr/lib/gcc/aarch64-alpine-linux-musl/*/libgcc.a $sysroot_lib
+          cp lib/ld-musl-aarch64.so.1 $sysroot_lib/libc.so
+          echo '!<arch>' > $sysroot_lib/libdl.a
+          (cd $crt && cp crti.o crtbeginS.o crtendS.o crtn.o -t $sysroot_lib)
+          echo "export CARGO_BUILD_TARGET=aarch64-unknown-linux-musl" >> saved_env
+          echo "export RUSTFLAGS='-Ctarget-feature=-crt-static,+neon,+fp16,+fhm,+dotprod -Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=--target=aarch64-unknown-linux-musl -Clink-arg=--sysroot=/usr/aarch64-unknown-linux-musl -Clink-arg=-lc'" >> saved_env
+          echo "export AARCH64_UNKNOWN_LINUX_MUSL_OPENSSL_INCLUDE_DIR=$(realpath usr/include)" >> saved_env
+          echo "export AARCH64_UNKNOWN_LINUX_MUSL_OPENSSL_LIB_DIR=$(realpath usr/lib)" >> saved_env
+      - name: Build Linux Artifacts
+        run: |
+          source ./saved_env
+          bash ci/manylinux_node/build_lancedb.sh ${{ matrix.config.arch }}
+      - name: Upload Linux Artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: nodejs-native-linux-${{ matrix.config.arch }}-musl
+          path: |
+            nodejs/dist/*.node
+
  node-windows:
    name: vectordb ${{ matrix.target }}
    runs-on: windows-2022
@@ -226,125 +334,109 @@ jobs:
          path: |
            node/dist/lancedb-vectordb-win32*.tgz

-  node-windows-arm64:
-    name: vectordb win32-arm64-msvc
-    runs-on: windows-4x-arm
-    if: startsWith(github.ref, 'refs/tags/v')
-    steps:
-      - uses: actions/checkout@v4
-      - name: Cache installations
-        id: cache-installs
-        uses: actions/cache@v4
-        with:
-          path: |
-            C:\Program Files\Git
-            C:\BuildTools
-            C:\Program Files (x86)\Windows Kits
-            C:\Program Files\7-Zip
-            C:\protoc
-          key: ${{ runner.os }}-arm64-installs-v1
-          restore-keys: |
-            ${{ runner.os }}-arm64-installs-
-      - name: Install Git
-        if: steps.cache-installs.outputs.cache-hit != 'true'
-        run: |
-          Invoke-WebRequest -Uri "https://github.com/git-for-windows/git/releases/download/v2.44.0.windows.1/Git-2.44.0-64-bit.exe" -OutFile "git-installer.exe"
-          Start-Process -FilePath "git-installer.exe" -ArgumentList "/VERYSILENT", "/NORESTART" -Wait
-        shell: powershell
-      - name: Add Git to PATH
-        run: |
-          Add-Content $env:GITHUB_PATH "C:\Program Files\Git\bin"
-          $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
-        shell: powershell
-      - name: Configure Git symlinks
-        run: git config --global core.symlinks true
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
-        with:
-          python-version: "3.13"
-      - name: Install Visual Studio Build Tools
-        if: steps.cache-installs.outputs.cache-hit != 'true'
-        run: |
-          Invoke-WebRequest -Uri "https://aka.ms/vs/17/release/vs_buildtools.exe" -OutFile "vs_buildtools.exe"
-          Start-Process -FilePath "vs_buildtools.exe" -ArgumentList "--quiet", "--wait", "--norestart", "--nocache", `
-            "--installPath", "C:\BuildTools", `
-            "--add", "Microsoft.VisualStudio.Component.VC.Tools.ARM64", `
-            "--add", "Microsoft.VisualStudio.Component.VC.Tools.x86.x64", `
-            "--add", "Microsoft.VisualStudio.Component.Windows11SDK.22621", `
-            "--add", "Microsoft.VisualStudio.Component.VC.ATL", `
-            "--add", "Microsoft.VisualStudio.Component.VC.ATLMFC", `
-            "--add", "Microsoft.VisualStudio.Component.VC.Llvm.Clang" -Wait
-        shell: powershell
-      - name: Add Visual Studio Build Tools to PATH
-        run: |
-          $vsPath = "C:\BuildTools\VC\Tools\MSVC"
-          $latestVersion = (Get-ChildItem $vsPath | Sort-Object {[version]$_.Name} -Descending)[0].Name
-          Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\arm64"
-          Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\x64"
-          Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\arm64"
-          Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\x64"
-          Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\Llvm\x64\bin"
+  # TODO: re-enable once working https://github.com/lancedb/lancedb/pull/1831
+  # node-windows-arm64:
+  #   name: vectordb win32-arm64-msvc
+  #   runs-on: windows-4x-arm
+  #   if: startsWith(github.ref, 'refs/tags/v')
+  #   steps:
+  #     - uses: actions/checkout@v4
+  #     - name: Install Git
+  #       run: |
+  #         Invoke-WebRequest -Uri "https://github.com/git-for-windows/git/releases/download/v2.44.0.windows.1/Git-2.44.0-64-bit.exe" -OutFile "git-installer.exe"
+  #         Start-Process -FilePath "git-installer.exe" -ArgumentList "/VERYSILENT", "/NORESTART" -Wait
+  #       shell: powershell
+  #     - name: Add Git to PATH
+  #       run: |
+  #         Add-Content $env:GITHUB_PATH "C:\Program Files\Git\bin"
+  #         $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
+  #       shell: powershell
+  #     - name: Configure Git symlinks
+  #       run: git config --global core.symlinks true
+  #     - uses: actions/checkout@v4
+  #     - uses: actions/setup-python@v5
+  #       with:
+  #         python-version: "3.13"
+  #     - name: Install Visual Studio Build Tools
+  #       run: |
+  #         Invoke-WebRequest -Uri "https://aka.ms/vs/17/release/vs_buildtools.exe" -OutFile "vs_buildtools.exe"
+  #         Start-Process -FilePath "vs_buildtools.exe" -ArgumentList "--quiet", "--wait", "--norestart", "--nocache", `
+  #           "--installPath", "C:\BuildTools", `
+  #           "--add", "Microsoft.VisualStudio.Component.VC.Tools.ARM64", `
+  #           "--add", "Microsoft.VisualStudio.Component.VC.Tools.x86.x64", `
+  #           "--add", "Microsoft.VisualStudio.Component.Windows11SDK.22621", `
+  #           "--add", "Microsoft.VisualStudio.Component.VC.ATL", `
+  #           "--add", "Microsoft.VisualStudio.Component.VC.ATLMFC", `
+  #           "--add", "Microsoft.VisualStudio.Component.VC.Llvm.Clang" -Wait
+  #       shell: powershell
+  #     - name: Add Visual Studio Build Tools to PATH
+  #       run: |
+  #         $vsPath = "C:\BuildTools\VC\Tools\MSVC"
+  #         $latestVersion = (Get-ChildItem $vsPath | Sort-Object {[version]$_.Name} -Descending)[0].Name
+  #         Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\arm64"
+  #         Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\x64"
+  #         Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\arm64"
+  #         Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\x64"
+  #         Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\Llvm\x64\bin"

-          # Add MSVC runtime libraries to LIB
-          $env:LIB = "C:\BuildTools\VC\Tools\MSVC\$latestVersion\lib\arm64;" + 
-                     "C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\um\arm64;" +
-                     "C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\ucrt\arm64"
-          Add-Content $env:GITHUB_ENV "LIB=$env:LIB"
+  #         # Add MSVC runtime libraries to LIB
+  #         $env:LIB = "C:\BuildTools\VC\Tools\MSVC\$latestVersion\lib\arm64;" +
+  #                    "C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\um\arm64;" +
+  #                    "C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\ucrt\arm64"
+  #         Add-Content $env:GITHUB_ENV "LIB=$env:LIB"

-          # Add INCLUDE paths
-          $env:INCLUDE = "C:\BuildTools\VC\Tools\MSVC\$latestVersion\include;" +
-                        "C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\ucrt;" +
-                        "C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\um;" +
-                        "C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\shared"
-          Add-Content $env:GITHUB_ENV "INCLUDE=$env:INCLUDE"
-        shell: powershell
-      - name: Install Rust
-        run: |
-          Invoke-WebRequest https://win.rustup.rs/x86_64 -OutFile rustup-init.exe
-          .\rustup-init.exe -y --default-host aarch64-pc-windows-msvc
-        shell: powershell
-      - name: Add Rust to PATH
-        run: |
-          Add-Content $env:GITHUB_PATH "$env:USERPROFILE\.cargo\bin"
-        shell: powershell
+  #         # Add INCLUDE paths
+  #         $env:INCLUDE = "C:\BuildTools\VC\Tools\MSVC\$latestVersion\include;" +
+  #                       "C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\ucrt;" +
+  #                       "C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\um;" +
+  #                       "C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\shared"
+  #         Add-Content $env:GITHUB_ENV "INCLUDE=$env:INCLUDE"
+  #       shell: powershell
+  #     - name: Install Rust
+  #       run: |
+  #         Invoke-WebRequest https://win.rustup.rs/x86_64 -OutFile rustup-init.exe
+  #         .\rustup-init.exe -y --default-host aarch64-pc-windows-msvc
+  #       shell: powershell
+  #     - name: Add Rust to PATH
+  #       run: |
+  #         Add-Content $env:GITHUB_PATH "$env:USERPROFILE\.cargo\bin"
+  #       shell: powershell

-      - uses: Swatinem/rust-cache@v2
-        with:
-          workspaces: rust
-      - name: Install 7-Zip ARM
-        if: steps.cache-installs.outputs.cache-hit != 'true'
-        run: |
-          New-Item -Path 'C:\7zip' -ItemType Directory
-          Invoke-WebRequest https://7-zip.org/a/7z2408-arm64.exe -OutFile C:\7zip\7z-installer.exe
-          Start-Process -FilePath C:\7zip\7z-installer.exe -ArgumentList '/S' -Wait
-        shell: powershell
-      - name: Add 7-Zip to PATH
-        run: Add-Content $env:GITHUB_PATH "C:\Program Files\7-Zip"
-        shell: powershell
-      - name: Install Protoc v21.12
-        if: steps.cache-installs.outputs.cache-hit != 'true'
-        working-directory: C:\
-        run: |
-          if (Test-Path 'C:\protoc') {
-              Write-Host "Protoc directory exists, skipping installation"
-              return
-          }
-          New-Item -Path 'C:\protoc' -ItemType Directory
-          Set-Location C:\protoc
-          Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
-          & 'C:\Program Files\7-Zip\7z.exe' x protoc.zip
-        shell: powershell
-      - name: Add Protoc to PATH
-        run: Add-Content $env:GITHUB_PATH "C:\protoc\bin"
-        shell: powershell
-      - name: Build Windows native node modules
-        run: .\ci\build_windows_artifacts.ps1 aarch64-pc-windows-msvc
-      - name: Upload Windows ARM64 Artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          name: node-native-windows-arm64
-          path: |
-            node/dist/*.node
+  #     - uses: Swatinem/rust-cache@v2
+  #       with:
+  #         workspaces: rust
+  #     - name: Install 7-Zip ARM
+  #       run: |
+  #         New-Item -Path 'C:\7zip' -ItemType Directory
+  #         Invoke-WebRequest https://7-zip.org/a/7z2408-arm64.exe -OutFile C:\7zip\7z-installer.exe
+  #         Start-Process -FilePath C:\7zip\7z-installer.exe -ArgumentList '/S' -Wait
+  #       shell: powershell
+  #     - name: Add 7-Zip to PATH
+  #       run: Add-Content $env:GITHUB_PATH "C:\Program Files\7-Zip"
+  #       shell: powershell
+  #     - name: Install Protoc v21.12
+  #       working-directory: C:\
+  #       run: |
+  #         if (Test-Path 'C:\protoc') {
+  #             Write-Host "Protoc directory exists, skipping installation"
+  #             return
+  #         }
+  #         New-Item -Path 'C:\protoc' -ItemType Directory
+  #         Set-Location C:\protoc
+  #         Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
+  #         & 'C:\Program Files\7-Zip\7z.exe' x protoc.zip
+  #       shell: powershell
+  #     - name: Add Protoc to PATH
+  #       run: Add-Content $env:GITHUB_PATH "C:\protoc\bin"
+  #       shell: powershell
+  #     - name: Build Windows native node modules
+  #       run: .\ci\build_windows_artifacts.ps1 aarch64-pc-windows-msvc
+  #     - name: Upload Windows ARM64 Artifacts
+  #       uses: actions/upload-artifact@v4
+  #       with:
+  #         name: node-native-windows-arm64
+  #         path: |
+  #           node/dist/*.node

  nodejs-windows:
    name: lancedb ${{ matrix.target }}
@@ -380,119 +472,103 @@ jobs:
          path: |
            nodejs/dist/*.node

-  nodejs-windows-arm64:
-    name: lancedb win32-arm64-msvc
-    runs-on: windows-4x-arm
-    if: startsWith(github.ref, 'refs/tags/v')
-    steps:
-      - uses: actions/checkout@v4
-      - name: Cache installations
-        id: cache-installs
-        uses: actions/cache@v4
-        with:
-          path: |
-            C:\Program Files\Git
-            C:\BuildTools
-            C:\Program Files (x86)\Windows Kits
-            C:\Program Files\7-Zip
-            C:\protoc
-          key: ${{ runner.os }}-arm64-installs-v1
-          restore-keys: |
-            ${{ runner.os }}-arm64-installs-
-      - name: Install Git
-        if: steps.cache-installs.outputs.cache-hit != 'true'
-        run: |
-          Invoke-WebRequest -Uri "https://github.com/git-for-windows/git/releases/download/v2.44.0.windows.1/Git-2.44.0-64-bit.exe" -OutFile "git-installer.exe"
-          Start-Process -FilePath "git-installer.exe" -ArgumentList "/VERYSILENT", "/NORESTART" -Wait
-        shell: powershell
-      - name: Add Git to PATH
-        run: |
-          Add-Content $env:GITHUB_PATH "C:\Program Files\Git\bin"
-          $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
-        shell: powershell
-      - name: Configure Git symlinks
-        run: git config --global core.symlinks true
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
-        with:
-          python-version: "3.13"
-      - name: Install Visual Studio Build Tools
-        if: steps.cache-installs.outputs.cache-hit != 'true'
-        run: |
-          Invoke-WebRequest -Uri "https://aka.ms/vs/17/release/vs_buildtools.exe" -OutFile "vs_buildtools.exe"
-          Start-Process -FilePath "vs_buildtools.exe" -ArgumentList "--quiet", "--wait", "--norestart", "--nocache", `
-            "--installPath", "C:\BuildTools", `
-            "--add", "Microsoft.VisualStudio.Component.VC.Tools.ARM64", `
-            "--add", "Microsoft.VisualStudio.Component.VC.Tools.x86.x64", `
-            "--add", "Microsoft.VisualStudio.Component.Windows11SDK.22621", `
-            "--add", "Microsoft.VisualStudio.Component.VC.ATL", `
-            "--add", "Microsoft.VisualStudio.Component.VC.ATLMFC", `
-            "--add", "Microsoft.VisualStudio.Component.VC.Llvm.Clang" -Wait
-        shell: powershell
-      - name: Add Visual Studio Build Tools to PATH
-        run: |
-          $vsPath = "C:\BuildTools\VC\Tools\MSVC"
-          $latestVersion = (Get-ChildItem $vsPath | Sort-Object {[version]$_.Name} -Descending)[0].Name
-          Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\arm64"
-          Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\x64"
-          Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\arm64"
-          Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\x64"
-          Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\Llvm\x64\bin"
+  # TODO: re-enable once working https://github.com/lancedb/lancedb/pull/1831
+  # nodejs-windows-arm64:
+  #   name: lancedb win32-arm64-msvc
+  #   runs-on: windows-4x-arm
+  #   if: startsWith(github.ref, 'refs/tags/v')
+  #   steps:
+  #     - uses: actions/checkout@v4
+  #     - name: Install Git
+  #       run: |
+  #         Invoke-WebRequest -Uri "https://github.com/git-for-windows/git/releases/download/v2.44.0.windows.1/Git-2.44.0-64-bit.exe" -OutFile "git-installer.exe"
+  #         Start-Process -FilePath "git-installer.exe" -ArgumentList "/VERYSILENT", "/NORESTART" -Wait
+  #       shell: powershell
+  #     - name: Add Git to PATH
+  #       run: |
+  #         Add-Content $env:GITHUB_PATH "C:\Program Files\Git\bin"
+  #         $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
+  #       shell: powershell
+  #     - name: Configure Git symlinks
+  #       run: git config --global core.symlinks true
+  #     - uses: actions/checkout@v4
+  #     - uses: actions/setup-python@v5
+  #       with:
+  #         python-version: "3.13"
+  #     - name: Install Visual Studio Build Tools
+  #       run: |
+  #         Invoke-WebRequest -Uri "https://aka.ms/vs/17/release/vs_buildtools.exe" -OutFile "vs_buildtools.exe"
+  #         Start-Process -FilePath "vs_buildtools.exe" -ArgumentList "--quiet", "--wait", "--norestart", "--nocache", `
+  #           "--installPath", "C:\BuildTools", `
+  #           "--add", "Microsoft.VisualStudio.Component.VC.Tools.ARM64", `
+  #           "--add", "Microsoft.VisualStudio.Component.VC.Tools.x86.x64", `
+  #           "--add", "Microsoft.VisualStudio.Component.Windows11SDK.22621", `
+  #           "--add", "Microsoft.VisualStudio.Component.VC.ATL", `
+  #           "--add", "Microsoft.VisualStudio.Component.VC.ATLMFC", `
+  #           "--add", "Microsoft.VisualStudio.Component.VC.Llvm.Clang" -Wait
+  #       shell: powershell
+  #     - name: Add Visual Studio Build Tools to PATH
+  #       run: |
+  #         $vsPath = "C:\BuildTools\VC\Tools\MSVC"
+  #         $latestVersion = (Get-ChildItem $vsPath | Sort-Object {[version]$_.Name} -Descending)[0].Name
+  #         Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\arm64"
+  #         Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\x64"
+  #         Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\arm64"
+  #         Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\x64"
+  #         Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\Llvm\x64\bin"

-          $env:LIB = ""
-          Add-Content $env:GITHUB_ENV "LIB=C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\um\arm64;C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\ucrt\arm64"
-        shell: powershell
-      - name: Install Rust
-        run: |
-          Invoke-WebRequest https://win.rustup.rs/x86_64 -OutFile rustup-init.exe
-          .\rustup-init.exe -y --default-host aarch64-pc-windows-msvc
-        shell: powershell
-      - name: Add Rust to PATH
-        run: |
-          Add-Content $env:GITHUB_PATH "$env:USERPROFILE\.cargo\bin"
-        shell: powershell
+  #         $env:LIB = ""
+  #         Add-Content $env:GITHUB_ENV "LIB=C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\um\arm64;C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\ucrt\arm64"
+  #       shell: powershell
+  #     - name: Install Rust
+  #       run: |
+  #         Invoke-WebRequest https://win.rustup.rs/x86_64 -OutFile rustup-init.exe
+  #         .\rustup-init.exe -y --default-host aarch64-pc-windows-msvc
+  #       shell: powershell
+  #     - name: Add Rust to PATH
+  #       run: |
+  #         Add-Content $env:GITHUB_PATH "$env:USERPROFILE\.cargo\bin"
+  #       shell: powershell

-      - uses: Swatinem/rust-cache@v2
-        with:
-          workspaces: rust
-      - name: Install 7-Zip ARM
-        if: steps.cache-installs.outputs.cache-hit != 'true'
-        run: |
-          New-Item -Path 'C:\7zip' -ItemType Directory
-          Invoke-WebRequest https://7-zip.org/a/7z2408-arm64.exe -OutFile C:\7zip\7z-installer.exe
-          Start-Process -FilePath C:\7zip\7z-installer.exe -ArgumentList '/S' -Wait
-        shell: powershell
-      - name: Add 7-Zip to PATH
-        run: Add-Content $env:GITHUB_PATH "C:\Program Files\7-Zip"
-        shell: powershell
-      - name: Install Protoc v21.12
-        if: steps.cache-installs.outputs.cache-hit != 'true'
-        working-directory: C:\
-        run: |
-          if (Test-Path 'C:\protoc') {
-              Write-Host "Protoc directory exists, skipping installation"
-              return
-          }
-          New-Item -Path 'C:\protoc' -ItemType Directory
-          Set-Location C:\protoc
-          Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
-          & 'C:\Program Files\7-Zip\7z.exe' x protoc.zip
-        shell: powershell
-      - name: Add Protoc to PATH
-        run: Add-Content $env:GITHUB_PATH "C:\protoc\bin"
-        shell: powershell
-      - name: Build Windows native node modules
-        run: .\ci\build_windows_artifacts_nodejs.ps1 aarch64-pc-windows-msvc
-      - name: Upload Windows ARM64 Artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          name: nodejs-native-windows-arm64
-          path: |
-            nodejs/dist/*.node
+  #     - uses: Swatinem/rust-cache@v2
+  #       with:
+  #         workspaces: rust
+  #     - name: Install 7-Zip ARM
+  #       run: |
+  #         New-Item -Path 'C:\7zip' -ItemType Directory
+  #         Invoke-WebRequest https://7-zip.org/a/7z2408-arm64.exe -OutFile C:\7zip\7z-installer.exe
+  #         Start-Process -FilePath C:\7zip\7z-installer.exe -ArgumentList '/S' -Wait
+  #       shell: powershell
+  #     - name: Add 7-Zip to PATH
+  #       run: Add-Content $env:GITHUB_PATH "C:\Program Files\7-Zip"
+  #       shell: powershell
+  #     - name: Install Protoc v21.12
+  #       working-directory: C:\
+  #       run: |
+  #         if (Test-Path 'C:\protoc') {
+  #             Write-Host "Protoc directory exists, skipping installation"
+  #             return
+  #         }
+  #         New-Item -Path 'C:\protoc' -ItemType Directory
+  #         Set-Location C:\protoc
+  #         Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
+  #         & 'C:\Program Files\7-Zip\7z.exe' x protoc.zip
+  #       shell: powershell
+  #     - name: Add Protoc to PATH
+  #       run: Add-Content $env:GITHUB_PATH "C:\protoc\bin"
+  #       shell: powershell
+  #     - name: Build Windows native node modules
+  #       run: .\ci\build_windows_artifacts_nodejs.ps1 aarch64-pc-windows-msvc
+  #     - name: Upload Windows ARM64 Artifacts
+  #       uses: actions/upload-artifact@v4
+  #       with:
+  #         name: nodejs-native-windows-arm64
+  #         path: |
+  #           nodejs/dist/*.node

  release:
    name: vectordb NPM Publish
-    needs: [node, node-macos, node-linux, node-windows, node-windows-arm64]
+    needs: [node, node-macos, node-linux-gnu, node-linux-musl, node-windows]
    runs-on: ubuntu-latest
    # Only runs on tags that matches the make-release action
    if: startsWith(github.ref, 'refs/tags/v')
@@ -510,7 +586,7 @@ jobs:
        env:
          NODE_AUTH_TOKEN: ${{ secrets.LANCEDB_NPM_REGISTRY_TOKEN }}
        run: |
-          # Tag beta as "preview" instead of default "latest". See lancedb 
+          # Tag beta as "preview" instead of default "latest". See lancedb
          # npm publish step for more info.
          if [[ $GITHUB_REF =~ refs/tags/v(.*)-beta.* ]]; then
            PUBLISH_ARGS="--tag preview"
@@ -532,7 +608,7 @@ jobs:

  release-nodejs:
    name: lancedb NPM Publish
-    needs: [nodejs-macos, nodejs-linux, nodejs-windows, nodejs-windows-arm64]
+    needs: [nodejs-macos, nodejs-linux-gnu, nodejs-linux-musl, nodejs-windows]
    runs-on: ubuntu-latest
    # Only runs on tags that matches the make-release action
    if: startsWith(github.ref, 'refs/tags/v')
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -138,7 +138,7 @@ jobs:
        run: rm -rf target/wheels
  windows:
    name: "Windows: ${{ matrix.config.name }}"
-    timeout-minutes: 30
+    timeout-minutes: 60
    strategy:
      matrix:
        config:
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -50,6 +50,7 @@ jobs:
        run: cargo fmt --all -- --check
      - name: Run clippy
        run: cargo clippy --workspace --tests --all-features -- -D warnings
+
  linux:
    timeout-minutes: 30
    # To build all features, we need more disk space than is available
@@ -91,6 +92,7 @@ jobs:
        run: cargo test --all-features
      - name: Run examples
        run: cargo run --example simple
+
  macos:
    timeout-minutes: 30
    strategy:
@@ -118,6 +120,7 @@ jobs:
      - name: Run tests
        # Run with everything except the integration tests.
        run: cargo test --features remote,fp16kernels
+
  windows:
    runs-on: windows-2022
    steps:
@@ -139,24 +142,11 @@ jobs:
          $env:VCPKG_ROOT = $env:VCPKG_INSTALLATION_ROOT
          cargo build
          cargo test
+
  windows-arm64:
    runs-on: windows-4x-arm
    steps:
-      - name: Cache installations
-        id: cache-installs
-        uses: actions/cache@v4
-        with:
-          path: |
-            C:\Program Files\Git
-            C:\BuildTools
-            C:\Program Files (x86)\Windows Kits
-            C:\Program Files\7-Zip
-            C:\protoc
-          key: ${{ runner.os }}-arm64-installs-v1
-          restore-keys: |
-            ${{ runner.os }}-arm64-installs-
      - name: Install Git
-        if: steps.cache-installs.outputs.cache-hit != 'true'
        run: |
          Invoke-WebRequest -Uri "https://github.com/git-for-windows/git/releases/download/v2.44.0.windows.1/Git-2.44.0-64-bit.exe" -OutFile "git-installer.exe"
          Start-Process -FilePath "git-installer.exe" -ArgumentList "/VERYSILENT", "/NORESTART" -Wait
@@ -173,7 +163,6 @@ jobs:
        with:
          python-version: "3.13"
      - name: Install Visual Studio Build Tools
-        if: steps.cache-installs.outputs.cache-hit != 'true'
        run: |
          Invoke-WebRequest -Uri "https://aka.ms/vs/17/release/vs_buildtools.exe" -OutFile "vs_buildtools.exe"
          Start-Process -FilePath "vs_buildtools.exe" -ArgumentList "--quiet", "--wait", "--norestart", "--nocache", `
@@ -217,12 +206,10 @@ jobs:
        run: |
          Add-Content $env:GITHUB_PATH "$env:USERPROFILE\.cargo\bin"
        shell: powershell
-
      - uses: Swatinem/rust-cache@v2
        with:
          workspaces: rust
      - name: Install 7-Zip ARM
-        if: steps.cache-installs.outputs.cache-hit != 'true'
        run: |
          New-Item -Path 'C:\7zip' -ItemType Directory
          Invoke-WebRequest https://7-zip.org/a/7z2408-arm64.exe -OutFile C:\7zip\7z-installer.exe
@@ -232,12 +219,11 @@ jobs:
        run: Add-Content $env:GITHUB_PATH "C:\Program Files\7-Zip"
        shell: powershell
      - name: Install Protoc v21.12
-        if: steps.cache-installs.outputs.cache-hit != 'true'
        working-directory: C:\
        run: |
          if (Test-Path 'C:\protoc') {
-              Write-Host "Protoc directory exists, skipping installation"
-              return
+            Write-Host "Protoc directory exists, skipping installation"
+            return
          }
          New-Item -Path 'C:\protoc' -ItemType Directory
          Set-Location C:\protoc
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -18,18 +18,18 @@ repository = "https://github.com/lancedb/lancedb"
 description = "Serverless, low-latency vector database for AI applications"
 keywords = ["lancedb", "lance", "database", "vector", "search"]
 categories = ["database-implementations"]
-rust-version = "1.80.0" # TODO: lower this once we upgrade Lance again.
+rust-version = "1.80.0"                                                     # TODO: lower this once we upgrade Lance again.

 [workspace.dependencies]
-lance = { "version" = "=0.19.2", "features" = [
+lance = { "version" = "=0.20.0", "features" = [
    "dynamodb",
-], git = "https://github.com/lancedb/lance.git", tag = "v0.19.2-beta.3" }
-lance-index = { "version" = "=0.19.2", git = "https://github.com/lancedb/lance.git", tag = "v0.19.2-beta.3" }
-lance-linalg = { "version" = "=0.19.2", git = "https://github.com/lancedb/lance.git", tag = "v0.19.2-beta.3" }
-lance-table = { "version" = "=0.19.2", git = "https://github.com/lancedb/lance.git", tag = "v0.19.2-beta.3" }
-lance-testing = { "version" = "=0.19.2", git = "https://github.com/lancedb/lance.git", tag = "v0.19.2-beta.3" }
-lance-datafusion = { "version" = "=0.19.2", git = "https://github.com/lancedb/lance.git", tag = "v0.19.2-beta.3" }
-lance-encoding = { "version" = "=0.19.2", git = "https://github.com/lancedb/lance.git", tag = "v0.19.2-beta.3" }
+], git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
+lance-index = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
+lance-linalg = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
+lance-table = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
+lance-testing = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
+lance-datafusion = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
+lance-encoding = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
 # Note that this one does not include pyarrow
 arrow = { version = "52.2", optional = false }
 arrow-array = "52.2"
--- a/ci/manylinux_node/build_lancedb.sh
+++ b/ci/manylinux_node/build_lancedb.sh
@@ -11,7 +11,8 @@ fi
 export OPENSSL_STATIC=1
 export OPENSSL_INCLUDE_DIR=/usr/local/include/openssl

-source $HOME/.bashrc
+#Alpine doesn't have .bashrc
+FILE=$HOME/.bashrc && test -f $FILE && source $FILE

 cd nodejs
 npm ci
--- a/ci/manylinux_node/build_vectordb.sh
+++ b/ci/manylinux_node/build_vectordb.sh
@@ -5,13 +5,14 @@ ARCH=${1:-x86_64}

 if [ "$ARCH" = "x86_64" ]; then
    export OPENSSL_LIB_DIR=/usr/local/lib64/
-else 
+else
    export OPENSSL_LIB_DIR=/usr/local/lib/
 fi
 export OPENSSL_STATIC=1
 export OPENSSL_INCLUDE_DIR=/usr/local/include/openssl

-source $HOME/.bashrc
+#Alpine doesn't have .bashrc
+FILE=$HOME/.bashrc && test -f $FILE && source $FILE

 cd node
 npm ci
--- a/ci/mock_openai.py
+++ b/ci/mock_openai.py
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright The LanceDB Authors
+"""A zero-dependency mock OpenAI embeddings API endpoint for testing purposes."""
+import argparse
+import json
+import http.server
+
+
+class MockOpenAIRequestHandler(http.server.BaseHTTPRequestHandler):
+    def do_POST(self):
+        content_length = int(self.headers["Content-Length"])
+        post_data = self.rfile.read(content_length)
+        post_data = json.loads(post_data.decode("utf-8"))
+        # See: https://platform.openai.com/docs/api-reference/embeddings/create
+
+        if isinstance(post_data["input"], str):
+            num_inputs = 1
+        else:
+            num_inputs = len(post_data["input"])
+
+        model = post_data.get("model", "text-embedding-ada-002")
+
+        data = []
+        for i in range(num_inputs):
+            data.append({
+                "object": "embedding",
+                "embedding": [0.1] * 1536,
+                "index": i,
+            })
+
+        response = {
+            "object": "list",
+            "data": data,
+            "model": model,
+            "usage": {
+                "prompt_tokens": 0,
+                "total_tokens": 0,
+            }
+        }
+
+        self.send_response(200)
+        self.send_header("Content-type", "application/json")
+        self.end_headers()
+        self.wfile.write(json.dumps(response).encode("utf-8"))
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Mock OpenAI embeddings API endpoint")
+    parser.add_argument("--port", type=int, default=8000, help="Port to listen on")
+    args = parser.parse_args()
+    port = args.port
+
+    print(f"server started on port {port}. Press Ctrl-C to stop.")
+    print(f"To use, set OPENAI_BASE_URL=http://localhost:{port} in your environment.")
+
+    with http.server.HTTPServer(("0.0.0.0", port), MockOpenAIRequestHandler) as server:
+        server.serve_forever()
--- a/docs/mkdocs.yml
+++ b/docs/mkdocs.yml
@@ -138,6 +138,7 @@ nav:
              - Jina Reranker: reranking/jina.md
              - OpenAI Reranker: reranking/openai.md
              - AnswerDotAi Rerankers: reranking/answerdotai.md
+              - Voyage AI Rerankers: reranking/voyageai.md
              - Building Custom Rerankers: reranking/custom_reranker.md
              - Example: notebooks/lancedb_reranking.ipynb
          - Filtering: sql.md
@@ -165,6 +166,7 @@ nav:
                  - Jina Embeddings: embeddings/available_embedding_models/text_embedding_functions/jina_embedding.md
                  - AWS Bedrock Text Embedding Functions: embeddings/available_embedding_models/text_embedding_functions/aws_bedrock_embedding.md
                  - IBM watsonx.ai Embeddings: embeddings/available_embedding_models/text_embedding_functions/ibm_watsonx_ai_embedding.md
+                  - Voyage AI Embeddings: embeddings/available_embedding_models/text_embedding_functions/voyageai_embedding.md
              - Multimodal Embedding Functions:
                  - OpenClip embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/openclip_embedding.md
                  - Imagebind embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/imagebind_embedding.md
--- a/docs/package-lock.json
+++ b/docs/package-lock.json
@@ -19,7 +19,7 @@
    },
    "../node": {
      "name": "vectordb",
-      "version": "0.4.6",
+      "version": "0.12.0",
      "cpu": [
        "x64",
        "arm64"
@@ -31,9 +31,7 @@
        "win32"
      ],
      "dependencies": {
-        "@apache-arrow/ts": "^14.0.2",
        "@neon-rs/load": "^0.0.74",
-        "apache-arrow": "^14.0.2",
        "axios": "^1.4.0"
      },
      "devDependencies": {
@@ -46,6 +44,7 @@
        "@types/temp": "^0.9.1",
        "@types/uuid": "^9.0.3",
        "@typescript-eslint/eslint-plugin": "^5.59.1",
+        "apache-arrow-old": "npm:apache-arrow@13.0.0",
        "cargo-cp-artifact": "^0.1",
        "chai": "^4.3.7",
        "chai-as-promised": "^7.1.1",
@@ -62,15 +61,19 @@
        "ts-node-dev": "^2.0.0",
        "typedoc": "^0.24.7",
        "typedoc-plugin-markdown": "^3.15.3",
-        "typescript": "*",
+        "typescript": "^5.1.0",
        "uuid": "^9.0.0"
      },
      "optionalDependencies": {
-        "@lancedb/vectordb-darwin-arm64": "0.4.6",
-        "@lancedb/vectordb-darwin-x64": "0.4.6",
-        "@lancedb/vectordb-linux-arm64-gnu": "0.4.6",
-        "@lancedb/vectordb-linux-x64-gnu": "0.4.6",
-        "@lancedb/vectordb-win32-x64-msvc": "0.4.6"
+        "@lancedb/vectordb-darwin-arm64": "0.12.0",
+        "@lancedb/vectordb-darwin-x64": "0.12.0",
+        "@lancedb/vectordb-linux-arm64-gnu": "0.12.0",
+        "@lancedb/vectordb-linux-x64-gnu": "0.12.0",
+        "@lancedb/vectordb-win32-x64-msvc": "0.12.0"
+      },
+      "peerDependencies": {
+        "@apache-arrow/ts": "^14.0.2",
+        "apache-arrow": "^14.0.2"
      }
    },
    "../node/node_modules/apache-arrow": {
--- a/docs/src/ann_indexes.md
+++ b/docs/src/ann_indexes.md
@@ -45,9 +45,9 @@ Lance supports `IVF_PQ` index type by default.
        Creating indexes is done via the [lancedb.Table.createIndex](../js/classes/Table.md/#createIndex) method.

        ```typescript
-        --8<--- "nodejs/examples/ann_indexes.ts:import"
+        --8<--- "nodejs/examples/ann_indexes.test.ts:import"

-        --8<-- "nodejs/examples/ann_indexes.ts:ingest"
+        --8<-- "nodejs/examples/ann_indexes.test.ts:ingest"
        ```

    === "vectordb (deprecated)"
@@ -140,13 +140,15 @@ There are a couple of parameters that can be used to fine-tune the search:

 - **limit** (default: 10): The amount of results that will be returned
 - **nprobes** (default: 20): The number of probes used. A higher number makes search more accurate but also slower.<br/>
-  Most of the time, setting nprobes to cover 5-10% of the dataset should achieve high recall with low latency.<br/>
-  e.g., for 1M vectors divided up into 256 partitions, nprobes should be set to ~20-40.<br/>
-  Note: nprobes is only applicable if an ANN index is present. If specified on a table without an ANN index, it is ignored.
+  Most of the time, setting nprobes to cover 5-15% of the dataset should achieve high recall with low latency.<br/>
+    - _For example_, for a dataset of 1 million vectors divided into 256 partitions, `nprobes` should be set to ~20-40. This value can be adjusted to achieve the optimal balance between search latency and search quality.<br/>
+  
 - **refine_factor** (default: None): Refine the results by reading extra elements and re-ranking them in memory.<br/>
  A higher number makes search more accurate but also slower. If you find the recall is less than ideal, try refine_factor=10 to start.<br/>
-  e.g., for 1M vectors divided into 256 partitions, if you're looking for top 20, then refine_factor=200 reranks the whole partition.<br/>
-  Note: refine_factor is only applicable if an ANN index is present. If specified on a table without an ANN index, it is ignored.
+    - _For example_, for a dataset of 1 million vectors divided into 256 partitions, when searching for the top 20 results, setting the `refine_factor` to 200 will initially retrieve the top 4,000 candidates (top k * refine_factor) from all searched partitions. These candidates are then reranked to determine the final top 20 results.<br/>
+!!! note 
+    Both `nprobes` and `refine_factor` are only applicable if an ANN index is present. If specified on a table without an ANN index, those parameters are ignored.
+

 === "Python"

@@ -169,7 +171,7 @@ There are a couple of parameters that can be used to fine-tune the search:
    === "@lancedb/lancedb"

        ```typescript
-        --8<-- "nodejs/examples/ann_indexes.ts:search1"
+        --8<-- "nodejs/examples/ann_indexes.test.ts:search1"
        ```

    === "vectordb (deprecated)"
@@ -203,7 +205,7 @@ You can further filter the elements returned by a search using a where clause.
    === "@lancedb/lancedb"

        ```typescript
-        --8<-- "nodejs/examples/ann_indexes.ts:search2"
+        --8<-- "nodejs/examples/ann_indexes.test.ts:search2"
        ```

    === "vectordb (deprecated)"
@@ -235,7 +237,7 @@ You can select the columns returned by the query using a select clause.
    === "@lancedb/lancedb"

        ```typescript
-        --8<-- "nodejs/examples/ann_indexes.ts:search3"
+        --8<-- "nodejs/examples/ann_indexes.test.ts:search3"
        ```

    === "vectordb (deprecated)"
@@ -275,7 +277,15 @@ Product quantization can lead to approximately `16 * sizeof(float32) / 1 = 64` t
 Higher number of partitions could lead to more efficient I/O during queries and better accuracy, but it takes much more time to train.
 On `SIFT-1M` dataset, our benchmark shows that keeping each partition 1K-4K rows lead to a good latency / recall.

-`num_sub_vectors` specifies how many Product Quantization (PQ) short codes to generate on each vector. Because
+`num_sub_vectors` specifies how many Product Quantization (PQ) short codes to generate on each vector. The number should be a factor of the vector dimension. Because
 PQ is a lossy compression of the original vector, a higher `num_sub_vectors` usually results in
-less space distortion, and thus yields better accuracy. However, a higher `num_sub_vectors` also causes heavier I/O and
-more PQ computation, and thus, higher latency. `dimension / num_sub_vectors` should be a multiple of 8 for optimum SIMD efficiency.
+less space distortion, and thus yields better accuracy. However, a higher `num_sub_vectors` also causes heavier I/O and more PQ computation, and thus, higher latency. `dimension / num_sub_vectors` should be a multiple of 8 for optimum SIMD efficiency.
+
+!!! note
+    If `num_sub_vectors` is set to be greater than the vector dimension, you will see errors like `attempt to divide by zero`.
+
+### How to choose `m` and `ef_construction` for `IVF_HNSW_*` index?
+
+`m` determines the number of connections a new node establishes with its closest neighbors upon entering the graph. Typically, `m` falls within the range of 5 to 48. Lower `m` values are suitable for low-dimensional data or scenarios where recall is less critical. Conversely, higher `m` values are beneficial for high-dimensional data or when high recall is required. In essence, a larger `m` results in a denser graph with increased connectivity, but at the expense of higher memory consumption.
+
+`ef_construction` balances build speed and accuracy. Higher values increase accuracy but slow down the build process. A typical range is 150 to 300. For good search results, a minimum value of 100 is recommended. In most cases, setting this value above 500 offers no additional benefit. Ensure that `ef_construction` is always set to a value equal to or greater than `ef` in the search phase.
--- a/docs/src/basic.md
+++ b/docs/src/basic.md
@@ -157,7 +157,7 @@ recommend switching to stable releases.
        import * as lancedb from "@lancedb/lancedb";
        import * as arrow from "apache-arrow";

-        --8<-- "nodejs/examples/basic.ts:connect"
+        --8<-- "nodejs/examples/basic.test.ts:connect"
        ```

    === "vectordb (deprecated)"
@@ -212,7 +212,7 @@ table.
    === "@lancedb/lancedb"

        ```typescript
-        --8<-- "nodejs/examples/basic.ts:create_table"
+        --8<-- "nodejs/examples/basic.test.ts:create_table"
        ```

    === "vectordb (deprecated)"
@@ -268,7 +268,7 @@ similar to a `CREATE TABLE` statement in SQL.
    === "@lancedb/lancedb"

        ```typescript
-        --8<-- "nodejs/examples/basic.ts:create_empty_table"
+        --8<-- "nodejs/examples/basic.test.ts:create_empty_table"
        ```

    === "vectordb (deprecated)"
@@ -298,7 +298,7 @@ Once created, you can open a table as follows:
    === "@lancedb/lancedb"

        ```typescript
-        --8<-- "nodejs/examples/basic.ts:open_table"
+        --8<-- "nodejs/examples/basic.test.ts:open_table"
        ```

    === "vectordb (deprecated)"
@@ -327,7 +327,7 @@ If you forget the name of your table, you can always get a listing of all table
    === "@lancedb/lancedb"

        ```typescript
-        --8<-- "nodejs/examples/basic.ts:table_names"
+        --8<-- "nodejs/examples/basic.test.ts:table_names"
        ```

    === "vectordb (deprecated)"
@@ -357,7 +357,7 @@ After a table has been created, you can always add more data to it as follows:
    === "@lancedb/lancedb"

        ```typescript
-        --8<-- "nodejs/examples/basic.ts:add_data"
+        --8<-- "nodejs/examples/basic.test.ts:add_data"
        ```

    === "vectordb (deprecated)"
@@ -389,7 +389,7 @@ Once you've embedded the query, you can find its nearest neighbors as follows:
    === "@lancedb/lancedb"

        ```typescript
-        --8<-- "nodejs/examples/basic.ts:vector_search"
+        --8<-- "nodejs/examples/basic.test.ts:vector_search"
        ```

    === "vectordb (deprecated)"
@@ -429,7 +429,7 @@ LanceDB allows you to create an ANN index on a table as follows:
    === "@lancedb/lancedb"

        ```typescript
-        --8<-- "nodejs/examples/basic.ts:create_index"
+        --8<-- "nodejs/examples/basic.test.ts:create_index"
        ```

    === "vectordb (deprecated)"
@@ -469,7 +469,7 @@ This can delete any number of rows that match the filter.
    === "@lancedb/lancedb"

        ```typescript
-        --8<-- "nodejs/examples/basic.ts:delete_rows"
+        --8<-- "nodejs/examples/basic.test.ts:delete_rows"
        ```

    === "vectordb (deprecated)"
@@ -527,7 +527,7 @@ Use the `drop_table()` method on the database to remove a table.
    === "@lancedb/lancedb"

        ```typescript
-        --8<-- "nodejs/examples/basic.ts:drop_table"
+        --8<-- "nodejs/examples/basic.test.ts:drop_table"
        ```

    === "vectordb (deprecated)"
@@ -561,8 +561,8 @@ You can use the embedding API when working with embedding models. It automatical
    === "@lancedb/lancedb"

        ```typescript
-        --8<-- "nodejs/examples/embedding.ts:imports"
-        --8<-- "nodejs/examples/embedding.ts:openai_embeddings"
+        --8<-- "nodejs/examples/embedding.test.ts:imports"
+        --8<-- "nodejs/examples/embedding.test.ts:openai_embeddings"
        ```

 === "Rust"
--- a/docs/src/concepts/index_hnsw.md
+++ b/docs/src/concepts/index_hnsw.md
@@ -57,6 +57,13 @@ Then the greedy search routine operates as follows:

 ## Usage

+There are three key parameters to set when constructing an HNSW index:
+
+* `metric`: The distance metric to use, for example `L2` (Euclidean) distance. We also support `dot` and `cosine` distance.
+* `m`: The number of neighbors to select for each vector in the HNSW graph.
+* `ef_construction`: The number of candidates to evaluate during the construction of the HNSW graph.
+
+
 We can combine the above concepts to understand how to build and query an HNSW index in LanceDB.

 ### Construct index
--- a/docs/src/concepts/index_ivfpq.md
+++ b/docs/src/concepts/index_ivfpq.md
@@ -58,8 +58,10 @@ In Python, the index can be created as follows:
 # Make sure you have enough data in the table for an effective training step
 tbl.create_index(metric="L2", num_partitions=256, num_sub_vectors=96)
 ```
+!!! note
+    `num_partitions`=256 and `num_sub_vectors`=96 do not work for every dataset. Those values need to be adjusted for your particular dataset.

-The `num_partitions` is usually chosen to target a particular number of vectors per partition. `num_sub_vectors` is typically chosen based on the desired recall and the dimensionality of the vector. See the [FAQs](#faq) below for best practices on choosing these parameters.
+The `num_partitions` is usually chosen to target a particular number of vectors per partition. `num_sub_vectors` is typically chosen based on the desired recall and the dimensionality of the vector. See [here](../ann_indexes.md/#how-to-choose-num_partitions-and-num_sub_vectors-for-ivf_pq-index) for best practices on choosing these parameters.


 ### Query the index
--- a/docs/src/embeddings/available_embedding_models/text_embedding_functions/voyageai_embedding.md
+++ b/docs/src/embeddings/available_embedding_models/text_embedding_functions/voyageai_embedding.md
@@ -20,7 +20,7 @@ Supported parameters (to be passed in `create` method) are:

 | Parameter | Type | Default Value | Description |
 |---|---|--------|---------|
-| `name` | `str` | `"voyage-3"` | The model ID of the model to use. Supported base models for Text Embeddings: voyage-3, voyage-3-lite, voyage-finance-2, voyage-multilingual-2, voyage-law-2, voyage-code-2 |
+| `name` | `str` | `None` | The model ID of the model to use. Supported base models for Text Embeddings: voyage-3, voyage-3-lite, voyage-finance-2, voyage-multilingual-2, voyage-law-2, voyage-code-2 |
 | `input_type` | `str` | `None` | Type of the input text. Default to None. Other options: query, document. |
 | `truncation` | `bool` | `True` | Whether to truncate the input texts to fit within the context length. |

--- a/docs/src/embeddings/custom_embedding_function.md
+++ b/docs/src/embeddings/custom_embedding_function.md
@@ -47,9 +47,9 @@ Let's implement `SentenceTransformerEmbeddings` class. All you need to do is imp
 === "TypeScript"

    ```ts
-    --8<--- "nodejs/examples/custom_embedding_function.ts:imports"
+    --8<--- "nodejs/examples/custom_embedding_function.test.ts:imports"

-    --8<--- "nodejs/examples/custom_embedding_function.ts:embedding_impl"
+    --8<--- "nodejs/examples/custom_embedding_function.test.ts:embedding_impl"
    ```


@@ -78,7 +78,7 @@ Now you can use this embedding function to create your table schema and that's i
 === "TypeScript"

    ```ts
-    --8<--- "nodejs/examples/custom_embedding_function.ts:call_custom_function"
+    --8<--- "nodejs/examples/custom_embedding_function.test.ts:call_custom_function"
    ```

 !!! note
--- a/docs/src/embeddings/default_embedding_functions.md
+++ b/docs/src/embeddings/default_embedding_functions.md
@@ -53,6 +53,7 @@ These functions are registered by default to handle text embeddings.
 | [**Jina Embeddings**](available_embedding_models/text_embedding_functions/jina_embedding.md "jina") | 🔗 World-class embedding models to improve your search and RAG systems. You will need **jina api key**. | [<img src="https://raw.githubusercontent.com/lancedb/assets/main/docs/assets/logos/jina.png" alt="Jina Icon" width="90" height="35">](available_embedding_models/text_embedding_functions/jina_embedding.md) |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
 | [ **AWS Bedrock Functions**](available_embedding_models/text_embedding_functions/aws_bedrock_embedding.md "bedrock-text") | ☁️ AWS Bedrock supports multiple base models for generating text embeddings. You need to setup the AWS credentials to use this embedding function. | [<img src="https://raw.githubusercontent.com/lancedb/assets/main/docs/assets/logos/aws_bedrock.png" alt="AWS Bedrock Icon" width="120" height="35">](available_embedding_models/text_embedding_functions/aws_bedrock_embedding.md) |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
 | [**IBM Watsonx.ai**](available_embedding_models/text_embedding_functions/ibm_watsonx_ai_embedding.md "watsonx") | 💡 Generate text embeddings using IBM's watsonx.ai platform. **Note**: watsonx.ai library is an optional dependency. | [<img src="https://raw.githubusercontent.com/lancedb/assets/main/docs/assets/logos/watsonx.png" alt="Watsonx Icon" width="140" height="35">](available_embedding_models/text_embedding_functions/ibm_watsonx_ai_embedding.md) |
+| [**VoyageAI Embeddings**](available_embedding_models/text_embedding_functions/voyageai_embedding.md "voyageai") | 🌕 Voyage AI provides cutting-edge embedding models and rerankers. This will help you get started with **VoyageAI** embedding models using LanceDB. Using the VoyageAI API requires the `voyageai` package. Install it via `pip`. | [<img src="https://www.voyageai.com/logo.svg" alt="VoyageAI Icon" width="140" height="35">](available_embedding_models/text_embedding_functions/voyageai_embedding.md) |



@@ -66,6 +67,7 @@ These functions are registered by default to handle text embeddings.
 [jina-key]: "jina"
 [aws-key]: "bedrock-text"
 [watsonx-key]: "watsonx"
+[voyageai-key]: "voyageai"


 ## Multi-modal Embedding Functions🖼️ 
--- a/docs/src/embeddings/embedding_functions.md
+++ b/docs/src/embeddings/embedding_functions.md
@@ -94,8 +94,8 @@ the embeddings at all:
    === "@lancedb/lancedb"

        ```ts
-        --8<-- "nodejs/examples/embedding.ts:imports"
-        --8<-- "nodejs/examples/embedding.ts:embedding_function"
+        --8<-- "nodejs/examples/embedding.test.ts:imports"
+        --8<-- "nodejs/examples/embedding.test.ts:embedding_function"
        ```

    === "vectordb (deprecated)"
@@ -150,7 +150,7 @@ need to worry about it when you query the table:
            .toArray()
        ```

-    === "vectordb (deprecated)
+    === "vectordb (deprecated)"

        ```ts
        const results = await table
--- a/docs/src/embeddings/index.md
+++ b/docs/src/embeddings/index.md
@@ -51,8 +51,8 @@ LanceDB registers the OpenAI embeddings function in the registry as `openai`. Yo
 === "TypeScript"

    ```typescript
-    --8<--- "nodejs/examples/embedding.ts:imports"
-    --8<--- "nodejs/examples/embedding.ts:openai_embeddings"
+    --8<--- "nodejs/examples/embedding.test.ts:imports"
+    --8<--- "nodejs/examples/embedding.test.ts:openai_embeddings"
    ```

 === "Rust"
@@ -121,12 +121,10 @@ class Words(LanceModel):
    vector: Vector(func.ndims()) = func.VectorField()

 table = db.create_table("words", schema=Words)
-table.add(
-    [
-        {"text": "hello world"},
-        {"text": "goodbye world"}
-    ]
-    )
+table.add([
+    {"text": "hello world"},
+    {"text": "goodbye world"}
+])

 query = "greetings"
 actual = table.search(query).limit(1).to_pydantic(Words)[0]
--- a/docs/src/fts.md
+++ b/docs/src/fts.md
@@ -114,12 +114,45 @@ table.create_fts_index("text",

 LanceDB full text search supports to filter the search results by a condition, both pre-filtering and post-filtering are supported.

-This can be invoked via the familiar `where` syntax:
-
+This can be invoked via the familiar `where` syntax.
+ 
+With pre-filtering:
 === "Python"

    ```python
-    table.search("puppy").limit(10).where("meta='foo'").to_list()
+    table.search("puppy").limit(10).where("meta='foo'", prefilter=True).to_list()
+    ```
+
+=== "TypeScript"
+
+    ```typescript
+    await tbl
+    .search("puppy")
+    .select(["id", "doc"])
+    .limit(10)
+    .where("meta='foo'")
+    .prefilter(true)
+    .toArray();
+    ```
+
+=== "Rust"
+
+    ```rust
+    table
+        .query()
+        .full_text_search(FullTextSearchQuery::new("puppy".to_owned()))
+        .select(lancedb::query::Select::Columns(vec!["doc".to_owned()]))
+        .limit(10)
+        .only_if("meta='foo'")
+        .execute()
+        .await?;
+    ```
+
+With post-filtering:
+=== "Python"
+
+    ```python
+    table.search("puppy").limit(10).where("meta='foo'", prefilter=False).to_list()
    ```

 === "TypeScript"
@@ -130,6 +163,7 @@ This can be invoked via the familiar `where` syntax:
    .select(["id", "doc"])
    .limit(10)
    .where("meta='foo'")
+    .prefilter(false)
    .toArray();
    ```

@@ -140,6 +174,7 @@ This can be invoked via the familiar `where` syntax:
        .query()
        .full_text_search(FullTextSearchQuery::new(words[0].to_owned()))
        .select(lancedb::query::Select::Columns(vec!["doc".to_owned()]))
+        .postfilter()
        .limit(10)
        .only_if("meta='foo'")
        .execute()
@@ -160,3 +195,35 @@ To search for a phrase, the index must be created with `with_position=True`:
 table.create_fts_index("text", use_tantivy=False, with_position=True)
 ```
 This will allow you to search for phrases, but it will also significantly increase the index size and indexing time.
+
+
+## Incremental indexing
+
+LanceDB supports incremental indexing, which means you can add new records to the table without reindexing the entire table.
+
+This can make the query more efficient, especially when the table is large and the number of new records is relatively small.
+
+=== "Python"
+
+    ```python
+    table.add([{"vector": [3.1, 4.1], "text": "Frodo was a happy puppy"}])
+    table.optimize()
+    ```
+
+=== "TypeScript"
+
+    ```typescript
+    await tbl.add([{ vector: [3.1, 4.1], text: "Frodo was a happy puppy" }]);
+    await tbl.optimize();
+    ```
+
+=== "Rust"
+
+    ```rust
+    let more_data: Box<dyn RecordBatchReader + Send> = create_some_records()?;
+    tbl.add(more_data).execute().await?;
+    tbl.optimize(OptimizeAction::All).execute().await?;
+    ```
+!!! note
+
+    New data added after creating the FTS index will appear in search results while incremental indexing is still in progress, but with increased latency due to a flat search on the unindexed portion. LanceDB Cloud automates this merging process, minimizing the impact on search speed.
--- a/docs/src/fts_tantivy.md
+++ b/docs/src/fts_tantivy.md
@@ -153,9 +153,7 @@ table.create_fts_index(["title", "content"], use_tantivy=True, writer_heap_size=

 ## Current limitations

-1. Currently we do not yet support incremental writes.
-   If you add data after FTS index creation, it won't be reflected
-   in search results until you do a full reindex.
+1. New data added after creating the FTS index will appear in search results, but with increased latency due to a flat search on the unindexed portion. Re-indexing with `create_fts_index` will reduce latency. LanceDB Cloud automates this merging process, minimizing the impact on search speed. 

 2. We currently only support local filesystem paths for the FTS index.
   This is a tantivy limitation. We've implemented an object store plugin
--- a/docs/src/guides/tables.md
+++ b/docs/src/guides/tables.md
@@ -85,13 +85,13 @@ Initialize a LanceDB connection and create a table


        ```ts
-        --8<-- "nodejs/examples/basic.ts:create_table"
+        --8<-- "nodejs/examples/basic.test.ts:create_table"
        ```

        This will infer the schema from the provided data. If you want to explicitly provide a schema, you can use `apache-arrow` to declare a schema

        ```ts
-        --8<-- "nodejs/examples/basic.ts:create_table_with_schema"
+        --8<-- "nodejs/examples/basic.test.ts:create_table_with_schema"
        ```

        !!! info "Note"
@@ -100,14 +100,14 @@ Initialize a LanceDB connection and create a table
            passed in will NOT be appended to the table in that case.

        ```ts
-        --8<-- "nodejs/examples/basic.ts:create_table_exists_ok"
+        --8<-- "nodejs/examples/basic.test.ts:create_table_exists_ok"
        ```

        Sometimes you want to make sure that you start fresh. If you want to
        overwrite the table, you can pass in mode: "overwrite" to the createTable function.

        ```ts
-        --8<-- "nodejs/examples/basic.ts:create_table_overwrite"
+        --8<-- "nodejs/examples/basic.test.ts:create_table_overwrite"
        ```

    === "vectordb (deprecated)"
@@ -227,7 +227,7 @@ LanceDB supports float16 data type!
    === "@lancedb/lancedb"

        ```typescript
-        --8<-- "nodejs/examples/basic.ts:create_f16_table"
+        --8<-- "nodejs/examples/basic.test.ts:create_f16_table"
        ```

    === "vectordb (deprecated)"
@@ -274,7 +274,7 @@ table = db.create_table(table_name, schema=Content)

 Sometimes your data model may contain nested objects.
 For example, you may want to store the document string
-and the document soure name as a nested Document object:
+and the document source name as a nested Document object:

 ```python
 class Document(BaseModel):
@@ -455,7 +455,7 @@ You can create an empty table for scenarios where you want to add data to the ta
    === "@lancedb/lancedb"

        ```typescript
-        --8<-- "nodejs/examples/basic.ts:create_empty_table"
+        --8<-- "nodejs/examples/basic.test.ts:create_empty_table"
        ```

    === "vectordb (deprecated)"
@@ -466,7 +466,7 @@ You can create an empty table for scenarios where you want to add data to the ta

 ## Adding to a table

-After a table has been created, you can always add more data to it usind the `add` method
+After a table has been created, you can always add more data to it using the `add` method

 === "Python"
    You can add any of the valid data structures accepted by LanceDB table, i.e, `dict`, `list[dict]`, `pd.DataFrame`, or `Iterator[pa.RecordBatch]`. Below are some examples.
@@ -535,7 +535,7 @@ After a table has been created, you can always add more data to it usind the `ad
    ```

    ??? "Ingesting Pydantic models with LanceDB embedding API"
-        When using LanceDB's embedding API, you can add Pydantic models directly to the table. LanceDB will automatically convert the `vector` field to a vector before adding it to the table. You need to specify the default value of `vector` feild as None to allow LanceDB to automatically vectorize the data.
+        When using LanceDB's embedding API, you can add Pydantic models directly to the table. LanceDB will automatically convert the `vector` field to a vector before adding it to the table. You need to specify the default value of `vector` field as None to allow LanceDB to automatically vectorize the data.

        ```python
        import lancedb
@@ -790,6 +790,27 @@ Use the `drop_table()` method on the database to remove a table.
      This permanently removes the table and is not recoverable, unlike deleting rows.
      If the table does not exist an exception is raised.

+## Handling bad vectors
+
+In LanceDB Python, you can use the `on_bad_vectors` parameter to choose how
+invalid vector values are handled. Invalid vectors are vectors that are not valid
+because:
+
+1. They are the wrong dimension
+2. They contain NaN values
+3. They are null but are on a non-nullable field
+
+By default, LanceDB will raise an error if it encounters a bad vector. You can
+also choose one of the following options:
+
+* `drop`: Ignore rows with bad vectors
+* `fill`: Replace bad values (NaNs) or missing values (too few dimensions) with
+    the fill value specified in the `fill_value` parameter. An input like
+    `[1.0, NaN, 3.0]` will be replaced with `[1.0, 0.0, 3.0]` if `fill_value=0.0`.
+* `null`: Replace bad vectors with null (only works if the column is nullable).
+    A bad vector `[1.0, NaN, 3.0]` will be replaced with `null` if the column is
+    nullable. If the vector column is non-nullable, then bad vectors will cause an
+    error.

 ## Consistency

@@ -859,4 +880,4 @@ There are three possible settings for `read_consistency_interval`:

 Learn the best practices on creating an ANN index and getting the most out of it.

-[^1]: The `vectordb` package is a legacy package that is  deprecated in favor of `@lancedb/lancedb`.  The `vectordb` package will continue to receive bug fixes and security updates until September 2024.  We recommend all new projects use `@lancedb/lancedb`.  See the [migration guide](migration.md) for more information.
+[^1]: The `vectordb` package is a legacy package that is  deprecated in favor of `@lancedb/lancedb`.  The `vectordb` package will continue to receive bug fixes and security updates until September 2024.  We recommend all new projects use `@lancedb/lancedb`.  See the [migration guide](../migration.md) for more information.
--- a/docs/src/notebooks/hybrid_search.ipynb
+++ b/docs/src/notebooks/hybrid_search.ipynb
--- a/docs/src/reranking/cohere.md
+++ b/docs/src/reranking/cohere.md
@@ -6,6 +6,9 @@ This re-ranker uses the [Cohere](https://cohere.ai/) API to rerank the search re
 !!! note
    Supported Query Types: Hybrid, Vector, FTS

+```shell
+pip install cohere
+```

 ```python
 import numpy
--- a/docs/src/reranking/index.md
+++ b/docs/src/reranking/index.md
@@ -9,6 +9,7 @@ LanceDB comes with some built-in rerankers. Some of the rerankers that are avail
 | `CrossEncoderReranker` | Uses a cross-encoder model to rerank search results | Vector, FTS, Hybrid |
 | `ColbertReranker` | Uses a colbert model to rerank search results | Vector, FTS, Hybrid |
 | `OpenaiReranker`(Experimental) | Uses OpenAI's chat model to rerank search results | Vector, FTS, Hybrid |
+| `VoyageAIReranker` | Uses voyageai Reranker API to rerank results | Vector, FTS, Hybrid |


 ## Using a Reranker
@@ -73,6 +74,7 @@ LanceDB comes with some built-in rerankers. Here are some of the rerankers that
 - [Jina Reranker](./jina.md)
 - [AnswerDotAI Rerankers](./answerdotai.md)
 - [Reciprocal Rank Fusion Reranker](./rrf.md)
+- [VoyageAI Reranker](./voyageai.md)

 ## Creating Custom Rerankers

--- a/docs/src/search.md
+++ b/docs/src/search.md
@@ -58,9 +58,9 @@ db.create_table("my_vectors", data=data)
    === "@lancedb/lancedb"

        ```ts
-        --8<-- "nodejs/examples/search.ts:import"
+        --8<-- "nodejs/examples/search.test.ts:import"

-        --8<-- "nodejs/examples/search.ts:search1"
+        --8<-- "nodejs/examples/search.test.ts:search1"
        ```


@@ -89,7 +89,7 @@ By default, `l2` will be used as metric type. You can specify the metric type as
    === "@lancedb/lancedb"

        ```ts
-        --8<-- "nodejs/examples/search.ts:search2"
+        --8<-- "nodejs/examples/search.test.ts:search2"
        ```

    === "vectordb (deprecated)"
--- a/docs/src/sql.md
+++ b/docs/src/sql.md
@@ -49,7 +49,7 @@ const tbl = await db.createTable('myVectors', data)
    === "@lancedb/lancedb"

        ```ts
-        --8<-- "nodejs/examples/filtering.ts:search"
+        --8<-- "nodejs/examples/filtering.test.ts:search"
        ```

    === "vectordb (deprecated)"
@@ -91,7 +91,7 @@ For example, the following filter string is acceptable:
    === "@lancedb/lancedb"

        ```ts
-        --8<-- "nodejs/examples/filtering.ts:vec_search"
+        --8<-- "nodejs/examples/filtering.test.ts:vec_search"
        ```

    === "vectordb (deprecated)"
@@ -169,7 +169,7 @@ You can also filter your data without search.
    === "@lancedb/lancedb"

        ```ts
-        --8<-- "nodejs/examples/filtering.ts:sql_search"
+        --8<-- "nodejs/examples/filtering.test.ts:sql_search"
        ```

    === "vectordb (deprecated)"
--- a/java/core/pom.xml
+++ b/java/core/pom.xml
@@ -8,7 +8,7 @@
    <parent>
        <groupId>com.lancedb</groupId>
        <artifactId>lancedb-parent</artifactId>
-        <version>0.13.0-beta.1</version>
+        <version>0.13.0-final.0</version>
        <relativePath>../pom.xml</relativePath>
    </parent>

--- a/java/pom.xml
+++ b/java/pom.xml
@@ -6,7 +6,7 @@

    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-parent</artifactId>
-    <version>0.13.0-beta.1</version>
+    <version>0.13.0-final.0</version>
    <packaging>pom</packaging>

    <name>LanceDB Parent</name>
--- a/node/package-lock.json
+++ b/node/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "vectordb",
-  "version": "0.13.0-beta.1",
+  "version": "0.13.0",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "vectordb",
-      "version": "0.13.0-beta.1",
+      "version": "0.13.0",
      "cpu": [
        "x64",
        "arm64"
@@ -52,12 +52,12 @@
        "uuid": "^9.0.0"
      },
      "optionalDependencies": {
-        "@lancedb/vectordb-darwin-arm64": "0.13.0-beta.1",
-        "@lancedb/vectordb-darwin-x64": "0.13.0-beta.1",
-        "@lancedb/vectordb-linux-arm64-gnu": "0.13.0-beta.1",
-        "@lancedb/vectordb-linux-x64-gnu": "0.13.0-beta.1",
-        "@lancedb/vectordb-win32-arm64-msvc": "0.13.0-beta.1",
-        "@lancedb/vectordb-win32-x64-msvc": "0.13.0-beta.1"
+        "@lancedb/vectordb-darwin-arm64": "0.13.0",
+        "@lancedb/vectordb-darwin-x64": "0.13.0",
+        "@lancedb/vectordb-linux-arm64-gnu": "0.13.0",
+        "@lancedb/vectordb-linux-x64-gnu": "0.13.0",
+        "@lancedb/vectordb-win32-arm64-msvc": "0.13.0",
+        "@lancedb/vectordb-win32-x64-msvc": "0.13.0"
      },
      "peerDependencies": {
        "@apache-arrow/ts": "^14.0.2",
@@ -328,9 +328,9 @@
      }
    },
    "node_modules/@lancedb/vectordb-darwin-arm64": {
-      "version": "0.13.0-beta.1",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.13.0-beta.1.tgz",
-      "integrity": "sha512-beOrf6selCzzhLgDG8Nibma4nO/CSnA1wUKRmlJHEPtGcg7PW18z6MP/nfwQMpMR/FLRfTo8pPTbpzss47MiQQ==",
+      "version": "0.13.0",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.13.0.tgz",
+      "integrity": "sha512-8hdcjkRmgrdQYf1jN+DyZae40LIv8UUfnWy70Uid5qy63sSvRW/+MvIdqIPFr9QlLUXmpyyQuX0y3bZhUR99cQ==",
      "cpu": [
        "arm64"
      ],
@@ -340,9 +340,9 @@
      ]
    },
    "node_modules/@lancedb/vectordb-darwin-x64": {
-      "version": "0.13.0-beta.1",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.13.0-beta.1.tgz",
-      "integrity": "sha512-YdraGRF/RbJRkKh0v3xT03LUhq47T2GtCvJ5gZp8wKlh4pHa8LuhLU0DIdvmG/DT5vuQA+td8HDkBm/e3EOdNg==",
+      "version": "0.13.0",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.13.0.tgz",
+      "integrity": "sha512-fWzAY4l5SQtNfMYh80v+M66ugZHhdxbkpk5mNEv6Zsug3DL6kRj3Uv31/i0wgzY6F5G3LUlbjZerN+eTnDLwOw==",
      "cpu": [
        "x64"
      ],
@@ -352,9 +352,9 @@
      ]
    },
    "node_modules/@lancedb/vectordb-linux-arm64-gnu": {
-      "version": "0.13.0-beta.1",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.13.0-beta.1.tgz",
-      "integrity": "sha512-Pp0O/uhEqof1oLaWrNbv+Ym+q8kBkiCqaA5+2eAZ6a3e9U+Ozkvb0FQrHuyi9adJ5wKQ4NabyQE9BMf2bYpOnQ==",
+      "version": "0.13.0",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.13.0.tgz",
+      "integrity": "sha512-ltwAT9baOSuR5YiGykQXPC8/HGYF13vpI47qxhP9yfgiz9pA8EUn8p8YrBRzq7J4DIZ4b8JSVDXQnMIqEtB4Kg==",
      "cpu": [
        "arm64"
      ],
@@ -364,9 +364,9 @@
      ]
    },
    "node_modules/@lancedb/vectordb-linux-x64-gnu": {
-      "version": "0.13.0-beta.1",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.13.0-beta.1.tgz",
-      "integrity": "sha512-y8nxOye4egfWF5FGED9EfkmZ1O5HnRLU4a61B8m5JSpkivO9v2epTcbYN0yt/7ZFCgtqMfJ8VW4Mi7qQcz3KDA==",
+      "version": "0.13.0",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.13.0.tgz",
+      "integrity": "sha512-MiT/RBlMPGGRh7BX+MXwRuNiiUnKmuDcHH8nm88IH28T7TQxXIbA9w6UpSg5m9f3DgKQI2K8oLi29oKIB8ZwDQ==",
      "cpu": [
        "x64"
      ],
@@ -376,9 +376,9 @@
      ]
    },
    "node_modules/@lancedb/vectordb-win32-x64-msvc": {
-      "version": "0.13.0-beta.1",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.13.0-beta.1.tgz",
-      "integrity": "sha512-STMDP9dp0TBLkB3ro+16pKcGy6bmbhRuEZZZ1Tp5P75yTPeVh4zIgWkidMdU1qBbEYM7xacnsp9QAwgLnMU/Ow==",
+      "version": "0.13.0",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.13.0.tgz",
+      "integrity": "sha512-SovP/hwWYLJIy65DKbVuXlBPTb/nwvVpTO6dh9zRch+L5ek6JmVAkwsfeTS2p5bMa8VPujsCXYUAVuCDEJU8wg==",
      "cpu": [
        "x64"
      ],
@@ -1501,9 +1501,9 @@
      "dev": true
    },
    "node_modules/cross-spawn": {
-      "version": "7.0.3",
-      "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz",
-      "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==",
+      "version": "7.0.6",
+      "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz",
+      "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==",
      "dev": true,
      "dependencies": {
        "path-key": "^3.1.0",
--- a/node/package.json
+++ b/node/package.json
@@ -1,6 +1,6 @@
 {
  "name": "vectordb",
-  "version": "0.13.0-beta.1",
+  "version": "0.13.0",
  "description": " Serverless, low-latency vector database for AI applications",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
@@ -89,11 +89,13 @@
    }
  },
  "optionalDependencies": {
-    "@lancedb/vectordb-darwin-arm64": "0.13.0-beta.1",
-    "@lancedb/vectordb-darwin-x64": "0.13.0-beta.1",
-    "@lancedb/vectordb-linux-arm64-gnu": "0.13.0-beta.1",
-    "@lancedb/vectordb-linux-x64-gnu": "0.13.0-beta.1",
-    "@lancedb/vectordb-win32-x64-msvc": "0.13.0-beta.1",
-    "@lancedb/vectordb-win32-arm64-msvc": "0.13.0-beta.1"
+    "@lancedb/vectordb-darwin-x64": "0.13.0",
+    "@lancedb/vectordb-darwin-arm64": "0.13.0",
+    "@lancedb/vectordb-linux-x64-gnu": "0.13.0",
+    "@lancedb/vectordb-linux-arm64-gnu": "0.13.0",
+    "@lancedb/vectordb-linux-x64-musl": "0.13.0",
+    "@lancedb/vectordb-linux-arm64-musl": "0.13.0",
+    "@lancedb/vectordb-win32-x64-msvc": "0.13.0",
+    "@lancedb/vectordb-win32-arm64-msvc": "0.13.0"
  }
 }
--- a/nodejs/Cargo.toml
+++ b/nodejs/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "lancedb-nodejs"
 edition.workspace = true
-version = "0.13.0-beta.1"
+version = "0.13.0"
 license.workspace = true
 description.workspace = true
 repository.workspace = true
--- a/nodejs/test/table.test.ts
+++ b/nodejs/test/table.test.ts
@@ -187,6 +187,81 @@ describe.each([arrow13, arrow14, arrow15, arrow16, arrow17])(
      },
    );

+    // TODO: https://github.com/lancedb/lancedb/issues/1832
+    it.skip("should be able to omit nullable fields", async () => {
+      const db = await connect(tmpDir.name);
+      const schema = new arrow.Schema([
+        new arrow.Field(
+          "vector",
+          new arrow.FixedSizeList(
+            2,
+            new arrow.Field("item", new arrow.Float64()),
+          ),
+          true,
+        ),
+        new arrow.Field("item", new arrow.Utf8(), true),
+        new arrow.Field("price", new arrow.Float64(), false),
+      ]);
+      const table = await db.createEmptyTable("test", schema);
+
+      const data1 = { item: "foo", price: 10.0 };
+      await table.add([data1]);
+      const data2 = { vector: [3.1, 4.1], price: 2.0 };
+      await table.add([data2]);
+      const data3 = { vector: [5.9, 26.5], item: "bar", price: 3.0 };
+      await table.add([data3]);
+
+      let res = await table.query().limit(10).toArray();
+      const resVector = res.map((r) => r.get("vector").toArray());
+      expect(resVector).toEqual([null, data2.vector, data3.vector]);
+      const resItem = res.map((r) => r.get("item").toArray());
+      expect(resItem).toEqual(["foo", null, "bar"]);
+      const resPrice = res.map((r) => r.get("price").toArray());
+      expect(resPrice).toEqual([10.0, 2.0, 3.0]);
+
+      const data4 = { item: "foo" };
+      // We can't omit a column if it's not nullable
+      await expect(table.add([data4])).rejects.toThrow("Invalid user input");
+
+      // But we can alter columns to make them nullable
+      await table.alterColumns([{ path: "price", nullable: true }]);
+      await table.add([data4]);
+
+      res = (await table.query().limit(10).toArray()).map((r) => r.toJSON());
+      expect(res).toEqual([data1, data2, data3, data4]);
+    });
+
+    it("should be able to insert nullable data for non-nullable fields", async () => {
+      const db = await connect(tmpDir.name);
+      const schema = new arrow.Schema([
+        new arrow.Field("x", new arrow.Float64(), false),
+        new arrow.Field("id", new arrow.Utf8(), false),
+      ]);
+      const table = await db.createEmptyTable("test", schema);
+
+      const data1 = { x: 4.1, id: "foo" };
+      await table.add([data1]);
+      const res = (await table.query().toArray())[0];
+      expect(res.x).toEqual(data1.x);
+      expect(res.id).toEqual(data1.id);
+
+      const data2 = { x: null, id: "bar" };
+      await expect(table.add([data2])).rejects.toThrow(
+        "declared as non-nullable but contains null values",
+      );
+
+      // But we can alter columns to make them nullable
+      await table.alterColumns([{ path: "x", nullable: true }]);
+      await table.add([data2]);
+
+      const res2 = await table.query().toArray();
+      expect(res2.length).toBe(2);
+      expect(res2[0].x).toEqual(data1.x);
+      expect(res2[0].id).toEqual(data1.id);
+      expect(res2[1].x).toBeNull();
+      expect(res2[1].id).toEqual(data2.id);
+    });
+
    it("should return the table as an instance of an arrow table", async () => {
      const arrowTbl = await table.toArrow();
      expect(arrowTbl).toBeInstanceOf(ArrowTable);
@@ -402,6 +477,54 @@ describe("When creating an index", () => {
    expect(rst.numRows).toBe(1);
  });

+  it("should create and search IVF_HNSW indices", async () => {
+    await tbl.createIndex("vec", {
+      config: Index.hnswSq(),
+    });
+
+    // check index directory
+    const indexDir = path.join(tmpDir.name, "test.lance", "_indices");
+    expect(fs.readdirSync(indexDir)).toHaveLength(1);
+    const indices = await tbl.listIndices();
+    expect(indices.length).toBe(1);
+    expect(indices[0]).toEqual({
+      name: "vec_idx",
+      indexType: "IvfHnswSq",
+      columns: ["vec"],
+    });
+
+    // Search without specifying the column
+    let rst = await tbl
+      .query()
+      .limit(2)
+      .nearestTo(queryVec)
+      .distanceType("dot")
+      .toArrow();
+    expect(rst.numRows).toBe(2);
+
+    // Search using `vectorSearch`
+    rst = await tbl.vectorSearch(queryVec).limit(2).toArrow();
+    expect(rst.numRows).toBe(2);
+
+    // Search with specifying the column
+    const rst2 = await tbl
+      .query()
+      .limit(2)
+      .nearestTo(queryVec)
+      .column("vec")
+      .toArrow();
+    expect(rst2.numRows).toBe(2);
+    expect(rst.toString()).toEqual(rst2.toString());
+
+    // test offset
+    rst = await tbl.query().limit(2).offset(1).nearestTo(queryVec).toArrow();
+    expect(rst.numRows).toBe(1);
+
+    // test ef
+    rst = await tbl.query().limit(2).nearestTo(queryVec).ef(100).toArrow();
+    expect(rst.numRows).toBe(2);
+  });
+
  it("should be able to query unindexed data", async () => {
    await tbl.createIndex("vec");
    await tbl.add([
@@ -998,4 +1121,18 @@ describe("column name options", () => {
    const results = await table.query().where("`camelCase` = 1").toArray();
    expect(results[0].camelCase).toBe(1);
  });
+
+  test("can make multiple vector queries in one go", async () => {
+    const results = await table
+      .query()
+      .nearestTo([0.1, 0.2])
+      .addQueryVector([0.1, 0.2])
+      .limit(1)
+      .toArray();
+    // Two query vectors with limit(1): the test expects one row per query vector.
+    expect(results.length).toBe(2);
+    results.sort((a, b) => a.query_index - b.query_index);
+    expect(results[0].query_index).toBe(0);
+    expect(results[1].query_index).toBe(1);
+  });
 });
--- a/nodejs/biome.json
+++ b/nodejs/biome.json
@@ -9,7 +9,8 @@
      "**/native.js",
      "**/native.d.ts",
      "**/npm/**/*",
-      "**/.vscode/**"
+      "**/.vscode/**",
+      "./examples/*"
    ]
  },
  "formatter": {
--- a/nodejs/examples/ann_indexes.test.ts
+++ b/nodejs/examples/ann_indexes.test.ts
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors
+import { expect, test } from "@jest/globals";
+// --8<-- [start:import]
+import * as lancedb from "@lancedb/lancedb";
+import { VectorQuery } from "@lancedb/lancedb";
+// --8<-- [end:import]
+import { withTempDirectory } from "./util.ts";
+
+test("ann index examples", async () => {
+  await withTempDirectory(async (databaseDir) => {
+    // --8<-- [start:ingest]
+    const db = await lancedb.connect(databaseDir);
+
+    const data = Array.from({ length: 5_000 }, (_, i) => ({
+      vector: Array(128).fill(i),
+      id: `${i}`,
+      content: "",
+      longId: `${i}`,
+    }));
+
+    const table = await db.createTable("my_vectors", data, {
+      mode: "overwrite",
+    });
+    await table.createIndex("vector", {
+      config: lancedb.Index.ivfPq({
+        numPartitions: 10,
+        numSubVectors: 16,
+      }),
+    });
+    // --8<-- [end:ingest]
+
+    // --8<-- [start:search1]
+    const search = table.search(Array(128).fill(1.2)).limit(2) as VectorQuery;
+    const results1 = await search.nprobes(20).refineFactor(10).toArray();
+    // --8<-- [end:search1]
+    expect(results1.length).toBe(2);
+
+    // --8<-- [start:search2]
+    const results2 = await table
+      .search(Array(128).fill(1.2))
+      .where("id != '1141'")
+      .limit(2)
+      .toArray();
+    // --8<-- [end:search2]
+    expect(results2.length).toBe(2);
+
+    // --8<-- [start:search3]
+    const results3 = await table
+      .search(Array(128).fill(1.2))
+      .select(["id"])
+      .limit(2)
+      .toArray();
+    // --8<-- [end:search3]
+    expect(results3.length).toBe(2);
+  });
+}, 100_000);
--- a/nodejs/examples/ann_indexes.ts
+++ b/nodejs/examples/ann_indexes.ts
@@ -1,49 +0,0 @@
-// --8<-- [start:import]
-import * as lancedb from "@lancedb/lancedb";
-// --8<-- [end:import]
-
-// --8<-- [start:ingest]
-const db = await lancedb.connect("/tmp/lancedb/");
-
-const data = Array.from({ length: 10_000 }, (_, i) => ({
-  vector: Array(1536).fill(i),
-  id: `${i}`,
-  content: "",
-  longId: `${i}`,
-}));
-
-const table = await db.createTable("my_vectors", data, { mode: "overwrite" });
-await table.createIndex("vector", {
-  config: lancedb.Index.ivfPq({
-    numPartitions: 16,
-    numSubVectors: 48,
-  }),
-});
-// --8<-- [end:ingest]
-
-// --8<-- [start:search1]
-const _results1 = await table
-  .search(Array(1536).fill(1.2))
-  .limit(2)
-  .nprobes(20)
-  .refineFactor(10)
-  .toArray();
-// --8<-- [end:search1]
-
-// --8<-- [start:search2]
-const _results2 = await table
-  .search(Array(1536).fill(1.2))
-  .where("id != '1141'")
-  .limit(2)
-  .toArray();
-// --8<-- [end:search2]
-
-// --8<-- [start:search3]
-const _results3 = await table
-  .search(Array(1536).fill(1.2))
-  .select(["id"])
-  .limit(2)
-  .toArray();
-// --8<-- [end:search3]
-
-console.log("Ann indexes: done");
--- a/nodejs/examples/basic.test.ts
+++ b/nodejs/examples/basic.test.ts
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors
+import { expect, test } from "@jest/globals";
+// --8<--  [start:imports]
+import * as lancedb from "@lancedb/lancedb";
+import * as arrow from "apache-arrow";
+import {
+  Field,
+  FixedSizeList,
+  Float16,
+  Int32,
+  Schema,
+  Utf8,
+} from "apache-arrow";
+// --8<-- [end:imports]
+import { withTempDirectory } from "./util.ts";
+
+test("basic table examples", async () => {
+  await withTempDirectory(async (databaseDir) => {
+    // --8<-- [start:connect]
+    const db = await lancedb.connect(databaseDir);
+    // --8<-- [end:connect]
+    {
+      // --8<-- [start:create_table]
+      const _tbl = await db.createTable(
+        "myTable",
+        [
+          { vector: [3.1, 4.1], item: "foo", price: 10.0 },
+          { vector: [5.9, 26.5], item: "bar", price: 20.0 },
+        ],
+        { mode: "overwrite" },
+      );
+      // --8<-- [end:create_table]
+
+      const data = [
+        { vector: [3.1, 4.1], item: "foo", price: 10.0 },
+        { vector: [5.9, 26.5], item: "bar", price: 20.0 },
+      ];
+
+      {
+        // --8<-- [start:create_table_exists_ok]
+        const tbl = await db.createTable("myTable", data, {
+          existOk: true,
+        });
+        // --8<-- [end:create_table_exists_ok]
+        expect(await tbl.countRows()).toBe(2);
+      }
+      {
+        // --8<-- [start:create_table_overwrite]
+        const tbl = await db.createTable("myTable", data, {
+          mode: "overwrite",
+        });
+        // --8<-- [end:create_table_overwrite]
+        expect(await tbl.countRows()).toBe(2);
+      }
+    }
+
+    await db.dropTable("myTable");
+
+    {
+      // --8<-- [start:create_table_with_schema]
+      const schema = new arrow.Schema([
+        new arrow.Field(
+          "vector",
+          new arrow.FixedSizeList(
+            2,
+            new arrow.Field("item", new arrow.Float32(), true),
+          ),
+        ),
+        new arrow.Field("item", new arrow.Utf8(), true),
+        new arrow.Field("price", new arrow.Float32(), true),
+      ]);
+      const data = [
+        { vector: [3.1, 4.1], item: "foo", price: 10.0 },
+        { vector: [5.9, 26.5], item: "bar", price: 20.0 },
+      ];
+      const tbl = await db.createTable("myTable", data, {
+        schema,
+      });
+      // --8<-- [end:create_table_with_schema]
+      expect(await tbl.countRows()).toBe(2);
+    }
+
+    {
+      // --8<-- [start:create_empty_table]
+
+      const schema = new arrow.Schema([
+        new arrow.Field("id", new arrow.Int32()),
+        new arrow.Field("name", new arrow.Utf8()),
+      ]);
+
+      const emptyTbl = await db.createEmptyTable("empty_table", schema);
+      // --8<-- [end:create_empty_table]
+      expect(await emptyTbl.countRows()).toBe(0);
+    }
+    {
+      // --8<-- [start:open_table]
+      const _tbl = await db.openTable("myTable");
+      // --8<-- [end:open_table]
+    }
+
+    {
+      // --8<-- [start:table_names]
+      const tableNames = await db.tableNames();
+      // --8<-- [end:table_names]
+      expect(tableNames).toEqual(["empty_table", "myTable"]);
+    }
+
+    const tbl = await db.openTable("myTable");
+    {
+      // --8<-- [start:add_data]
+      const data = [
+        { vector: [1.3, 1.4], item: "fizz", price: 100.0 },
+        { vector: [9.5, 56.2], item: "buzz", price: 200.0 },
+      ];
+      await tbl.add(data);
+      // --8<-- [end:add_data]
+    }
+    {
+      // --8<-- [start:vector_search]
+      const res = await tbl.search([100, 100]).limit(2).toArray();
+      // --8<-- [end:vector_search]
+      expect(res.length).toBe(2);
+    }
+    {
+      const data = Array.from({ length: 1000 })
+        .fill(null)
+        .map(() => ({
+          vector: [Math.random(), Math.random()],
+          item: "autogen",
+          price: Math.round(Math.random() * 100),
+        }));
+
+      await tbl.add(data);
+    }
+
+    // --8<-- [start:create_index]
+    await tbl.createIndex("vector");
+    // --8<-- [end:create_index]
+
+    // --8<-- [start:delete_rows]
+    await tbl.delete('item = "fizz"');
+    // --8<-- [end:delete_rows]
+
+    // --8<-- [start:drop_table]
+    await db.dropTable("myTable");
+    // --8<-- [end:drop_table]
+    await db.dropTable("empty_table");
+
+    {
+      // --8<-- [start:create_f16_table]
+      const db = await lancedb.connect(databaseDir);
+      const dim = 16;
+      const total = 10;
+      const f16Schema = new Schema([
+        new Field("id", new Int32()),
+        new Field(
+          "vector",
+          new FixedSizeList(dim, new Field("item", new Float16(), true)),
+          false,
+        ),
+      ]);
+      const data = lancedb.makeArrowTable(
+        Array.from(Array(total), (_, i) => ({
+          id: i,
+          vector: Array.from(Array(dim), Math.random),
+        })),
+        { schema: f16Schema },
+      );
+      const _table = await db.createTable("f16_tbl", data);
+      // --8<-- [end:create_f16_table]
+      await db.dropTable("f16_tbl");
+    }
+  });
+});
--- a/nodejs/examples/basic.ts
+++ b/nodejs/examples/basic.ts
@@ -1,162 +0,0 @@
-// --8<--  [start:imports]
-import * as lancedb from "@lancedb/lancedb";
-import * as arrow from "apache-arrow";
-import {
-  Field,
-  FixedSizeList,
-  Float16,
-  Int32,
-  Schema,
-  Utf8,
-} from "apache-arrow";
-
-// --8<-- [end:imports]
-
-// --8<-- [start:connect]
-const uri = "/tmp/lancedb/";
-const db = await lancedb.connect(uri);
-// --8<-- [end:connect]
-{
-  // --8<-- [start:create_table]
-  const tbl = await db.createTable(
-    "myTable",
-    [
-      { vector: [3.1, 4.1], item: "foo", price: 10.0 },
-      { vector: [5.9, 26.5], item: "bar", price: 20.0 },
-    ],
-    { mode: "overwrite" },
-  );
-  // --8<-- [end:create_table]
-
-  const data = [
-    { vector: [3.1, 4.1], item: "foo", price: 10.0 },
-    { vector: [5.9, 26.5], item: "bar", price: 20.0 },
-  ];
-
-  {
-    // --8<-- [start:create_table_exists_ok]
-    const tbl = await db.createTable("myTable", data, {
-      existsOk: true,
-    });
-    // --8<-- [end:create_table_exists_ok]
-  }
-  {
-    // --8<-- [start:create_table_overwrite]
-    const _tbl = await db.createTable("myTable", data, {
-      mode: "overwrite",
-    });
-    // --8<-- [end:create_table_overwrite]
-  }
-}
-
-{
-  // --8<-- [start:create_table_with_schema]
-  const schema = new arrow.Schema([
-    new arrow.Field(
-      "vector",
-      new arrow.FixedSizeList(
-        2,
-        new arrow.Field("item", new arrow.Float32(), true),
-      ),
-    ),
-    new arrow.Field("item", new arrow.Utf8(), true),
-    new arrow.Field("price", new arrow.Float32(), true),
-  ]);
-  const data = [
-    { vector: [3.1, 4.1], item: "foo", price: 10.0 },
-    { vector: [5.9, 26.5], item: "bar", price: 20.0 },
-  ];
-  const _tbl = await db.createTable("myTable", data, {
-    schema,
-  });
-  // --8<-- [end:create_table_with_schema]
-}
-
-{
-  // --8<-- [start:create_empty_table]
-
-  const schema = new arrow.Schema([
-    new arrow.Field("id", new arrow.Int32()),
-    new arrow.Field("name", new arrow.Utf8()),
-  ]);
-
-  const empty_tbl = await db.createEmptyTable("empty_table", schema);
-  // --8<-- [end:create_empty_table]
-}
-{
-  // --8<-- [start:open_table]
-  const _tbl = await db.openTable("myTable");
-  // --8<-- [end:open_table]
-}
-
-{
-  // --8<-- [start:table_names]
-  const tableNames = await db.tableNames();
-  console.log(tableNames);
-  // --8<-- [end:table_names]
-}
-
-const tbl = await db.openTable("myTable");
-{
-  // --8<-- [start:add_data]
-  const data = [
-    { vector: [1.3, 1.4], item: "fizz", price: 100.0 },
-    { vector: [9.5, 56.2], item: "buzz", price: 200.0 },
-  ];
-  await tbl.add(data);
-  // --8<-- [end:add_data]
-}
-{
-  // --8<-- [start:vector_search]
-  const _res = tbl.search([100, 100]).limit(2).toArray();
-  // --8<-- [end:vector_search]
-}
-{
-  const data = Array.from({ length: 1000 })
-    .fill(null)
-    .map(() => ({
-      vector: [Math.random(), Math.random()],
-      item: "autogen",
-      price: Math.round(Math.random() * 100),
-    }));
-
-  await tbl.add(data);
-}
-
-// --8<-- [start:create_index]
-await tbl.createIndex("vector");
-// --8<-- [end:create_index]
-
-// --8<-- [start:delete_rows]
-await tbl.delete('item = "fizz"');
-// --8<-- [end:delete_rows]
-
-// --8<-- [start:drop_table]
-await db.dropTable("myTable");
-// --8<-- [end:drop_table]
-await db.dropTable("empty_table");
-
-{
-  // --8<-- [start:create_f16_table]
-  const db = await lancedb.connect("/tmp/lancedb");
-  const dim = 16;
-  const total = 10;
-  const f16Schema = new Schema([
-    new Field("id", new Int32()),
-    new Field(
-      "vector",
-      new FixedSizeList(dim, new Field("item", new Float16(), true)),
-      false,
-    ),
-  ]);
-  const data = lancedb.makeArrowTable(
-    Array.from(Array(total), (_, i) => ({
-      id: i,
-      vector: Array.from(Array(dim), Math.random),
-    })),
-    { schema: f16Schema },
-  );
-  const _table = await db.createTable("f16_tbl", data);
-  // --8<-- [end:create_f16_table]
-  await db.dropTable("f16_tbl");
-}
--- a/nodejs/examples/custom_embedding_function.test.ts
+++ b/nodejs/examples/custom_embedding_function.test.ts
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors
+import { FeatureExtractionPipeline, pipeline } from "@huggingface/transformers";
+import { expect, test } from "@jest/globals";
+// --8<-- [start:imports]
+import * as lancedb from "@lancedb/lancedb";
+import {
+  LanceSchema,
+  TextEmbeddingFunction,
+  getRegistry,
+  register,
+} from "@lancedb/lancedb/embedding";
+// --8<-- [end:imports]
+import { withTempDirectory } from "./util.ts";
+
+// --8<-- [start:embedding_impl]
+/**
+ * Text embedding function built on a transformers.js feature-extraction
+ * pipeline and registered under the name "sentence-transformers".
+ */
+@register("sentence-transformers")
+class SentenceTransformersEmbeddings extends TextEmbeddingFunction {
+  name = "Xenova/all-miniLM-L6-v2";
+  #ndims!: number;
+  extractor!: FeatureExtractionPipeline;
+
+  /** Builds the pipeline, then embeds a probe string to learn ndims. */
+  async init() {
+    this.extractor = await pipeline("feature-extraction", this.name, {
+      dtype: "fp32",
+    });
+    this.#ndims = await this.generateEmbeddings(["hello"]).then(
+      (e) => e[0].length,
+    );
+  }
+
+  /** Returns the vector length recorded by `init()`. */
+  ndims() {
+    return this.#ndims;
+  }
+
+  /** Serializes the configuration; only the model name is kept. */
+  toJSON() {
+    return {
+      name: this.name,
+    };
+  }
+
+  /** Embeds `texts` using mean pooling and normalization. */
+  async generateEmbeddings(texts: string[]) {
+    const output = await this.extractor(texts, {
+      pooling: "mean",
+      normalize: true,
+    });
+    return output.tolist();
+  }
+}
+// --8<-- [end:embedding_impl]
+
+// The trailing argument of `test` is the Jest timeout in milliseconds.
+test("Registry examples", async () => {
+  await withTempDirectory(async (databaseDir) => {
+    // --8<-- [start:call_custom_function]
+    const registry = getRegistry();
+
+    const sentenceTransformer = await registry
+      .get<SentenceTransformersEmbeddings>("sentence-transformers")!
+      .create();
+
+    const schema = LanceSchema({
+      vector: sentenceTransformer.vectorField(),
+      text: sentenceTransformer.sourceField(),
+    });
+
+    const db = await lancedb.connect(databaseDir);
+    const table = await db.createEmptyTable("table", schema, {
+      mode: "overwrite",
+    });
+
+    await table.add([{ text: "hello" }, { text: "world" }]);
+
+    const results = await table.search("greeting").limit(1).toArray();
+    // --8<-- [end:call_custom_function]
+    expect(results.length).toBe(1);
+  });
+}, 100_000);
--- a/nodejs/examples/custom_embedding_function.ts
+++ b/nodejs/examples/custom_embedding_function.ts
@@ -1,64 +0,0 @@
-// --8<-- [start:imports]
-import * as lancedb from "@lancedb/lancedb";
-import {
-  LanceSchema,
-  TextEmbeddingFunction,
-  getRegistry,
-  register,
-} from "@lancedb/lancedb/embedding";
-import { pipeline } from "@xenova/transformers";
-// --8<-- [end:imports]
-
-// --8<-- [start:embedding_impl]
-@register("sentence-transformers")
-class SentenceTransformersEmbeddings extends TextEmbeddingFunction {
-  name = "Xenova/all-miniLM-L6-v2";
-  #ndims!: number;
-  extractor: any;
-
-  async init() {
-    this.extractor = await pipeline("feature-extraction", this.name);
-    this.#ndims = await this.generateEmbeddings(["hello"]).then(
-      (e) => e[0].length,
-    );
-  }
-
-  ndims() {
-    return this.#ndims;
-  }
-
-  toJSON() {
-    return {
-      name: this.name,
-    };
-  }
-  async generateEmbeddings(texts: string[]) {
-    const output = await this.extractor(texts, {
-      pooling: "mean",
-      normalize: true,
-    });
-    return output.tolist();
-  }
-}
-// -8<-- [end:embedding_impl]
-
-// --8<-- [start:call_custom_function]
-const registry = getRegistry();
-
-const sentenceTransformer = await registry
-  .get<SentenceTransformersEmbeddings>("sentence-transformers")!
-  .create();
-
-const schema = LanceSchema({
-  vector: sentenceTransformer.vectorField(),
-  text: sentenceTransformer.sourceField(),
-});
-
-const db = await lancedb.connect("/tmp/db");
-const table = await db.createEmptyTable("table", schema, { mode: "overwrite" });
-
-await table.add([{ text: "hello" }, { text: "world" }]);
-
-const results = await table.search("greeting").limit(1).toArray();
-console.log(results[0].text);
-// -8<-- [end:call_custom_function]
--- a/nodejs/examples/embedding.test.ts
+++ b/nodejs/examples/embedding.test.ts
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors
+import { expect, test } from "@jest/globals";
+// --8<-- [start:imports]
+import * as lancedb from "@lancedb/lancedb";
+import "@lancedb/lancedb/embedding/openai";
+import {
+  EmbeddingFunction,
+  LanceSchema,
+  getRegistry,
+  register,
+} from "@lancedb/lancedb/embedding";
+import { type Float, Float32, Utf8 } from "apache-arrow";
+// --8<-- [end:imports]
+import { withTempDirectory } from "./util.ts";
+
+// The OpenAI example is skipped unless OPENAI_API_KEY is set.
+const openAiTest = process.env.OPENAI_API_KEY == null ? test.skip : test;
+
+openAiTest("openai embeddings", async () => {
+  await withTempDirectory(async (databaseDir) => {
+    // --8<-- [start:openai_embeddings]
+    const db = await lancedb.connect(databaseDir);
+    const func = getRegistry()
+      .get("openai")
+      ?.create({ model: "text-embedding-ada-002" }) as EmbeddingFunction;
+
+    const wordsSchema = LanceSchema({
+      text: func.sourceField(new Utf8()),
+      vector: func.vectorField(),
+    });
+    const tbl = await db.createEmptyTable("words", wordsSchema, {
+      mode: "overwrite",
+    });
+    await tbl.add([{ text: "hello world" }, { text: "goodbye world" }]);
+
+    const query = "greetings";
+    const actual = (await tbl.search(query).limit(1).toArray())[0];
+    // --8<-- [end:openai_embeddings]
+    expect(actual).toHaveProperty("text");
+  });
+});
+
+test("custom embedding function", async () => {
+  await withTempDirectory(async (databaseDir) => {
+    // --8<-- [start:embedding_function]
+    const db = await lancedb.connect(databaseDir);
+
+    @register("my_embedding")
+    class MyEmbeddingFunction extends EmbeddingFunction<string> {
+      toJSON(): object {
+        return {};
+      }
+      ndims() {
+        return 3;
+      }
+      embeddingDataType(): Float {
+        return new Float32();
+      }
+      async computeQueryEmbeddings(_data: string) {
+        // This is a placeholder for a real embedding function
+        return [1, 2, 3];
+      }
+      async computeSourceEmbeddings(data: string[]) {
+        // This is a placeholder for a real embedding function.
+        // `map` gives each row its own array instead of one shared instance.
+        return data.map(() => [1, 2, 3]);
+      }
+    }
+
+    const func = new MyEmbeddingFunction();
+
+    const data = [{ text: "pepperoni" }, { text: "pineapple" }];
+
+    // Option 1: manually specify the embedding function
+    const table = await db.createTable("vectors", data, {
+      embeddingFunction: {
+        function: func,
+        sourceColumn: "text",
+        vectorColumn: "vector",
+      },
+      mode: "overwrite",
+    });
+
+    // Option 2: provide the embedding function through a schema
+
+    const schema = LanceSchema({
+      text: func.sourceField(new Utf8()),
+      vector: func.vectorField(),
+    });
+
+    const table2 = await db.createTable("vectors2", data, {
+      schema,
+      mode: "overwrite",
+    });
+    // --8<-- [end:embedding_function]
+    expect(await table.countRows()).toBe(2);
+    expect(await table2.countRows()).toBe(2);
+  });
+});
--- a/nodejs/examples/embedding.ts
+++ b/nodejs/examples/embedding.ts
@@ -1,83 +0,0 @@
-// --8<-- [start:imports]
-import * as lancedb from "@lancedb/lancedb";
-import { LanceSchema, getRegistry, register } from "@lancedb/lancedb/embedding";
-import { EmbeddingFunction } from "@lancedb/lancedb/embedding";
-import { type Float, Float32, Utf8 } from "apache-arrow";
-// --8<-- [end:imports]
-
-{
-  // --8<-- [start:openai_embeddings]
-
-  const db = await lancedb.connect("/tmp/db");
-  const func = getRegistry()
-    .get("openai")
-    ?.create({ model: "text-embedding-ada-002" }) as EmbeddingFunction;
-
-  const wordsSchema = LanceSchema({
-    text: func.sourceField(new Utf8()),
-    vector: func.vectorField(),
-  });
-  const tbl = await db.createEmptyTable("words", wordsSchema, {
-    mode: "overwrite",
-  });
-  await tbl.add([{ text: "hello world" }, { text: "goodbye world" }]);
-
-  const query = "greetings";
-  const actual = (await (await tbl.search(query)).limit(1).toArray())[0];
-
-  // --8<-- [end:openai_embeddings]
-  console.log("result = ", actual.text);
-}
-
-{
-  // --8<-- [start:embedding_function]
-  const db = await lancedb.connect("/tmp/db");
-
-  @register("my_embedding")
-  class MyEmbeddingFunction extends EmbeddingFunction<string> {
-    toJSON(): object {
-      return {};
-    }
-    ndims() {
-      return 3;
-    }
-    embeddingDataType(): Float {
-      return new Float32();
-    }
-    async computeQueryEmbeddings(_data: string) {
-      // This is a placeholder for a real embedding function
-      return [1, 2, 3];
-    }
-    async computeSourceEmbeddings(data: string[]) {
-      // This is a placeholder for a real embedding function
-      return Array.from({ length: data.length }).fill([1, 2, 3]) as number[][];
-    }
-  }
-
-  const func = new MyEmbeddingFunction();
-
-  const data = [{ text: "pepperoni" }, { text: "pineapple" }];
-
-  // Option 1: manually specify the embedding function
-  const table = await db.createTable("vectors", data, {
-    embeddingFunction: {
-      function: func,
-      sourceColumn: "text",
-      vectorColumn: "vector",
-    },
-    mode: "overwrite",
-  });
-
-  // Option 2: provide the embedding function through a schema
-
-  const schema = LanceSchema({
-    text: func.sourceField(new Utf8()),
-    vector: func.vectorField(),
-  });
-
-  const table2 = await db.createTable("vectors2", data, {
-    schema,
-    mode: "overwrite",
-  });
-  // --8<-- [end:embedding_function]
-}
--- a/nodejs/examples/filtering.test.ts
+++ b/nodejs/examples/filtering.test.ts
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors
+import { expect, test } from "@jest/globals";
+import * as lancedb from "@lancedb/lancedb";
+import { withTempDirectory } from "./util.ts";
+
+test("filtering examples", async () => {
+  await withTempDirectory(async (databaseDir) => {
+    const db = await lancedb.connect(databaseDir);
+
+    // Row i: a 1536-wide vector filled with i, plus numeric and string ids.
+    const makeRow = (i: number) => ({
+      vector: Array(1536).fill(i),
+      id: i,
+      item: `item ${i}`,
+      strId: `${i}`,
+    });
+    const data = Array.from({ length: 10_000 }, (_, i) => makeRow(i));
+
+    const tbl = await db.createTable("myVectors", data, { mode: "overwrite" });
+
+    // --8<-- [start:search]
+    const _result = await tbl
+      .search(Array(1536).fill(0.5))
+      .limit(1)
+      .where("id = 10")
+      .toArray();
+    // --8<-- [end:search]
+
+    // --8<-- [start:vec_search]
+    const result = await (
+      tbl.search(Array(1536).fill(0)) as lancedb.VectorQuery
+    )
+      .where("(item IN ('item 0', 'item 2')) AND (id > 10)")
+      .postfilter()
+      .toArray();
+    // --8<-- [end:vec_search]
+    // 'item 0' and 'item 2' have ids 0 and 2, so `id > 10` excludes both.
+    expect(result.length).toBe(0);
+
+    // --8<-- [start:sql_search]
+    await tbl.query().where("id = 10").limit(10).toArray();
+    // --8<-- [end:sql_search]
+  });
+});
--- a/nodejs/examples/filtering.ts
+++ b/nodejs/examples/filtering.ts
@@ -1,34 +0,0 @@
-import * as lancedb from "@lancedb/lancedb";
-
-const db = await lancedb.connect("data/sample-lancedb");
-
-const data = Array.from({ length: 10_000 }, (_, i) => ({
-  vector: Array(1536).fill(i),
-  id: i,
-  item: `item ${i}`,
-  strId: `${i}`,
-}));
-
-const tbl = await db.createTable("myVectors", data, { mode: "overwrite" });
-
-// --8<-- [start:search]
-const _result = await tbl
-  .search(Array(1536).fill(0.5))
-  .limit(1)
-  .where("id = 10")
-  .toArray();
-// --8<-- [end:search]
-
-// --8<-- [start:vec_search]
-await tbl
-  .search(Array(1536).fill(0))
-  .where("(item IN ('item 0', 'item 2')) AND (id > 10)")
-  .postfilter()
-  .toArray();
-// --8<-- [end:vec_search]
-
-// --8<-- [start:sql_search]
-await tbl.query().where("id = 10").limit(10).toArray();
-// --8<-- [end:sql_search]
-
-console.log("SQL search: done");
--- a/nodejs/examples/full_text_search.test.ts
+++ b/nodejs/examples/full_text_search.test.ts
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors
+import { expect, test } from "@jest/globals";
+import * as lancedb from "@lancedb/lancedb";
+import { withTempDirectory } from "./util.ts";
+
+test("full text search", async () => {
+  await withTempDirectory(async (databaseDir) => {
+    const db = await lancedb.connect(databaseDir);
+
+    const words = [
+      "apple",
+      "banana",
+      "cherry",
+      "date",
+      "elderberry",
+      "fig",
+      "grape",
+    ];
+
+    // Each row's `doc` cycles through `words`, so "apple" is in every 7th row.
+    const data = Array.from({ length: 10_000 }, (_, i) => ({
+      vector: Array(1536).fill(i),
+      id: i,
+      item: `item ${i}`,
+      strId: `${i}`,
+      doc: words[i % words.length],
+    }));
+
+    const tbl = await db.createTable("myVectors", data, { mode: "overwrite" });
+
+    // Build a full-text (FTS) index on `doc` for the text query below.
+    await tbl.createIndex("doc", {
+      config: lancedb.Index.fts(),
+    });
+
+    // --8<-- [start:full_text_search]
+    const result = await tbl
+      .query()
+      .nearestToText("apple")
+      .select(["id", "doc"])
+      .limit(10)
+      .toArray();
+    // --8<-- [end:full_text_search]
+    expect(result.length).toBe(10);
+  });
+});
--- a/nodejs/examples/full_text_search.ts
+++ b/nodejs/examples/full_text_search.ts
@@ -1,52 +0,0 @@
-// Copyright 2024 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-import * as lancedb from "@lancedb/lancedb";
-
-const db = await lancedb.connect("data/sample-lancedb");
-
-const words = [
-  "apple",
-  "banana",
-  "cherry",
-  "date",
-  "elderberry",
-  "fig",
-  "grape",
-];
-
-const data = Array.from({ length: 10_000 }, (_, i) => ({
-  vector: Array(1536).fill(i),
-  id: i,
-  item: `item ${i}`,
-  strId: `${i}`,
-  doc: words[i % words.length],
-}));
-
-const tbl = await db.createTable("myVectors", data, { mode: "overwrite" });
-
-await tbl.createIndex("doc", {
-  config: lancedb.Index.fts(),
-});
-
-// --8<-- [start:full_text_search]
-let result = await tbl
-  .search("apple")
-  .select(["id", "doc"])
-  .limit(10)
-  .toArray();
-console.log(result);
-// --8<-- [end:full_text_search]
-
-console.log("SQL search: done");
--- a/nodejs/examples/jest.config.cjs
+++ b/nodejs/examples/jest.config.cjs
@@ -0,0 +1,9 @@
+/** @type {import('ts-jest').JestConfigWithTsJest} */
+const config = {
+  preset: "ts-jest",
+  testEnvironment: "node",
+  // Entries are regular expressions matched against test file paths.
+  testPathIgnorePatterns: ["./dist"],
+};
+
+module.exports = config;
--- a/nodejs/examples/jsconfig.json
+++ b/nodejs/examples/jsconfig.json
@@ -1,27 +0,0 @@
-{
-  "compilerOptions": {
-    // Enable latest features
-    "lib": ["ESNext", "DOM"],
-    "target": "ESNext",
-    "module": "ESNext",
-    "moduleDetection": "force",
-    "jsx": "react-jsx",
-    "allowJs": true,
-
-    // Bundler mode
-    "moduleResolution": "bundler",
-    "allowImportingTsExtensions": true,
-    "verbatimModuleSyntax": true,
-    "noEmit": true,
-
-    // Best practices
-    "strict": true,
-    "skipLibCheck": true,
-    "noFallthroughCasesInSwitch": true,
-
-    // Some stricter flags (disabled by default)
-    "noUnusedLocals": false,
-    "noUnusedParameters": false,
-    "noPropertyAccessFromIndexSignature": false
-  }
-}
--- a/nodejs/examples/package-lock.json
+++ b/nodejs/examples/package-lock.json
--- a/nodejs/examples/package.json
+++ b/nodejs/examples/package.json
@@ -5,24 +5,29 @@
  "main": "index.js",
  "type": "module",
  "scripts": {
-    "test": "echo \"Error: no test specified\" && exit 1"
+    "//1": "--experimental-vm-modules is needed to run jest with sentence-transformers",
+    "//2": "--testEnvironment is needed to run jest with sentence-transformers",
+    "//3": "See: https://github.com/huggingface/transformers.js/issues/57",
+    "test": "node --experimental-vm-modules node_modules/.bin/jest --testEnvironment jest-environment-node-single-context --verbose",
+    "lint": "biome check *.ts && biome format *.ts",
+    "lint-ci": "biome ci .",
+    "lint-fix": "biome check --write *.ts && npm run format",
+    "format": "biome format --write *.ts"
  },
  "author": "Lance Devs",
  "license": "Apache-2.0",
  "dependencies": {
-    "@lancedb/lancedb": "file:../",
-    "@xenova/transformers": "^2.17.2"
+    "@huggingface/transformers": "^3.0.2",
+    "@lancedb/lancedb": "file:../dist",
+    "openai": "^4.29.2",
+    "sharp": "^0.33.5"
  },
  "devDependencies": {
+    "@biomejs/biome": "^1.7.3",
+    "@jest/globals": "^29.7.0",
+    "jest": "^29.7.0",
+    "jest-environment-node-single-context": "^29.4.0",
+    "ts-jest": "^29.2.5",
    "typescript": "^5.5.4"
-  },
-  "compilerOptions": {
-    "target": "ESNext",
-    "module": "ESNext",
-    "moduleResolution": "Node",
-    "strict": true,
-    "esModuleInterop": true,
-    "skipLibCheck": true,
-    "forceConsistentCasingInFileNames": true
  }
 }
--- a/nodejs/examples/search.test.ts
+++ b/nodejs/examples/search.test.ts
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors
+import { expect, test } from "@jest/globals";
+// --8<-- [start:import]
+import * as lancedb from "@lancedb/lancedb";
+// --8<-- [end:import]
+import { withTempDirectory } from "./util.ts";
+
+test("vector search", async () => {
+  await withTempDirectory(async (databaseDir) => {
+    // Setup (kept out of the doc snippets): create the table queried below.
+    {
+      const db = await lancedb.connect(databaseDir);
+
+      const data = Array.from({ length: 10_000 }, (_, i) => ({
+        vector: Array(128).fill(i),
+        id: `${i}`,
+        content: "",
+        longId: `${i}`,
+      }));
+
+      await db.createTable("my_vectors", data);
+    }
+
+    // --8<-- [start:search1]
+    const db = await lancedb.connect(databaseDir);
+    const tbl = await db.openTable("my_vectors");
+
+    const results1 = await tbl.search(Array(128).fill(1.2)).limit(10).toArray();
+    // --8<-- [end:search1]
+    expect(results1.length).toBe(10);
+
+    // --8<-- [start:search2]
+    const results2 = await (
+      tbl.search(Array(128).fill(1.2)) as lancedb.VectorQuery
+    )
+      .distanceType("cosine")
+      .limit(10)
+      .toArray();
+    // --8<-- [end:search2]
+    expect(results2.length).toBe(10);
+  });
+});
--- a/nodejs/examples/search.ts
+++ b/nodejs/examples/search.ts
@@ -1,38 +0,0 @@
-// --8<-- [end:import]
-import * as fs from "node:fs";
-// --8<-- [start:import]
-import * as lancedb from "@lancedb/lancedb";
-
-async function setup() {
-  fs.rmSync("data/sample-lancedb", { recursive: true, force: true });
-  const db = await lancedb.connect("data/sample-lancedb");
-
-  const data = Array.from({ length: 10_000 }, (_, i) => ({
-    vector: Array(1536).fill(i),
-    id: `${i}`,
-    content: "",
-    longId: `${i}`,
-  }));
-
-  await db.createTable("my_vectors", data);
-}
-
-await setup();
-
-// --8<-- [start:search1]
-const db = await lancedb.connect("data/sample-lancedb");
-const tbl = await db.openTable("my_vectors");
-
-const _results1 = await tbl.search(Array(1536).fill(1.2)).limit(10).toArray();
-// --8<-- [end:search1]
-
-// --8<-- [start:search2]
-const _results2 = await tbl
-  .search(Array(1536).fill(1.2))
-  .distanceType("cosine")
-  .limit(10)
-  .toArray();
-console.log(_results2);
-// --8<-- [end:search2]
-
-console.log("search: done");
--- a/nodejs/examples/sentence-transformers.js
+++ b/nodejs/examples/sentence-transformers.js
@@ -1,50 +0,0 @@
-import * as lancedb from "@lancedb/lancedb";
-
-import { LanceSchema, getRegistry } from "@lancedb/lancedb/embedding";
-import { Utf8 } from "apache-arrow";
-
-const db = await lancedb.connect("/tmp/db");
-const func = await getRegistry().get("huggingface").create();
-
-const facts = [
-  "Albert Einstein was a theoretical physicist.",
-  "The capital of France is Paris.",
-  "The Great Wall of China is one of the Seven Wonders of the World.",
-  "Python is a popular programming language.",
-  "Mount Everest is the highest mountain in the world.",
-  "Leonardo da Vinci painted the Mona Lisa.",
-  "Shakespeare wrote Hamlet.",
-  "The human body has 206 bones.",
-  "The speed of light is approximately 299,792 kilometers per second.",
-  "Water boils at 100 degrees Celsius.",
-  "The Earth orbits the Sun.",
-  "The Pyramids of Giza are located in Egypt.",
-  "Coffee is one of the most popular beverages in the world.",
-  "Tokyo is the capital city of Japan.",
-  "Photosynthesis is the process by which plants make their food.",
-  "The Pacific Ocean is the largest ocean on Earth.",
-  "Mozart was a prolific composer of classical music.",
-  "The Internet is a global network of computers.",
-  "Basketball is a sport played with a ball and a hoop.",
-  "The first computer virus was created in 1983.",
-  "Artificial neural networks are inspired by the human brain.",
-  "Deep learning is a subset of machine learning.",
-  "IBM's Watson won Jeopardy! in 2011.",
-  "The first computer programmer was Ada Lovelace.",
-  "The first chatbot was ELIZA, created in the 1960s.",
-].map((text) => ({ text }));
-
-const factsSchema = LanceSchema({
-  text: func.sourceField(new Utf8()),
-  vector: func.vectorField(),
-});
-
-const tbl = await db.createTable("facts", facts, {
-  mode: "overwrite",
-  schema: factsSchema,
-});
-
-const query = "How many bones are in the human body?";
-const actual = await tbl.search(query).limit(1).toArray();
-
-console.log("Answer: ", actual[0]["text"]);
--- a/nodejs/examples/sentence-transformers.test.ts
+++ b/nodejs/examples/sentence-transformers.test.ts
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors
+import { expect, test } from "@jest/globals";
+import { withTempDirectory } from "./util.ts";
+
+import * as lancedb from "@lancedb/lancedb";
+import "@lancedb/lancedb/embedding/transformers";
+import {
+  EmbeddingFunction,
+  LanceSchema,
+  getRegistry,
+} from "@lancedb/lancedb/embedding";
+import { Utf8 } from "apache-arrow";
+
+// The trailing argument of `test` is the Jest timeout in milliseconds.
+test("sentence transformers embeddings", async () => {
+  await withTempDirectory(async (databaseDir) => {
+    const db = await lancedb.connect(databaseDir);
+    const func = (await getRegistry()
+      .get("huggingface")
+      ?.create()) as EmbeddingFunction;
+
+    const facts = [
+      "Albert Einstein was a theoretical physicist.",
+      "The capital of France is Paris.",
+      "The Great Wall of China is one of the Seven Wonders of the World.",
+      "Python is a popular programming language.",
+      "Mount Everest is the highest mountain in the world.",
+      "Leonardo da Vinci painted the Mona Lisa.",
+      "Shakespeare wrote Hamlet.",
+      "The human body has 206 bones.",
+      "The speed of light is approximately 299,792 kilometers per second.",
+      "Water boils at 100 degrees Celsius.",
+      "The Earth orbits the Sun.",
+      "The Pyramids of Giza are located in Egypt.",
+      "Coffee is one of the most popular beverages in the world.",
+      "Tokyo is the capital city of Japan.",
+      "Photosynthesis is the process by which plants make their food.",
+      "The Pacific Ocean is the largest ocean on Earth.",
+      "Mozart was a prolific composer of classical music.",
+      "The Internet is a global network of computers.",
+      "Basketball is a sport played with a ball and a hoop.",
+      "The first computer virus was created in 1983.",
+      "Artificial neural networks are inspired by the human brain.",
+      "Deep learning is a subset of machine learning.",
+      "IBM's Watson won Jeopardy! in 2011.",
+      "The first computer programmer was Ada Lovelace.",
+      "The first chatbot was ELIZA, created in the 1960s.",
+    ].map((text) => ({ text }));
+
+    const factsSchema = LanceSchema({
+      text: func.sourceField(new Utf8()),
+      vector: func.vectorField(),
+    });
+
+    const tbl = await db.createTable("facts", facts, {
+      mode: "overwrite",
+      schema: factsSchema,
+    });
+
+    const query = "How many bones are in the human body?";
+    const actual = await tbl.search(query).limit(1).toArray();
+
+    expect(actual[0]["text"]).toBe("The human body has 206 bones.");
+  });
+}, 100_000);
--- a/nodejs/examples/tsconfig.json
+++ b/nodejs/examples/tsconfig.json
@@ -0,0 +1,17 @@
+{
+  "include": ["*.test.ts"],
+  "compilerOptions": {
+    "target": "es2022",
+    "module": "NodeNext",
+    "declaration": true,
+    "outDir": "./dist",
+    "strict": true,
+    "allowJs": true,
+    "resolveJsonModule": true,
+    "emitDecoratorMetadata": true,
+    "experimentalDecorators": true,
+    "moduleResolution": "NodeNext",
+    "allowImportingTsExtensions": true,
+    "emitDeclarationOnly": true
+  }
+}
--- a/nodejs/examples/util.ts
+++ b/nodejs/examples/util.ts
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors
+import * as fs from "fs";
+import { tmpdir } from "os";
+import * as path from "path";
+
+/**
+ * Runs `fn` with a freshly created temporary directory and removes that
+ * directory afterwards, whether `fn` resolves or rejects.
+ *
+ * @param fn - Callback that receives the path of the new directory.
+ * @returns A promise that settles once `fn` has settled and cleanup has run.
+ */
+export async function withTempDirectory(
+  fn: (tempDir: string) => Promise<void>,
+) {
+  const tmpDirPath = fs.mkdtempSync(path.join(tmpdir(), "temp-dir-"));
+  try {
+    await fn(tmpDirPath);
+  } finally {
+    // `force` keeps cleanup from throwing (and masking an error from `fn`)
+    // when the directory is already gone.
+    fs.rmSync(tmpDirPath, { recursive: true, force: true });
+  }
+}
--- a/nodejs/jest.config.js
+++ b/nodejs/jest.config.js
@@ -4,4 +4,5 @@ module.exports = {
  testEnvironment: "node",
  moduleDirectories: ["node_modules", "./dist"],
  moduleFileExtensions: ["js", "ts"],
+  modulePathIgnorePatterns: ["<rootDir>/examples/"],
 };
--- a/nodejs/lancedb/embedding/index.ts
+++ b/nodejs/lancedb/embedding/index.ts
@@ -19,9 +19,6 @@ import { EmbeddingFunctionConfig, getRegistry } from "./registry";

 export { EmbeddingFunction, TextEmbeddingFunction } from "./embedding_function";

-// We need to explicitly export '*' so that the `register` decorator actually registers the class.
-export * from "./openai";
-export * from "./transformers";
 export * from "./registry";

 /**
--- a/nodejs/lancedb/embedding/registry.ts
+++ b/nodejs/lancedb/embedding/registry.ts
@@ -17,8 +17,6 @@ import {
  type EmbeddingFunctionConstructor,
 } from "./embedding_function";
 import "reflect-metadata";
-import { OpenAIEmbeddingFunction } from "./openai";
-import { TransformersEmbeddingFunction } from "./transformers";

 type CreateReturnType<T> = T extends { init: () => Promise<void> }
  ? Promise<T>
@@ -73,10 +71,6 @@ export class EmbeddingFunctionRegistry {
    };
  }

-  get(name: "openai"): EmbeddingFunctionCreate<OpenAIEmbeddingFunction>;
-  get(
-    name: "huggingface",
-  ): EmbeddingFunctionCreate<TransformersEmbeddingFunction>;
  get<T extends EmbeddingFunction<unknown>>(
    name: string,
  ): EmbeddingFunctionCreate<T> | undefined;
--- a/nodejs/lancedb/embedding/transformers.ts
+++ b/nodejs/lancedb/embedding/transformers.ts
@@ -47,8 +47,8 @@ export class TransformersEmbeddingFunction extends EmbeddingFunction<
  string,
  Partial<XenovaTransformerOptions>
 > {
-  #model?: import("@xenova/transformers").PreTrainedModel;
-  #tokenizer?: import("@xenova/transformers").PreTrainedTokenizer;
+  #model?: import("@huggingface/transformers").PreTrainedModel;
+  #tokenizer?: import("@huggingface/transformers").PreTrainedTokenizer;
  #modelName: XenovaTransformerOptions["model"];
  #initialized = false;
  #tokenizerOptions: XenovaTransformerOptions["tokenizerOptions"];
@@ -92,18 +92,19 @@ export class TransformersEmbeddingFunction extends EmbeddingFunction<
    try {
      // SAFETY:
      // since typescript transpiles `import` to `require`, we need to do this in an unsafe way
-      // We can't use `require` because `@xenova/transformers` is an ESM module
+      // We can't use `require` because `@huggingface/transformers` is an ESM module
      // and we can't use `import` directly because typescript will transpile it to `require`.
      // and we want to remain compatible with both ESM and CJS modules
      // so we use `eval` to bypass typescript for this specific import.
-      transformers = await eval('import("@xenova/transformers")');
+      transformers = await eval('import("@huggingface/transformers")');
    } catch (e) {
-      throw new Error(`error loading @xenova/transformers\nReason: ${e}`);
+      throw new Error(`error loading @huggingface/transformers\nReason: ${e}`);
    }

    try {
      this.#model = await transformers.AutoModel.from_pretrained(
        this.#modelName,
+        { dtype: "fp32" },
      );
    } catch (e) {
      throw new Error(
@@ -128,7 +129,8 @@ export class TransformersEmbeddingFunction extends EmbeddingFunction<
    } else {
      const config = this.#model!.config;

-      const ndims = config["hidden_size"];
+      // biome-ignore lint/style/useNamingConvention: we don't control this name.
+      const ndims = (config as unknown as { hidden_size: number }).hidden_size;
      if (!ndims) {
        throw new Error(
          "hidden_size not found in model config, you may need to manually specify the embedding dimensions. ",
@@ -183,7 +185,7 @@ export class TransformersEmbeddingFunction extends EmbeddingFunction<
 }

 const tensorDiv = (
-  src: import("@xenova/transformers").Tensor,
+  src: import("@huggingface/transformers").Tensor,
  divBy: number,
 ) => {
  for (let i = 0; i < src.data.length; ++i) {
--- a/nodejs/lancedb/query.ts
+++ b/nodejs/lancedb/query.ts
@@ -385,6 +385,20 @@ export class VectorQuery extends QueryBase<NativeVectorQuery> {
    return this;
  }

+  /**
+   * Set the number of candidates to consider during the search
+   *
+   * This argument is only used when the vector column has an HNSW index.
+   * If there is no index then this value is ignored.
+   *
+   * Increasing this value will increase the recall of your query but will
+   * also increase the latency of your query. The default value is 1.5*limit.
+   */
+  ef(ef: number): VectorQuery {
+    super.doCall((inner) => inner.ef(ef));
+    return this;
+  }
+
  /**
   * Set the vector column to query
   *
@@ -492,6 +506,42 @@ export class VectorQuery extends QueryBase<NativeVectorQuery> {
    super.doCall((inner) => inner.bypassVectorIndex());
    return this;
  }
+
+  /**
+   * Add a query vector to the search
+   *
+   * This method can be called multiple times to add multiple query vectors
+   * to the search. If multiple query vectors are added, then they will be searched
+   * in parallel, and the results will be concatenated. A column called `query_index`
+   * will be added to indicate the index of the query vector that produced the result.
+   *
+   * Performance wise, this is equivalent to running multiple queries concurrently.
+   */
+  addQueryVector(vector: IntoVector): VectorQuery {
+    if (vector instanceof Promise) {
+      const res = (async () => {
+        try {
+          const v = await vector;
+          const arr = Float32Array.from(v);
+          //
+          // biome-ignore lint/suspicious/noExplicitAny: we need to get the `inner`, but js has no package scoping
+          const value: any = this.addQueryVector(arr);
+          const inner = value.inner as
+            | NativeVectorQuery
+            | Promise<NativeVectorQuery>;
+          return inner;
+        } catch (e) {
+          return Promise.reject(e);
+        }
+      })();
+      return new VectorQuery(res);
+    } else {
+      super.doCall((inner) => {
+        inner.addQueryVector(Float32Array.from(vector));
+      });
+      return this;
+    }
+  }
 }

 /** A builder for LanceDB queries. */
@@ -571,4 +621,9 @@ export class Query extends QueryBase<NativeQuery> {
      return new VectorQuery(vectorQuery);
    }
  }
+
+  nearestToText(query: string, columns?: string[]): Query {
+    this.doCall((inner) => inner.fullTextSearch(query, columns));
+    return this;
+  }
 }
--- a/nodejs/lancedb/table.ts
+++ b/nodejs/lancedb/table.ts
@@ -87,6 +87,12 @@ export interface OptimizeOptions {
  deleteUnverified: boolean;
 }

+export interface Version {
+  version: number;
+  timestamp: Date;
+  metadata: Record<string, string>;
+}
+
 /**
 * A Table is a collection of Records in a LanceDB Database.
 *
@@ -360,6 +366,11 @@ export abstract class Table {
   */
  abstract checkoutLatest(): Promise<void>;

+  /**
+   * List all the versions of the table
+   */
+  abstract listVersions(): Promise<Version[]>;
+
  /**
   * Restore the table to the currently checked out version
   *
@@ -659,6 +670,14 @@ export class LocalTable extends Table {
    await this.inner.checkoutLatest();
  }

+  async listVersions(): Promise<Version[]> {
+    return (await this.inner.listVersions()).map((version) => ({
+      version: version.version,
+      timestamp: new Date(version.timestamp / 1000),
+      metadata: version.metadata,
+    }));
+  }
+
  async restore(): Promise<void> {
    await this.inner.restore();
  }
--- a/nodejs/npm/darwin-arm64/package.json
+++ b/nodejs/npm/darwin-arm64/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-darwin-arm64",
-	"version": "0.13.0-beta.1",
+	"version": "0.13.0",
 	"os": ["darwin"],
 	"cpu": ["arm64"],
 	"main": "lancedb.darwin-arm64.node",
--- a/nodejs/npm/darwin-x64/package.json
+++ b/nodejs/npm/darwin-x64/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-darwin-x64",
-	"version": "0.13.0-beta.1",
+	"version": "0.13.0",
 	"os": ["darwin"],
 	"cpu": ["x64"],
 	"main": "lancedb.darwin-x64.node",
--- a/nodejs/npm/linux-arm64-gnu/package.json
+++ b/nodejs/npm/linux-arm64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-gnu",
-	"version": "0.13.0-beta.1",
+	"version": "0.13.0",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-gnu.node",
--- a/nodejs/npm/linux-arm64-musl/README.md
+++ b/nodejs/npm/linux-arm64-musl/README.md
@@ -0,0 +1,3 @@
+# `@lancedb/lancedb-linux-arm64-musl`
+
+This is the **aarch64-unknown-linux-musl** binary for `@lancedb/lancedb`
--- a/nodejs/npm/linux-arm64-musl/package.json
+++ b/nodejs/npm/linux-arm64-musl/package.json
@@ -0,0 +1,13 @@
+{
+	"name": "@lancedb/lancedb-linux-arm64-musl",
+	"version": "0.13.0",
+	"os": ["linux"],
+	"cpu": ["arm64"],
+	"main": "lancedb.linux-arm64-musl.node",
+	"files": ["lancedb.linux-arm64-musl.node"],
+	"license": "Apache 2.0",
+	"engines": {
+		"node": ">= 18"
+	},
+	"libc": ["musl"]
+}
--- a/nodejs/npm/linux-x64-gnu/package.json
+++ b/nodejs/npm/linux-x64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-gnu",
-	"version": "0.13.0-beta.1",
+	"version": "0.13.0",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-gnu.node",
--- a/nodejs/npm/linux-x64-musl/README.md
+++ b/nodejs/npm/linux-x64-musl/README.md
@@ -0,0 +1,3 @@
+# `@lancedb/lancedb-linux-x64-musl`
+
+This is the **x86_64-unknown-linux-musl** binary for `@lancedb/lancedb`
--- a/nodejs/npm/linux-x64-musl/package.json
+++ b/nodejs/npm/linux-x64-musl/package.json
@@ -0,0 +1,13 @@
+{
+	"name": "@lancedb/lancedb-linux-x64-musl",
+	"version": "0.13.0",
+	"os": ["linux"],
+	"cpu": ["x64"],
+	"main": "lancedb.linux-x64-musl.node",
+	"files": ["lancedb.linux-x64-musl.node"],
+	"license": "Apache 2.0",
+	"engines": {
+		"node": ">= 18"
+	},
+	"libc": ["musl"]
+}
--- a/nodejs/npm/win32-arm64-msvc/package.json
+++ b/nodejs/npm/win32-arm64-msvc/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@lancedb/lancedb-win32-arm64-msvc",
-  "version": "0.13.0-beta.1",
+  "version": "0.13.0",
  "os": [
    "win32"
  ],
--- a/nodejs/npm/win32-x64-msvc/package.json
+++ b/nodejs/npm/win32-x64-msvc/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-win32-x64-msvc",
-	"version": "0.13.0-beta.1",
+	"version": "0.13.0",
 	"os": ["win32"],
 	"cpu": ["x64"],
 	"main": "lancedb.win32-x64-msvc.node",
--- a/nodejs/package-lock.json
+++ b/nodejs/package-lock.json
--- a/nodejs/package.json
+++ b/nodejs/package.json
@@ -10,11 +10,13 @@
    "vector database",
    "ann"
  ],
-  "version": "0.13.0-beta.1",
+  "version": "0.13.0",
  "main": "dist/index.js",
  "exports": {
    ".": "./dist/index.js",
-    "./embedding": "./dist/embedding/index.js"
+    "./embedding": "./dist/embedding/index.js",
+    "./embedding/openai": "./dist/embedding/openai.js",
+    "./embedding/transformers": "./dist/embedding/transformers.js"
  },
  "types": "dist/index.d.ts",
  "napi": {
@@ -22,10 +24,12 @@
    "triples": {
      "defaults": false,
      "additional": [
-        "aarch64-apple-darwin",
-        "aarch64-unknown-linux-gnu",
        "x86_64-apple-darwin",
+        "aarch64-apple-darwin",
        "x86_64-unknown-linux-gnu",
+        "aarch64-unknown-linux-gnu",
+        "x86_64-unknown-linux-musl",
+        "aarch64-unknown-linux-musl",
        "x86_64-pc-windows-msvc"
      ]
    }
@@ -85,7 +89,7 @@
    "reflect-metadata": "^0.2.2"
  },
  "optionalDependencies": {
-    "@xenova/transformers": ">=2.17 < 3",
+    "@huggingface/transformers": "^3.0.2",
    "openai": "^4.29.2"
  },
  "peerDependencies": {
--- a/nodejs/src/query.rs
+++ b/nodejs/src/query.rs
@@ -135,6 +135,16 @@ impl VectorQuery {
        self.inner = self.inner.clone().column(&column);
    }

+    #[napi]
+    pub fn add_query_vector(&mut self, vector: Float32Array) -> Result<()> {
+        self.inner = self
+            .inner
+            .clone()
+            .add_query_vector(vector.as_ref())
+            .default_error()?;
+        Ok(())
+    }
+
    #[napi]
    pub fn distance_type(&mut self, distance_type: String) -> napi::Result<()> {
        let distance_type = parse_distance_type(distance_type)?;
@@ -157,6 +167,11 @@ impl VectorQuery {
        self.inner = self.inner.clone().nprobes(nprobe as usize);
    }

+    #[napi]
+    pub fn ef(&mut self, ef: u32) {
+        self.inner = self.inner.clone().ef(ef as usize);
+    }
+
    #[napi]
    pub fn bypass_vector_index(&mut self) {
        self.inner = self.inner.clone().bypass_vector_index()
--- a/nodejs/src/table.rs
+++ b/nodejs/src/table.rs
@@ -12,6 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+use std::collections::HashMap;
+
 use arrow_ipc::writer::FileWriter;
 use lancedb::ipc::ipc_file_to_batches;
 use lancedb::table::{
@@ -226,6 +228,28 @@ impl Table {
        self.inner_ref()?.checkout_latest().await.default_error()
    }

+    #[napi(catch_unwind)]
+    pub async fn list_versions(&self) -> napi::Result<Vec<Version>> {
+        self.inner_ref()?
+            .list_versions()
+            .await
+            .map(|versions| {
+                versions
+                    .iter()
+                    .map(|version| Version {
+                        version: version.version as i64,
+                        timestamp: version.timestamp.timestamp_micros(),
+                        metadata: version
+                            .metadata
+                            .iter()
+                            .map(|(k, v)| (k.clone(), v.clone()))
+                            .collect(),
+                    })
+                    .collect()
+            })
+            .default_error()
+    }
+
    #[napi(catch_unwind)]
    pub async fn restore(&self) -> napi::Result<()> {
        self.inner_ref()?.restore().await.default_error()
@@ -466,3 +490,10 @@ impl From<lancedb::index::IndexStatistics> for IndexStatistics {
        }
    }
 }
+
+#[napi(object)]
+pub struct Version {
+    pub version: i64,
+    pub timestamp: i64,
+    pub metadata: HashMap<String, String>,
+}
--- a/nodejs/tsconfig.json
+++ b/nodejs/tsconfig.json
@@ -12,7 +12,7 @@
    "experimentalDecorators": true,
    "moduleResolution": "Node"
  },
-  "exclude": ["./dist/*"],
+  "exclude": ["./dist/*", "./examples/*"],
  "typedocOptions": {
    "entryPoints": ["lancedb/index.ts"],
    "out": "../docs/src/javascript/",
--- a/python/.bumpversion.toml
+++ b/python/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.16.0-beta.0"
+current_version = "0.16.1-beta.0"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-python"
-version = "0.16.0-beta.0"
+version = "0.16.1-beta.0"
 edition.workspace = true
 description = "Python bindings for LanceDB"
 license.workspace = true
@@ -15,7 +15,7 @@ crate-type = ["cdylib"]

 [dependencies]
 arrow = { version = "52.1", features = ["pyarrow"] }
-lancedb = { path = "../rust/lancedb" }
+lancedb = { path = "../rust/lancedb", default-features = false }
 env_logger.workspace = true
 pyo3 = { version = "0.21", features = ["extension-module", "abi3-py38", "gil-refs"] }
 # Using this fork for now: https://github.com/awestlake87/pyo3-asyncio/issues/119
@@ -33,6 +33,11 @@ pyo3-build-config = { version = "0.20.3", features = [
 ] }

 [features]
-default = ["remote"]
+default = ["default-tls", "remote"]
 fp16kernels = ["lancedb/fp16kernels"]
 remote = ["lancedb/remote"]
+
+# TLS
+default-tls = ["lancedb/default-tls"]
+native-tls = ["lancedb/native-tls"]
+rustls-tls = ["lancedb/rustls-tls"]
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -4,7 +4,7 @@ name = "lancedb"
 dependencies = [
    "deprecation",
    "nest-asyncio~=1.0",
-    "pylance==0.19.2-beta.3",
+    "pylance==0.20.0b2",
    "tqdm>=4.27.0",
    "pydantic>=1.10",
    "packaging",
--- a/python/python/lancedb/embeddings/registry.py
+++ b/python/python/lancedb/embeddings/registry.py
@@ -1,15 +1,6 @@
-#  Copyright (c) 2023. LanceDB Developers
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright The LanceDB Authors
+
 import json
 from typing import Dict, Optional

@@ -170,7 +161,7 @@ def register(name):
    return __REGISTRY__.get_instance().register(name)


-def get_registry():
+def get_registry() -> EmbeddingFunctionRegistry:
    """
    Utility function to get the global instance of the registry

--- a/python/python/lancedb/query.py
+++ b/python/python/lancedb/query.py
@@ -131,6 +131,8 @@ class Query(pydantic.BaseModel):

    fast_search: bool = False

+    ef: Optional[int] = None
+

 class LanceQueryBuilder(ABC):
    """An abstract query builder. Subclasses are defined for vector search,
@@ -257,6 +259,7 @@ class LanceQueryBuilder(ABC):
        self._with_row_id = False
        self._vector = None
        self._text = None
+        self._ef = None

    @deprecation.deprecated(
        deprecated_in="0.3.1",
@@ -367,11 +370,13 @@ class LanceQueryBuilder(ABC):
        ----------
        limit: int
            The maximum number of results to return.
-            By default the query is limited to the first 10.
-            Call this method and pass 0, a negative value,
-            or None to remove the limit.
-            *WARNING* if you have a large dataset, removing
-            the limit can potentially result in reading a
+            The default query limit is 10 results.
+            For ANN/KNN queries, you must specify a limit.
+            Entering 0, a negative number, or None will reset
+            the limit to the default value of 10.
+            *WARNING* if you have a large dataset, setting
+            the limit to a large number, e.g. the table size,
+            can potentially result in reading a
            large amount of data into memory and cause
            out of memory issues.

@@ -638,6 +643,28 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
        self._nprobes = nprobes
        return self

+    def ef(self, ef: int) -> LanceVectorQueryBuilder:
+        """Set the number of candidates to consider during search.
+
+        Higher values will yield better recall (more likely to find vectors if
+        they exist) at the expense of latency.
+
+        This only applies to the HNSW-related index.
+        The default value is 1.5 * limit.
+
+        Parameters
+        ----------
+        ef: int
+            The number of candidates to consider during search.
+
+        Returns
+        -------
+        LanceVectorQueryBuilder
+            The LanceQueryBuilder object.
+        """
+        self._ef = ef
+        return self
+
    def refine_factor(self, refine_factor: int) -> LanceVectorQueryBuilder:
        """Set the refine factor to use, increasing the number of vectors sampled.

@@ -700,6 +727,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
            with_row_id=self._with_row_id,
            offset=self._offset,
            fast_search=self._fast_search,
+            ef=self._ef,
        )
        result_set = self._table._execute_query(query, batch_size)
        if self._reranker is not None:
@@ -943,12 +971,16 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):

 class LanceEmptyQueryBuilder(LanceQueryBuilder):
    def to_arrow(self) -> pa.Table:
-        ds = self._table.to_lance()
-        return ds.to_table(
+        query = Query(
            columns=self._columns,
            filter=self._where,
-            limit=self._limit,
+            k=self._limit or 10,
+            with_row_id=self._with_row_id,
+            vector=[],
+            # not actually respected in remote query
+            offset=self._offset or 0,
        )
+        return self._table._execute_query(query).read_all()

    def rerank(self, reranker: Reranker) -> LanceEmptyQueryBuilder:
        """Rerank the results using the specified reranker.
@@ -1067,6 +1099,8 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
            self._vector_query.nprobes(self._nprobes)
        if self._refine_factor:
            self._vector_query.refine_factor(self._refine_factor)
+        if self._ef:
+            self._vector_query.ef(self._ef)

        with ThreadPoolExecutor() as executor:
            fts_future = executor.submit(self._fts_query.with_row_id(True).to_arrow)
@@ -1193,6 +1227,29 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
        self._nprobes = nprobes
        return self

+    def ef(self, ef: int) -> LanceHybridQueryBuilder:
+        """
+        Set the number of candidates to consider during search.
+
+        Higher values will yield better recall (more likely to find vectors if
+        they exist) at the expense of latency.
+
+        This only applies to the HNSW-related index.
+        The default value is 1.5 * limit.
+
+        Parameters
+        ----------
+        ef: int
+            The number of candidates to consider during search.
+
+        Returns
+        -------
+        LanceHybridQueryBuilder
+            The LanceHybridQueryBuilder object.
+        """
+        self._ef = ef
+        return self
+
    def metric(self, metric: Literal["L2", "cosine", "dot"]) -> LanceHybridQueryBuilder:
        """Set the distance metric to use.

@@ -1491,7 +1548,8 @@ class AsyncQuery(AsyncQueryBase):
        return pa.array(vec)

    def nearest_to(
-        self, query_vector: Optional[Union[VEC, Tuple]] = None
+        self,
+        query_vector: Union[VEC, Tuple, List[VEC]],
    ) -> AsyncVectorQuery:
        """
        Find the nearest vectors to the given query vector.
@@ -1529,10 +1587,33 @@ class AsyncQuery(AsyncQueryBase):

        Vector searches always have a [limit][].  If `limit` has not been called then
        a default `limit` of 10 will be used.
+
+        Typically, a single vector is passed in as the query. However, you can also
+        pass in multiple vectors.  This can be useful if you want to find the nearest
+        vectors to multiple query vectors. This is not expected to be faster than
+        making multiple queries concurrently; it is just a convenience method.
+        If multiple vectors are passed in then an additional column `query_index`
+        will be added to the results.  This column will contain the index of the
+        query vector that the result is nearest to.
        """
-        return AsyncVectorQuery(
-            self._inner.nearest_to(AsyncQuery._query_vec_to_array(query_vector))
-        )
+        if query_vector is None:
+            raise ValueError("query_vector can not be None")
+
+        if (
+            isinstance(query_vector, list)
+            and len(query_vector) > 0
+            and not isinstance(query_vector[0], (float, int))
+        ):
+            # multiple have been passed
+            query_vectors = [AsyncQuery._query_vec_to_array(v) for v in query_vector]
+            new_self = self._inner.nearest_to(query_vectors[0])
+            for v in query_vectors[1:]:
+                new_self.add_query_vector(v)
+            return AsyncVectorQuery(new_self)
+        else:
+            return AsyncVectorQuery(
+                self._inner.nearest_to(AsyncQuery._query_vec_to_array(query_vector))
+            )

    def nearest_to_text(
        self, query: str, columns: Union[str, List[str]] = []
@@ -1594,7 +1675,7 @@ class AsyncVectorQuery(AsyncQueryBase):
        """
        Set the number of partitions to search (probe)

-        This argument is only used when the vector column has an IVF PQ index.
+        This argument is only used when the vector column has an IVF-based index.
        If there is no index then this value is ignored.

        The IVF stage of IVF PQ divides the input into partitions (clusters) of
@@ -1616,6 +1697,21 @@ class AsyncVectorQuery(AsyncQueryBase):
        self._inner.nprobes(nprobes)
        return self

+    def ef(self, ef: int) -> AsyncVectorQuery:
+        """
+        Set the number of candidates to consider during search
+
+        This argument is only used when the vector column has an HNSW index.
+        If there is no index then this value is ignored.
+
+        Increasing this value will increase the recall of your query but will also
+        increase the latency of your query.  The default value is 1.5 * limit.  This
+        default is good for many cases but the best value to use will depend on your
+        data and the recall that you need to achieve.
+        """
+        self._inner.ef(ef)
+        return self
+
    def refine_factor(self, refine_factor: int) -> AsyncVectorQuery:
        """
        A multiplier to control how many additional rows are taken during the refine
--- a/python/python/lancedb/remote/table.py
+++ b/python/python/lancedb/remote/table.py
@@ -78,6 +78,10 @@ class RemoteTable(Table):
            self.schema.metadata
        )

+    def list_versions(self):
+        """List all versions of the table"""
+        return self._loop.run_until_complete(self._table.list_versions())
+
    def to_arrow(self) -> pa.Table:
        """to_arrow() is not yet supported on LanceDB cloud."""
        raise NotImplementedError("to_arrow() is not yet supported on LanceDB cloud.")
@@ -86,6 +90,12 @@ class RemoteTable(Table):
        """to_pandas() is not yet supported on LanceDB cloud."""
        return NotImplementedError("to_pandas() is not yet supported on LanceDB cloud.")

+    def checkout(self, version):
+        return self._loop.run_until_complete(self._table.checkout(version))
+
+    def checkout_latest(self):
+        return self._loop.run_until_complete(self._table.checkout_latest())
+
    def list_indices(self):
        """List all the indices on the table"""
        return self._loop.run_until_complete(self._table.list_indices())
@@ -327,10 +337,6 @@ class RemoteTable(Table):
            - and also the "_distance" column which is the distance between the query
            vector and the returned vector.
        """
-        # empty query builder is not supported in saas, raise error
-        if query is None and query_type != "hybrid":
-            raise ValueError("Empty query is not supported")
-
        return LanceQueryBuilder.create(
            self,
            query,
--- a/python/python/lancedb/rerankers/cohere.py
+++ b/python/python/lancedb/rerankers/cohere.py
@@ -41,7 +41,7 @@ class CohereReranker(Reranker):

    def __init__(
        self,
-        model_name: str = "rerank-english-v2.0",
+        model_name: str = "rerank-english-v3.0",
        column: str = "text",
        top_n: Union[int, None] = None,
        return_score="relevance",
--- a/python/python/lancedb/rerankers/voyageai.py
+++ b/python/python/lancedb/rerankers/voyageai.py
@@ -13,7 +13,7 @@

 import os
 from functools import cached_property
-from typing import Union, Optional
+from typing import Optional

 import pyarrow as pa

--- a/python/python/lancedb/table.py
+++ b/python/python/lancedb/table.py
@@ -8,7 +8,7 @@ import inspect
 import time
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from datetime import timedelta
+from datetime import datetime, timedelta
 from functools import cached_property
 from typing import (
    TYPE_CHECKING,
@@ -73,6 +73,21 @@ pl = safe_import_polars()
 QueryType = Literal["vector", "fts", "hybrid", "auto"]


+def _pd_schema_without_embedding_funcs(
+    schema: Optional[pa.Schema], columns: List[str]
+) -> Optional[pa.Schema]:
+    """Return a schema without any embedding function columns"""
+    if schema is None:
+        return None
+    embedding_functions = EmbeddingFunctionRegistry.get_instance().parse_functions(
+        schema.metadata
+    )
+    if not embedding_functions:
+        return schema
+    columns = set(columns)
+    return pa.schema([field for field in schema if field.name in columns])
+
+
 def _coerce_to_table(data, schema: Optional[pa.Schema] = None) -> pa.Table:
    if _check_for_hugging_face(data):
        # Huggingface datasets
@@ -103,10 +118,10 @@ def _coerce_to_table(data, schema: Optional[pa.Schema] = None) -> pa.Table:
        elif isinstance(data[0], pa.RecordBatch):
            return pa.Table.from_batches(data, schema=schema)
        else:
-            return pa.Table.from_pylist(data)
+            return pa.Table.from_pylist(data, schema=schema)
    elif _check_for_pandas(data) and isinstance(data, pd.DataFrame):
-        # Do not add schema here, since schema may contains the vector column
-        table = pa.Table.from_pandas(data, preserve_index=False)
+        raw_schema = _pd_schema_without_embedding_funcs(schema, data.columns.to_list())
+        table = pa.Table.from_pandas(data, preserve_index=False, schema=raw_schema)
        # Do not serialize Pandas metadata
        meta = table.schema.metadata if table.schema.metadata is not None else {}
        meta = {k: v for k, v in meta.items() if k != b"pandas"}
@@ -172,6 +187,8 @@ def sanitize_create_table(
        schema = schema.to_arrow_schema()

    if data is not None:
+        if metadata is None and schema is not None:
+            metadata = schema.metadata
        data, schema = _sanitize_data(
            data,
            schema,
@@ -995,6 +1012,39 @@ class Table(ABC):
            The names of the columns to drop.
        """

+    @abstractmethod
+    def checkout(self, version):
+        """Checks out a specific version of the Table
+
+        Any read operation on the table will now access the data at the checked out
+        version. As a consequence, calling this method will disable any read consistency
+        interval that was previously set.
+
+        This is a read-only operation that turns the table into a sort of "view" or
+        "detached head".  Other table instances will not be affected.  To make the
+        change permanent use `restore`; to return to a normal state use
+        `checkout_latest`.  Modifying the table fails while it is checked out.
+
+        Parameters
+        ----------
+        version: int
+            The version of the table to check out.
+        """
+
+    @abstractmethod
+    def checkout_latest(self):
+        """
+        Ensures the table is pointing at the latest version
+
+        This can be used to manually update a table when the read_consistency_interval
+        is None.
+        It can also be used to undo a `checkout` operation.
+        """
+
+    @abstractmethod
+    def list_versions(self):
+        """List all versions of the table"""
+
    @cached_property
    def _dataset_uri(self) -> str:
        return _table_uri(self._conn.uri, self.name)
@@ -1550,7 +1600,7 @@ class LanceTable(Table):
            "append" and "overwrite".
        on_bad_vectors: str, default "error"
            What to do if any of the vectors are not the same size or contains NaNs.
-            One of "error", "drop", "fill".
+            One of "error", "drop", "fill", "null".
        fill_value: float, default 0.
            The value to use when filling vectors. Only used if on_bad_vectors="fill".

@@ -1834,7 +1884,7 @@ class LanceTable(Table):
            data but will validate against any schema that's specified.
        on_bad_vectors: str, default "error"
            What to do if any of the vectors are not the same size or contains NaNs.
-            One of "error", "drop", "fill".
+            One of "error", "drop", "fill", "null".
        fill_value: float, default 0.
            The value to use when filling vectors. Only used if on_bad_vectors="fill".
        embedding_functions: list of EmbeddingFunctionModel, default None
@@ -1942,6 +1992,7 @@ class LanceTable(Table):
                "metric": query.metric,
                "nprobes": query.nprobes,
                "refine_factor": query.refine_factor,
+                "ef": query.ef,
            }
        return ds.scanner(
            columns=query.columns,
@@ -2134,13 +2185,11 @@ def _sanitize_schema(
        vector column to fixed_size_list(float32) if necessary.
    on_bad_vectors: str, default "error"
        What to do if any of the vectors are not the same size or contains NaNs.
-        One of "error", "drop", "fill".
+        One of "error", "drop", "fill", "null".
    fill_value: float, default 0.
        The value to use when filling vectors. Only used if on_bad_vectors="fill".
    """
    if schema is not None:
-        if data.schema == schema:
-            return data
        # cast the columns to the expected types
        data = data.combine_chunks()
        for field in schema:
@@ -2160,6 +2209,7 @@ def _sanitize_schema(
                    vector_column_name=field.name,
                    on_bad_vectors=on_bad_vectors,
                    fill_value=fill_value,
+                    table_schema=schema,
                )
        return pa.Table.from_arrays(
            [data[name] for name in schema.names], schema=schema
@@ -2180,6 +2230,7 @@ def _sanitize_schema(
 def _sanitize_vector_column(
    data: pa.Table,
    vector_column_name: str,
+    table_schema: Optional[pa.Schema] = None,
    on_bad_vectors: str = "error",
    fill_value: float = 0.0,
 ) -> pa.Table:
@@ -2194,12 +2245,16 @@ def _sanitize_vector_column(
        The name of the vector column.
    on_bad_vectors: str, default "error"
        What to do if any of the vectors are not the same size or contains NaNs.
-        One of "error", "drop", "fill".
+        One of "error", "drop", "fill", "null".
    fill_value: float, default 0.0
        The value to use when filling vectors. Only used if on_bad_vectors="fill".
    """
    # ChunkedArray is annoying to work with, so we combine chunks here
    vec_arr = data[vector_column_name].combine_chunks()
+    if table_schema is not None:
+        field = table_schema.field(vector_column_name)
+    else:
+        field = None
    typ = data[vector_column_name].type
    if pa.types.is_list(typ) or pa.types.is_large_list(typ):
        # if it's a variable size list array,
@@ -2226,7 +2281,11 @@ def _sanitize_vector_column(
                data, fill_value, on_bad_vectors, vec_arr, vector_column_name
            )
    else:
-        if pc.any(pc.is_null(vec_arr.values, nan_is_null=True)).as_py():
+        if (
+            field is not None
+            and not field.nullable
+            and pc.any(pc.is_null(vec_arr.values)).as_py()
+        ) or (pc.any(pc.is_nan(vec_arr.values)).as_py()):
            data = _sanitize_nans(
                data, fill_value, on_bad_vectors, vec_arr, vector_column_name
            )
@@ -2270,6 +2329,12 @@ def _sanitize_jagged(data, fill_value, on_bad_vectors, vec_arr, vector_column_na
        )
    elif on_bad_vectors == "drop":
        data = data.filter(correct_ndims)
+    elif on_bad_vectors == "null":
+        data = data.set_column(
+            data.column_names.index(vector_column_name),
+            vector_column_name,
+            pc.if_else(correct_ndims, vec_arr, pa.scalar(None)),
+        )
    return data


@@ -2286,7 +2351,8 @@ def _sanitize_nans(
        raise ValueError(
            f"Vector column {vector_column_name} has NaNs. "
            "Set on_bad_vectors='drop' to remove them, or "
-            "set on_bad_vectors='fill' and fill_value=<value> to replace them."
+            "set on_bad_vectors='fill' and fill_value=<value> to replace them. "
+            "Or set on_bad_vectors='null' to replace them with null."
        )
    elif on_bad_vectors == "fill":
        if fill_value is None:
@@ -2306,6 +2372,17 @@ def _sanitize_nans(
        np_arr = np_arr.reshape(-1, vec_arr.type.list_size)
        not_nulls = np.any(np_arr, axis=1)
        data = data.filter(~not_nulls)
+    elif on_bad_vectors == "null":
+        # Flag each flattened value that is NaN, then regroup the flags into one
+        # row per vector so a whole vector can be kept or nulled as a unit.
+        np_arr = np.isnan(vec_arr.values.to_numpy(zero_copy_only=False))
+        np_arr = np_arr.reshape(-1, vec_arr.type.list_size)
+        no_nans = ~np.any(np_arr, axis=1)  # True only for rows free of NaN
+        data = data.set_column(
+            data.column_names.index(vector_column_name),
+            vector_column_name,
+            pc.if_else(no_nans, vec_arr, pa.scalar(None)),
+        )
    return data


@@ -2571,7 +2648,7 @@ class AsyncTable:
            "append" and "overwrite".
        on_bad_vectors: str, default "error"
            What to do if any of the vectors are not the same size or contains NaNs.
-            One of "error", "drop", "fill".
+            One of "error", "drop", "fill", "null".
        fill_value: float, default 0.
            The value to use when filling vectors. Only used if on_bad_vectors="fill".

@@ -2654,7 +2731,7 @@ class AsyncTable:

    def vector_search(
        self,
-        query_vector: Optional[Union[VEC, Tuple]] = None,
+        query_vector: Union[VEC, Tuple],
    ) -> AsyncVectorQuery:
        """
        Search the table with a given query vector.
@@ -2693,6 +2770,8 @@ class AsyncTable:
                async_query = async_query.refine_factor(query.refine_factor)
            if query.vector_column:
                async_query = async_query.column(query.vector_column)
+            if query.ef:
+                async_query = async_query.ef(query.ef)

        if not query.prefilter:
            async_query = async_query.postfilter()
@@ -2856,6 +2935,19 @@ class AsyncTable:
        """
        return await self._inner.version()

+    async def list_versions(self):
+        """
+        List all versions of the table ("timestamp" is converted to a datetime)
+        """
+        versions = await self._inner.list_versions()
+        for v in versions:
+            # Split with integer math; dividing by the float 1e9 rounds ns epochs.
+            seconds, nanos = divmod(v["timestamp"], 1_000_000_000)
+            whole = datetime.fromtimestamp(seconds)
+            v["timestamp"] = whole + timedelta(microseconds=nanos // 1_000)
+
+        return versions
+
    async def checkout(self, version):
        """
        Checks out a specific version of the Table
--- a/python/python/tests/test_embeddings.py
+++ b/python/python/tests/test_embeddings.py
@@ -1,15 +1,6 @@
-#  Copyright 2023 LanceDB Developers
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright The LanceDB Authors
+
 from typing import List, Union
 from unittest.mock import MagicMock, patch

@@ -18,6 +9,7 @@ import lancedb
 import numpy as np
 import pyarrow as pa
 import pytest
+import pandas as pd
 from lancedb.conftest import MockTextEmbeddingFunction
 from lancedb.embeddings import (
    EmbeddingFunctionConfig,
@@ -89,14 +81,15 @@ def test_embedding_function(tmp_path):


 def test_embedding_with_bad_results(tmp_path):
-    @register("mock-embedding")
-    class MockEmbeddingFunction(TextEmbeddingFunction):
+    @register("null-embedding")
+    class NullEmbeddingFunction(TextEmbeddingFunction):
        def ndims(self):
            return 128

        def generate_embeddings(
            self, texts: Union[List[str], np.ndarray]
        ) -> list[Union[np.array, None]]:
+            # Return None, which is bad if field is non-nullable
            return [
                None if i % 2 == 0 else np.random.randn(self.ndims())
                for i in range(len(texts))
@@ -104,13 +97,17 @@ def test_embedding_with_bad_results(tmp_path):

    db = lancedb.connect(tmp_path)
    registry = EmbeddingFunctionRegistry.get_instance()
-    model = registry.get("mock-embedding").create()
+    model = registry.get("null-embedding").create()

    class Schema(LanceModel):
        text: str = model.SourceField()
        vector: Vector(model.ndims()) = model.VectorField()

    table = db.create_table("test", schema=Schema, mode="overwrite")
+    with pytest.raises(ValueError):
+        # Default on_bad_vectors is "error"
+        table.add([{"text": "hello world"}])
+
    table.add(
        [{"text": "hello world"}, {"text": "bar"}],
        on_bad_vectors="drop",
@@ -120,13 +117,169 @@ def test_embedding_with_bad_results(tmp_path):
    assert len(table) == 1
    assert df.iloc[0]["text"] == "bar"

-    # table = db.create_table("test2", schema=Schema, mode="overwrite")
-    # table.add(
-    #     [{"text": "hello world"}, {"text": "bar"}],
-    # )
-    # assert len(table) == 2
-    # tbl = table.to_arrow()
-    # assert tbl["vector"].null_count == 1
+    @register("nan-embedding")
+    class NanEmbeddingFunction(TextEmbeddingFunction):
+        def ndims(self):
+            return 128
+
+        def generate_embeddings(
+            self, texts: Union[List[str], np.ndarray]
+        ) -> list[Union[np.array, None]]:
+            # Even-indexed rows are all-NaN (bad) vectors; odd rows are valid.
+            return [
+                [np.nan] * 128 if i % 2 == 0 else np.random.randn(self.ndims())
+                for i in range(len(texts))
+            ]
+
+    db = lancedb.connect(tmp_path)
+    registry = EmbeddingFunctionRegistry.get_instance()
+    model = registry.get("nan-embedding").create()
+
+    table = db.create_table("test2", schema=Schema, mode="overwrite")
+    table.alter_columns(dict(path="vector", nullable=True))
+    table.add(
+        [{"text": "hello world"}, {"text": "bar"}],
+        on_bad_vectors="null",
+    )
+    assert len(table) == 2
+    tbl = table.to_arrow()
+    assert tbl["vector"].null_count == 1
+
+
+def test_with_existing_vectors(tmp_path):
+    @register("mock-embedding")
+    class MockEmbeddingFunction(TextEmbeddingFunction):
+        def ndims(self):
+            return 128
+
+        def generate_embeddings(
+            self, texts: Union[List[str], np.ndarray]
+        ) -> List[np.array]:
+            return [np.random.randn(self.ndims()).tolist() for _ in range(len(texts))]
+
+    registry = get_registry()
+    model = registry.get("mock-embedding").create()
+
+    class Schema(LanceModel):
+        text: str = model.SourceField()
+        vector: Vector(model.ndims()) = model.VectorField()
+
+    db = lancedb.connect(tmp_path)
+    tbl = db.create_table("test", schema=Schema, mode="overwrite")
+    tbl.add([{"text": "hello world", "vector": np.zeros(128).tolist()}])
+
+    embeddings = tbl.to_arrow()["vector"].to_pylist()
+    assert not np.any(embeddings), "all zeros"
+
+
+def test_embedding_function_with_pandas(tmp_path):
+    @register("mock-embedding")
+    class _MockEmbeddingFunction(TextEmbeddingFunction):
+        def ndims(self):
+            return 128
+
+        def generate_embeddings(
+            self, texts: Union[List[str], np.ndarray]
+        ) -> List[np.array]:
+            return [np.random.randn(self.ndims()).tolist() for _ in range(len(texts))]
+
+    registery = get_registry()
+    func = registery.get("mock-embedding").create()
+
+    class TestSchema(LanceModel):
+        text: str = func.SourceField()
+        val: int
+        vector: Vector(func.ndims()) = func.VectorField()
+
+    df = pd.DataFrame(
+        {
+            "text": ["hello world", "goodbye world"],
+            "val": [1, 2],
+            "not-used": ["s1", "s3"],
+        }
+    )
+    db = lancedb.connect(tmp_path)
+    tbl = db.create_table("test", schema=TestSchema, mode="overwrite", data=df)
+    schema = tbl.schema
+    assert schema.field("text").type == pa.string()
+    assert schema.field("val").type == pa.int64()
+    assert schema.field("vector").type == pa.list_(pa.float32(), 128)
+
+    df = pd.DataFrame(
+        {
+            "text": ["extra", "more"],
+            "val": [4, 5],
+            "misc-col": ["s1", "s3"],
+        }
+    )
+    tbl.add(df)
+
+    assert tbl.count_rows() == 4
+    embeddings = tbl.to_arrow()["vector"]
+    assert embeddings.null_count == 0
+
+    df = pd.DataFrame(
+        {
+            "text": ["with", "embeddings"],
+            "val": [6, 7],
+            "vector": [np.zeros(128).tolist(), np.zeros(128).tolist()],
+        }
+    )
+    tbl.add(df)
+
+    embeddings = tbl.search().where("val > 5").to_arrow()["vector"].to_pylist()
+    assert not np.any(embeddings), "all zeros"
+
+
+def test_multiple_embeddings_for_pandas(tmp_path):
+    @register("mock-embedding")
+    class MockFunc1(TextEmbeddingFunction):
+        def ndims(self):
+            return 128
+
+        def generate_embeddings(
+            self, texts: Union[List[str], np.ndarray]
+        ) -> List[np.array]:
+            return [np.random.randn(self.ndims()).tolist() for _ in range(len(texts))]
+
+    @register("mock-embedding2")
+    class MockFunc2(TextEmbeddingFunction):
+        def ndims(self):
+            return 512
+
+        def generate_embeddings(
+            self, texts: Union[List[str], np.ndarray]
+        ) -> List[np.array]:
+            return [np.random.randn(self.ndims()).tolist() for _ in range(len(texts))]
+
+    registery = get_registry()
+    func1 = registery.get("mock-embedding").create()
+    func2 = registery.get("mock-embedding2").create()
+
+    class TestSchema(LanceModel):
+        text: str = func1.SourceField()
+        val: int
+        vec1: Vector(func1.ndims()) = func1.VectorField()
+        prompt: str = func2.SourceField()
+        vec2: Vector(func2.ndims()) = func2.VectorField()
+
+    df = pd.DataFrame(
+        {
+            "text": ["hello world", "goodbye world"],
+            "val": [1, 2],
+            "prompt": ["hello", "goodbye"],
+        }
+    )
+    db = lancedb.connect(tmp_path)
+    tbl = db.create_table("test", schema=TestSchema, mode="overwrite", data=df)
+
+    schema = tbl.schema
+    assert schema.field("text").type == pa.string()
+    assert schema.field("val").type == pa.int64()
+    assert schema.field("vec1").type == pa.list_(pa.float32(), 128)
+    assert schema.field("prompt").type == pa.string()
+    assert schema.field("vec2").type == pa.list_(pa.float32(), 512)
+    assert tbl.count_rows() == 2


@pytest.mark.slow
--- a/python/python/tests/test_query.py
+++ b/python/python/tests/test_query.py
@@ -1,21 +1,9 @@
-#  Copyright 2023 LanceDB Developers
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright The LanceDB Authors

 import unittest.mock as mock
 from datetime import timedelta
-from typing import Optional

-import lance
 import lancedb
 from lancedb.index import IvfPq
 import numpy as np
@@ -23,41 +11,15 @@ import pandas.testing as tm
 import pyarrow as pa
 import pytest
 import pytest_asyncio
-from lancedb.db import LanceDBConnection
 from lancedb.pydantic import LanceModel, Vector
 from lancedb.query import AsyncQueryBase, LanceVectorQueryBuilder, Query
 from lancedb.table import AsyncTable, LanceTable


-class MockTable:
-    def __init__(self, tmp_path):
-        self.uri = tmp_path
-        self._conn = LanceDBConnection(self.uri)
-
-    def to_lance(self):
-        return lance.dataset(self.uri)
-
-    def _execute_query(self, query, batch_size: Optional[int] = None):
-        ds = self.to_lance()
-        return ds.scanner(
-            columns=query.columns,
-            filter=query.filter,
-            prefilter=query.prefilter,
-            nearest={
-                "column": query.vector_column,
-                "q": query.vector,
-                "k": query.k,
-                "metric": query.metric,
-                "nprobes": query.nprobes,
-                "refine_factor": query.refine_factor,
-            },
-            batch_size=batch_size,
-            offset=query.offset,
-        ).to_reader()
-
-
-@pytest.fixture
-def table(tmp_path) -> MockTable:
+@pytest.fixture(scope="module")
+def table(tmpdir_factory) -> lancedb.table.Table:
+    tmp_path = str(tmpdir_factory.mktemp("data"))
+    db = lancedb.connect(tmp_path)
    df = pa.table(
        {
            "vector": pa.array(
@@ -68,8 +30,7 @@ def table(tmp_path) -> MockTable:
            "float_field": pa.array([1.0, 2.0]),
        }
    )
-    lance.write_dataset(df, tmp_path)
-    return MockTable(tmp_path)
+    return db.create_table("test", df)


@pytest_asyncio.fixture
@@ -126,6 +87,12 @@ def test_query_builder(table):
    assert all(np.array(rs[0]["vector"]) == [1, 2])


+def test_with_row_id(table: lancedb.table.Table):
+    rs = table.search().with_row_id(True).to_arrow()
+    assert "_rowid" in rs.column_names
+    assert rs["_rowid"].to_pylist() == [0, 1]
+
+
 def test_vector_query_with_no_limit(table):
    with pytest.raises(ValueError):
        LanceVectorQueryBuilder(table, [0, 0], "vector").limit(0).select(
@@ -365,6 +332,12 @@ async def test_query_to_pandas_async(table_async: AsyncTable):
    assert df.shape == (0, 4)


+@pytest.mark.asyncio
+async def test_none_query(table_async: AsyncTable):
+    with pytest.raises(ValueError):
+        await table_async.query().nearest_to(None).to_arrow()
+
+
@pytest.mark.asyncio
 async def test_fast_search_async(tmp_path):
    db = await lancedb.connect_async(tmp_path)
--- a/python/python/tests/test_remote_db.py
+++ b/python/python/tests/test_remote_db.py
@@ -103,6 +103,47 @@ async def test_async_remote_db():
        assert table_names == []


+@pytest.mark.asyncio
+async def test_async_checkout():
+    def handler(request):
+        if request.path == "/v1/table/test/describe/":
+            request.send_response(200)
+            request.send_header("Content-Type", "application/json")
+            request.end_headers()
+            response = json.dumps({"version": 42, "schema": {"fields": []}})
+            request.wfile.write(response.encode())
+            return
+
+        content_len = int(request.headers.get("Content-Length"))
+        body = request.rfile.read(content_len)
+        body = json.loads(body)
+
+        print("body is", body)
+
+        count = 0
+        if body["version"] == 1:
+            count = 100
+        elif body["version"] == 2:
+            count = 200
+        elif body["version"] is None:
+            count = 300
+
+        request.send_response(200)
+        request.send_header("Content-Type", "application/json")
+        request.end_headers()
+        request.wfile.write(json.dumps(count).encode())
+
+    async with mock_lancedb_connection_async(handler) as db:
+        table = await db.open_table("test")
+        assert await table.count_rows() == 300
+        await table.checkout(1)
+        assert await table.count_rows() == 100
+        await table.checkout(2)
+        assert await table.count_rows() == 200
+        await table.checkout_latest()
+        assert await table.count_rows() == 300
+
+
@pytest.mark.asyncio
 async def test_http_error():
    request_id_holder = {"request_id": None}
@@ -185,8 +226,10 @@ def test_query_sync_minimal():
            "k": 10,
            "prefilter": False,
            "refine_factor": None,
+            "ef": None,
            "vector": [1.0, 2.0, 3.0],
            "nprobes": 20,
+            "version": None,
        }

        return pa.table({"id": [1, 2, 3]})
@@ -197,6 +240,24 @@ def test_query_sync_minimal():
        assert data == expected


+def test_query_sync_empty_query():
+    def handler(body):
+        assert body == {
+            "k": 10,
+            "filter": "true",
+            "vector": [],
+            "columns": ["id"],
+            "version": None,
+        }
+
+        return pa.table({"id": [1, 2, 3]})
+
+    with query_test_table(handler) as table:
+        data = table.search(None).where("true").select(["id"]).limit(10).to_list()
+        expected = [{"id": 1}, {"id": 2}, {"id": 3}]
+        assert data == expected
+
+
 def test_query_sync_maximal():
    def handler(body):
        assert body == {
@@ -206,11 +267,13 @@ def test_query_sync_maximal():
            "refine_factor": 10,
            "vector": [1.0, 2.0, 3.0],
            "nprobes": 5,
+            "ef": None,
            "filter": "id > 0",
            "columns": ["id", "name"],
            "vector_column": "vector2",
            "fast_search": True,
            "with_row_id": True,
+            "version": None,
        }

        return pa.table({"id": [1, 2, 3], "name": ["a", "b", "c"]})
@@ -229,6 +292,17 @@ def test_query_sync_maximal():
        )


+def test_query_sync_multiple_vectors():
+    def handler(_body):
+        return pa.table({"id": [1]})
+
+    with query_test_table(handler) as table:
+        results = table.search([[1, 2, 3], [4, 5, 6]]).limit(1).to_list()
+        assert len(results) == 2
+        results.sort(key=lambda x: x["query_index"])
+        assert results == [{"id": 1, "query_index": 0}, {"id": 1, "query_index": 1}]
+
+
 def test_query_sync_fts():
    def handler(body):
        assert body == {
@@ -238,6 +312,7 @@ def test_query_sync_fts():
            },
            "k": 10,
            "vector": [],
+            "version": None,
        }

        return pa.table({"id": [1, 2, 3]})
@@ -254,6 +329,7 @@ def test_query_sync_fts():
            "k": 42,
            "vector": [],
            "with_row_id": True,
+            "version": None,
        }

        return pa.table({"id": [1, 2, 3]})
@@ -279,6 +355,7 @@ def test_query_sync_hybrid():
                "k": 42,
                "vector": [],
                "with_row_id": True,
+                "version": None,
            }
            return pa.table({"_rowid": [1, 2, 3], "_score": [0.1, 0.2, 0.3]})
        else:
@@ -290,7 +367,9 @@ def test_query_sync_hybrid():
                "refine_factor": None,
                "vector": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                "nprobes": 20,
+                "ef": None,
                "with_row_id": True,
+                "version": None,
            }
            return pa.table({"_rowid": [1, 2, 3], "_distance": [0.1, 0.2, 0.3]})

--- a/python/python/tests/test_table.py
+++ b/python/python/tests/test_table.py
@@ -240,6 +240,121 @@ def test_add(db):
    _add(table, schema)


+def test_add_subschema(tmp_path):
+    db = lancedb.connect(tmp_path)
+    schema = pa.schema(
+        [
+            pa.field("vector", pa.list_(pa.float32(), 2), nullable=True),
+            pa.field("item", pa.string(), nullable=True),
+            pa.field("price", pa.float64(), nullable=False),
+        ]
+    )
+    table = db.create_table("test", schema=schema)
+
+    data = {"price": 10.0, "item": "foo"}
+    table.add([data])
+    data = {"price": 2.0, "vector": [3.1, 4.1]}
+    table.add([data])
+    data = {"price": 3.0, "vector": [5.9, 26.5], "item": "bar"}
+    table.add([data])
+
+    expected = pa.table(
+        {
+            "vector": [None, [3.1, 4.1], [5.9, 26.5]],
+            "item": ["foo", None, "bar"],
+            "price": [10.0, 2.0, 3.0],
+        },
+        schema=schema,
+    )
+    assert table.to_arrow() == expected
+
+    data = {"item": "foo"}
+    # We can't omit a column if it's not nullable
+    with pytest.raises(OSError, match="Invalid user input"):
+        table.add([data])
+
+    # We can add it if we make the column nullable
+    table.alter_columns(dict(path="price", nullable=True))
+    table.add([data])
+
+    expected_schema = pa.schema(
+        [
+            pa.field("vector", pa.list_(pa.float32(), 2), nullable=True),
+            pa.field("item", pa.string(), nullable=True),
+            pa.field("price", pa.float64(), nullable=True),
+        ]
+    )
+    expected = pa.table(
+        {
+            "vector": [None, [3.1, 4.1], [5.9, 26.5], None],
+            "item": ["foo", None, "bar", "foo"],
+            "price": [10.0, 2.0, 3.0, None],
+        },
+        schema=expected_schema,
+    )
+    assert table.to_arrow() == expected
+
+
+def test_add_nullability(tmp_path):
+    db = lancedb.connect(tmp_path)
+    schema = pa.schema(
+        [
+            pa.field("vector", pa.list_(pa.float32(), 2), nullable=False),
+            pa.field("id", pa.string(), nullable=False),
+        ]
+    )
+    table = db.create_table("test", schema=schema)
+
+    nullable_schema = pa.schema(
+        [
+            pa.field("vector", pa.list_(pa.float32(), 2), nullable=True),
+            pa.field("id", pa.string(), nullable=True),
+        ]
+    )
+    data = pa.table(
+        {
+            "vector": [[3.1, 4.1], [5.9, 26.5]],
+            "id": ["foo", "bar"],
+        },
+        schema=nullable_schema,
+    )
+    # We can add nullable schema if it doesn't actually contain nulls
+    table.add(data)
+
+    expected = data.cast(schema)
+    assert table.to_arrow() == expected
+
+    data = pa.table(
+        {
+            "vector": [None],
+            "id": ["baz"],
+        },
+        schema=nullable_schema,
+    )
+    # We can't add nullable schema if it contains nulls
+    with pytest.raises(Exception, match="Vector column vector has NaNs"):
+        table.add(data)
+
+    # But we can make it nullable
+    table.alter_columns(dict(path="vector", nullable=True))
+    table.add(data)
+
+    expected_schema = pa.schema(
+        [
+            pa.field("vector", pa.list_(pa.float32(), 2), nullable=True),
+            pa.field("id", pa.string(), nullable=False),
+        ]
+    )
+    expected = pa.table(
+        {
+            "vector": [[3.1, 4.1], [5.9, 26.5], None],
+            "id": ["foo", "bar", "baz"],
+        },
+        schema=expected_schema,
+    )
+    assert table.to_arrow() == expected
+
+
 def test_add_pydantic_model(db):
    # https://github.com/lancedb/lancedb/issues/562

@@ -892,10 +1007,15 @@ def test_empty_query(db):
    table = LanceTable.create(db, "my_table2", data=[{"id": i} for i in range(100)])
    df = table.search().select(["id"]).to_pandas()
    assert len(df) == 10
+    # None is the same as default
    df = table.search().select(["id"]).limit(None).to_pandas()
-    assert len(df) == 100
+    assert len(df) == 10
+    # invalid limit is the same as None, which is the same as default
    df = table.search().select(["id"]).limit(-1).to_pandas()
-    assert len(df) == 100
+    assert len(df) == 10
+    # valid limit should work
+    df = table.search().select(["id"]).limit(42).to_pandas()
+    assert len(df) == 42


 def test_search_with_schema_inf_single_vector(db):
--- a/python/src/query.rs
+++ b/python/src/query.rs
@@ -142,6 +142,13 @@ impl VectorQuery {
        self.inner = self.inner.clone().only_if(predicate);
    }

+    pub fn add_query_vector(&mut self, vector: Bound<'_, PyAny>) -> PyResult<()> {
+        let data: ArrayData = ArrayData::from_pyarrow_bound(&vector)?;
+        let array = make_array(data);
+        self.inner = self.inner.clone().add_query_vector(array).infer_error()?;
+        Ok(())
+    }
+
    pub fn select(&mut self, columns: Vec<(String, String)>) {
        self.inner = self.inner.clone().select(Select::dynamic(&columns));
    }
@@ -188,6 +195,10 @@ impl VectorQuery {
        self.inner = self.inner.clone().nprobes(nprobe as usize);
    }

+    pub fn ef(&mut self, ef: u32) {
+        self.inner = self.inner.clone().ef(ef as usize);
+    }
+
    pub fn bypass_vector_index(&mut self) {
        self.inner = self.inner.clone().bypass_vector_index()
    }
--- a/python/src/table.rs
+++ b/python/src/table.rs
@@ -8,7 +8,7 @@ use lancedb::table::{
 use pyo3::{
    exceptions::{PyRuntimeError, PyValueError},
    pyclass, pymethods,
-    types::{PyDict, PyDictMethods, PyString},
+    types::{IntoPyDict, PyDict, PyDictMethods, PyString},
    Bound, FromPyObject, PyAny, PyRef, PyResult, Python, ToPyObject,
 };
 use pyo3_asyncio_0_21::tokio::future_into_py;
@@ -246,6 +246,33 @@ impl Table {
        )
    }

+    pub fn list_versions(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
+        let inner = self_.inner_ref()?.clone();
+        future_into_py(self_.py(), async move {
+            let versions = inner.list_versions().await.infer_error()?;
+            let versions_as_dict = Python::with_gil(|py| {
+                versions
+                    .iter()
+                    .map(|v| {
+                        let dict = PyDict::new_bound(py);
+                        dict.set_item("version", v.version).unwrap();
+                        dict.set_item(
+                            "timestamp",
+                            v.timestamp.timestamp_nanos_opt().unwrap_or_default(),
+                        )
+                        .unwrap();
+
+                        let tup: Vec<(&String, &String)> = v.metadata.iter().collect();
+                        dict.set_item("metadata", tup.into_py_dict(py)).unwrap();
+                        dict.to_object(py)
+                    })
+                    .collect::<Vec<_>>()
+            });
+
+            Ok(versions_as_dict)
+        })
+    }
+
    pub fn checkout(self_: PyRef<'_, Self>, version: u64) -> PyResult<Bound<'_, PyAny>> {
        let inner = self_.inner_ref()?.clone();
        future_into_py(self_.py(), async move {
--- a/rust/ffi/node/Cargo.toml
+++ b/rust/ffi/node/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-node"
-version = "0.13.0-beta.1"
+version = "0.13.0"
 description = "Serverless, low-latency vector database for AI applications"
 license.workspace = true
 edition.workspace = true
--- a/rust/lancedb/Cargo.toml
+++ b/rust/lancedb/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb"
-version = "0.13.0-beta.1"
+version = "0.13.0"
 edition.workspace = true
 description = "LanceDB: A serverless, low-latency vector database for AI applications"
 license.workspace = true
@@ -46,10 +46,18 @@ serde = { version = "^1" }
 serde_json = { version = "1" }
 async-openai = { version = "0.20.0", optional = true }
 serde_with = { version = "3.8.1" }
+aws-sdk-bedrockruntime = { version = "1.27.0", optional = true }
 # For remote feature
-reqwest = { version = "0.12.0", features = ["gzip", "json", "stream"], optional = true }
-rand = { version = "0.8.3", features = ["small_rng"], optional = true}
-http = { version = "1",  optional = true } # Matching what is in reqwest
+reqwest = { version = "0.12.0", default-features = false, features = [
+    "charset",
+    "gzip",
+    "http2",
+    "json",
+    "macos-system-configuration",
+    "stream",
+], optional = true }
+rand = { version = "0.8.3", features = ["small_rng"], optional = true }
+http = { version = "1", optional = true } # Matching what is in reqwest
 uuid = { version = "1.7.0", features = ["v4"], optional = true }
 polars-arrow = { version = ">=0.37,<0.40.0", optional = true }
 polars = { version = ">=0.37,<0.40.0", optional = true }
@@ -72,11 +80,13 @@ aws-config = { version = "1.0" }
 aws-smithy-runtime = { version = "1.3" }
 http-body = "1" # Matching reqwest

+
 [features]
-default = []
+default = ["default-tls"]
 remote = ["dep:reqwest", "dep:http", "dep:rand", "dep:uuid"]
 fp16kernels = ["lance-linalg/fp16kernels"]
 s3-test = []
+bedrock = ["dep:aws-sdk-bedrockruntime"]
 openai = ["dep:async-openai", "dep:reqwest"]
 polars = ["dep:polars-arrow", "dep:polars"]
 sentence-transformers = [
@@ -87,6 +97,11 @@ sentence-transformers = [
    "dep:tokenizers"
 ]

+# TLS
+default-tls = ["reqwest?/default-tls"]
+native-tls = ["reqwest?/native-tls"]
+rustls-tls = ["reqwest?/rustls-tls"]
+
 [[example]]
 name = "openai"
 required-features = ["openai"]
@@ -94,3 +109,7 @@ required-features = ["openai"]
 [[example]]
 name = "sentence_transformers"
 required-features = ["sentence-transformers"]
+
+[[example]]
+name = "bedrock"
+required-features = ["bedrock"]
--- a/rust/lancedb/examples/bedrock.rs
+++ b/rust/lancedb/examples/bedrock.rs
@@ -0,0 +1,89 @@
+use std::{iter::once, sync::Arc};
+
+use arrow_array::{Float64Array, Int32Array, RecordBatch, RecordBatchIterator, StringArray};
+use arrow_schema::{DataType, Field, Schema};
+use aws_config::Region;
+use aws_sdk_bedrockruntime::Client;
+use futures::StreamExt;
+use lancedb::{
+    arrow::IntoArrow,
+    connect,
+    embeddings::{bedrock::BedrockEmbeddingFunction, EmbeddingDefinition, EmbeddingFunction},
+    query::{ExecutableQuery, QueryBase},
+    Result,
+};
+
+#[tokio::main]
+async fn main() -> Result<()> { // Example: embed sample rows with Bedrock, then run a one-result vector search
+    let tempdir = tempfile::tempdir().unwrap(); // scratch directory that backs the example database
+    let tempdir = tempdir.path().to_str().unwrap(); // shadowed with the directory path as &str for connect()
+
+    // Create the Bedrock embedding function: load AWS config with the region set to us-east-1
+    let region: String = "us-east-1".to_string();
+    let config = aws_config::defaults(aws_config::BehaviorVersion::latest())
+        .region(Region::new(region))
+        .load()
+        .await;
+
+    let embedding = Arc::new(BedrockEmbeddingFunction::new(
+        Client::new(&config), // Bedrock runtime client built from the AWS config loaded above
+    ));
+
+    let db = connect(tempdir).execute().await?; // connect to a database rooted at the temp directory
+    db.embedding_registry()
+        .register("bedrock", embedding.clone())?; // registered under the name "bedrock", referenced again below
+
+    let table = db
+        .create_table("vectors", make_data())
+        .add_embedding(EmbeddingDefinition::new(
+            "text", // presumably the source column to embed (matches the "text" field in make_data) - confirm
+            "bedrock", // same name the embedding function was registered under
+            Some("embeddings"), // presumably the destination column for the vectors - confirm
+        ))?
+        .execute()
+        .await?;
+
+    // Execute vector search: embed the query text with the same function, then ask for a single result
+    let query = Arc::new(StringArray::from_iter_values(once("something warm")));
+    let query_vector = embedding.compute_query_embeddings(query)?;
+    let mut results = table
+        .vector_search(query_vector)?
+        .limit(1)
+        .execute()
+        .await?;
+
+    let rb = results.next().await.unwrap()?; // first result batch; unwrap() panics if the stream yields nothing
+    let out = rb
+        .column_by_name("text")
+        .unwrap()
+        .as_any()
+        .downcast_ref::<StringArray>()
+        .unwrap();
+    let text = out.iter().next().unwrap().unwrap(); // first row's text; panics on an empty batch or a null value
+    println!("Closest match: {}", text);
+    Ok(())
+}
+
+fn make_data() -> impl IntoArrow { // builds one four-row batch (id, text, price) used as the example table data
+    let schema = Schema::new(vec![
+        Field::new("id", DataType::Int32, true), // third argument is the `nullable` flag
+        Field::new("text", DataType::Utf8, false),
+        Field::new("price", DataType::Float64, false),
+    ]);
+
+    let id = Int32Array::from(vec![1, 2, 3, 4]);
+    let text = StringArray::from_iter_values(vec![
+        "Black T-Shirt",
+        "Leather Jacket",
+        "Winter Parka",
+        "Hooded Sweatshirt",
+    ]);
+    let price = Float64Array::from(vec![10.0, 50.0, 100.0, 30.0]);
+    let schema = Arc::new(schema); // shared by the batch and the iterator below
+    let rb = RecordBatch::try_new(
+        schema.clone(),
+        vec![Arc::new(id), Arc::new(text), Arc::new(price)], // columns in the same order as the schema fields
+    )
+    .unwrap(); // panics if try_new returns an error
+    Box::new(RecordBatchIterator::new(vec![Ok(rb)], schema)) // iterator over the single batch
+}
--- a/rust/lancedb/src/embeddings.rs
+++ b/rust/lancedb/src/embeddings.rs
@@ -17,6 +17,9 @@ pub mod openai;
 #[cfg(feature = "sentence-transformers")]
 pub mod sentence_transformers;

+#[cfg(feature = "bedrock")]
+pub mod bedrock;
+
 use lance::arrow::RecordBatchExt;
 use std::{
    borrow::Cow,
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Lance Release	96933d7df8	Bump version: 0.16.0 → 0.16.1-beta.0	2024-11-21 21:52:39 +00:00
Lei Xu	d369233b3d	feat: bump lance to 0.20.0b2 (#1865 ) Bump lance version. Upstream change log: https://github.com/lancedb/lance/releases/tag/v0.20.0-beta.2	2024-11-21 13:16:59 -08:00
QianZhu	43a670ed4b	fix: `limit` docstring change (#1860 )	2024-11-21 10:50:50 -08:00
Bert	cb9a00a28d	feat: add list_versions to typescript, rust and remote python sdks (#1850 ) Will require update to lance dependency to bring in this change which makes the version serializable https://github.com/lancedb/lance/pull/3143	2024-11-21 13:35:14 -05:00
Max Epstein	72af977a73	fix(CohereReranker): updated default model_name param to newest v3 (#1862 )	2024-11-21 09:02:49 -08:00
Bert	7cecb71df0	feat: support for checkout and checkout_latest in remote sdks (#1863 )	2024-11-21 11:28:46 -05:00
QianZhu	285071e5c8	docs: full-text search doc update (#1861 ) Co-authored-by: BubbleCal <bubble-cal@outlook.com>	2024-11-20 21:07:30 -08:00
QianZhu	114866fbcf	docs: OSS doc improvement (#1859 ) OSS doc improvement - HNSW index parameter explanation and others. --------- Co-authored-by: BubbleCal <bubble-cal@outlook.com>	2024-11-20 17:51:11 -08:00
Frank Liu	5387c0e243	docs: add Voyage models to sidebar (#1858 )	2024-11-20 14:20:14 -08:00
Mr. Doge	53d1535de1	ci: musl x64,arm64 (#1853 ) untested 4 artifacts at: https://github.com/FuPeiJiang/lancedb/actions/runs/11926579058 node-native-linux-aarch64-musl 22.6 MB node-native-linux-x86_64-musl 23.6 MB nodejs-native-linux-aarch64-musl 26.7 MB nodejs-native-linux-x86_64-musl 27 MB this follows the same process as: https://github.com/lancedb/lancedb/pull/1816#issuecomment-2484816669 Closes #1388 Closes #1107 --------- Co-authored-by: Will Jones <willjones127@gmail.com>	2024-11-20 10:53:19 -08:00
BubbleCal	b2f88f0b29	feat: support to sepcify ef search param (#1844 ) Signed-off-by: BubbleCal <bubble-cal@outlook.com>	2024-11-19 23:12:25 +08:00
fzowl	f2e3989831	docs: voyageai embedding in the index (#1813 ) The code to support VoyageAI embedding and rerank models was added in the https://github.com/lancedb/lancedb/pull/1799 PR. Some of the documentation changes was also made, here adding the VoyageAI embedding doc link to the index page. These are my first PRs in lancedb and while i checked the documentation/code structure, i might missed something important. Please let me know if any changes required!	2024-11-18 14:34:16 -08:00
Emmanuel Ferdman	83ae52938a	docs: update migration reference (#1837 ) # PR Summary PR fixes the `migration.md` reference in `docs/src/guides/tables.md`. On the way, it also fixes some typos found in that document. Signed-off-by: Emmanuel Ferdman <emmanuelferdman@gmail.com>	2024-11-18 14:33:32 -08:00
Lei Xu	267aa83bf8	feat(python): check vector query is not None (#1847 ) Fix the type hints of `nearest_to` method, and raise `ValueError` when the input is None	2024-11-18 14:15:22 -08:00
Will Jones	cc72050206	chore: update package locks (#1845 ) Also ran `npm audit`.	2024-11-18 13:44:06 -08:00
Will Jones	72543c8b9d	test(python): test `with_row_id` in sync query (#1835 ) Also remove weird `MockTable` fixture.	2024-11-18 11:32:52 -08:00
Will Jones	97d6210c33	ci: remove invalid references (#1834 ) Fix release job	2024-11-18 11:32:44 -08:00
Ho Kim	a3d0c27b0a	feat: add support for rustls (#1842 ) Hello, this is a simple PR that supports `rustls-tls` feature. The `reqwest`\`s default TLS `default-tls` is enabled by default, to dismiss the side-effect. The user can use `rustls-tls` like this: ```toml lancedb = { version = "*", default-features = false, features = ["rustls-tls"] } ```	2024-11-18 10:36:20 -08:00
BubbleCal	b23d8abcdd	docs: introduce incremental indexing for FTS (#1789 ) don't merge it before https://github.com/lancedb/lancedb/pull/1769 merged --------- Signed-off-by: BubbleCal <bubble-cal@outlook.com>	2024-11-18 20:21:28 +08:00
Rob Meng	e3ea5cf9b9	chore: bump lance to 0.19.3 (#1839 )	2024-11-16 14:57:52 -05:00
Lance Release	4f8b086175	Updating package-lock.json	2024-11-15 20:18:16 +00:00
Lance Release	72330fb759	Bump version: 0.13.0-beta.3 → 0.13.0	2024-11-15 20:17:59 +00:00
Lance Release	e3b2c5f438	Bump version: 0.13.0-beta.2 → 0.13.0-beta.3	2024-11-15 20:17:55 +00:00
Lance Release	66a881b33a	Bump version: 0.16.0-beta.2 → 0.16.0	2024-11-15 20:17:34 +00:00
Lance Release	a7515d6ee2	Bump version: 0.16.0-beta.1 → 0.16.0-beta.2	2024-11-15 20:17:34 +00:00
Will Jones	587c0824af	feat: flexible null handling and insert subschemas in Python (#1827 ) * Test that we can insert subschemas (omit nullable columns) in Python. * More work is needed to support this in Node. See: https://github.com/lancedb/lancedb/issues/1832 * Test that we can insert data with nullable schema but no nulls in non-nullable schema. * Add `"null"` option for `on_bad_vectors` where we fill with null if the vector is bad. * Make null values not considered bad if the field itself is nullable.	2024-11-15 11:33:00 -08:00
Will Jones	b38a4269d0	fix(node): make openai and huggingface optional dependencies (#1809 ) BREAKING CHANGE: openai and huggingface now have separate entrypoints. Closes [#1624](https://github.com/lancedb/lancedb/issues/1624)	2024-11-14 15:04:35 -08:00
Will Jones	119d88b9db	ci: disable Windows Arm64 until the release builds work (#1833 ) Started to actually fix this, but it was taking too long https://github.com/lancedb/lancedb/pull/1831	2024-11-14 15:04:23 -08:00
StevenSu	74f660d223	feat: add new feature, add amazon bedrock embedding function (#1788 ) Add amazon bedrock embedding function to rust sdk. 1. Add BedrockEmbeddingModel ( lancedb/src/embeddings/bedrock.rs) 2. Add example lancedb/examples/bedrock.rs	2024-11-14 11:04:59 -08:00
Lance Release	b2b0979b90	Updating package-lock.json	2024-11-14 04:42:38 +00:00
Lance Release	ee2a40b182	Bump version: 0.13.0-beta.1 → 0.13.0-beta.2	2024-11-14 04:42:19 +00:00
Lance Release	4ca0b15354	Bump version: 0.16.0-beta.0 → 0.16.0-beta.1	2024-11-14 04:41:56 +00:00
Rob Meng	d8c217b47d	chore: bump lance to 0.19.2 (#1829 )	2024-11-13 23:23:02 -05:00
Rob Meng	b724b1a01f	feat: support remote empty query (#1828 ) Support sending empty query types to remote lancedb. also include offset and limit, where were previously omitted.	2024-11-13 23:04:52 -05:00
Will Jones	abd75e0ead	feat: search multiple query vectors as one query (#1811 ) Allows users to pass multiple query vector as part of a single query plan. This just runs the queries in parallel without any further optimization. It's mostly a convenience. Previously, I think this was only handled by the sync Python remote API. This makes it common across all SDKs. Closes https://github.com/lancedb/lancedb/issues/1803 ```python >>> import lancedb >>> import asyncio >>> >>> async def main(): ... db = await lancedb.connect_async("./demo") ... table = await db.create_table("demo", [{"id": 1, "vector": [1, 2, 3]}, {"id": 2, "vector": [4, 5, 6]}], mode="overwrite") ... return await table.query().nearest_to([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [4.0, 5.0, 6.0]]).limit(1).to_pandas() ... >>> asyncio.run(main()) query_index id vector _distance 0 2 2 [4.0, 5.0, 6.0] 0.0 1 1 2 [4.0, 5.0, 6.0] 0.0 2 0 1 [1.0, 2.0, 3.0] 0.0 ```	2024-11-13 16:05:16 -08:00
Will Jones	0fd8a50bd7	ci(node): run examples in CI (#1796 ) This is done as setup for a PR that will fix the OpenAI dependency issue. * [x] FTS examples * [x] Setup mock openai * [x] Ran `npm audit fix` * [x] sentences embeddings test * [x] Double check formatting of docs examples	2024-11-13 11:10:56 -08:00
Umut Hope YILDIRIM	9f228feb0e	ci: remove cache to fix build issues on windows arm runner (#1820 )	2024-11-13 09:27:10 -08:00
Ayush Chaurasia	90e9c52d0a	docs: update hybrid search example to latest langchain (#1824 ) Co-authored-by: qzhu <qian@lancedb.com>	2024-11-12 20:06:25 -08:00
Will Jones	68974a4e06	ci: add index URL to fix failing docs build (#1823 )	2024-11-12 16:54:22 -08:00
Lei Xu	4c9bab0d92	fix: use pandas with pydantic embedding column (#1818 ) * Make Pandas `DataFrame` works with embedding function + Subset of columns * Make `lancedb.create_table()` work with embedding function	2024-11-11 14:48:56 -08:00
QianZhu	5117aecc38	docs: search param explanation for OSS doc (#1815 ) ![Screenshot 2024-11-09 at 11 09 14 AM](https://github.com/user-attachments/assets/2aeba016-aeff-4658-85c6-8640285ba0c9)	2024-11-11 11:57:17 -08:00