Bump version: 0.16.0 → 0.16.1-beta.0

feat: bump lance to 0.20.0b2 (#1865)
Bump lance version. Upstream change log: https://github.com/lancedb/lance/releases/tag/v0.20.0-beta.2
2025-12-23 13:29:57 +00:00 · 2024-11-21 21:52:39 +00:00 · 2024-11-21 13:16:59 -08:00 · 2024-11-21 10:50:50 -08:00 · 2024-11-21 13:35:14 -05:00 · 2024-11-21 09:02:49 -08:00
65 changed files with 1922 additions and 404 deletions
--- a/.bumpversion.toml
+++ b/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.13.0-beta.1"
+current_version = "0.13.0"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
@@ -87,6 +87,16 @@ glob = "node/package.json"
 replace = "\"@lancedb/vectordb-linux-x64-gnu\": \"{new_version}\""
 search = "\"@lancedb/vectordb-linux-x64-gnu\": \"{current_version}\""

+[[tool.bumpversion.files]]
+glob = "node/package.json"
+replace = "\"@lancedb/vectordb-linux-arm64-musl\": \"{new_version}\""
+search = "\"@lancedb/vectordb-linux-arm64-musl\": \"{current_version}\""
+
+[[tool.bumpversion.files]]
+glob = "node/package.json"
+replace = "\"@lancedb/vectordb-linux-x64-musl\": \"{new_version}\""
+search = "\"@lancedb/vectordb-linux-x64-musl\": \"{current_version}\""
+
 [[tool.bumpversion.files]]
 glob = "node/package.json"
 replace = "\"@lancedb/vectordb-win32-x64-msvc\": \"{new_version}\""
--- a/.cargo/config.toml
+++ b/.cargo/config.toml
@@ -31,6 +31,9 @@ rustflags = [
 [target.x86_64-unknown-linux-gnu]
 rustflags = ["-C", "target-cpu=haswell", "-C", "target-feature=+avx2,+fma,+f16c"]

+[target.x86_64-unknown-linux-musl]
+rustflags = ["-C", "target-cpu=haswell", "-C", "target-feature=-crt-static,+avx2,+fma,+f16c"]
+
 [target.aarch64-apple-darwin]
 rustflags = ["-C", "target-cpu=apple-m1", "-C", "target-feature=+neon,+fp16,+fhm,+dotprod"]

--- a/.github/workflows/nodejs.yml
+++ b/.github/workflows/nodejs.yml
@@ -104,7 +104,6 @@ jobs:
        OPENAI_BASE_URL: http://0.0.0.0:8000
      run: |
        python ci/mock_openai.py &
-        ss -ltnp | grep :8000
        cd nodejs/examples
        npm test
  macos:
--- a/.github/workflows/npm-publish.yml
+++ b/.github/workflows/npm-publish.yml
@@ -101,7 +101,7 @@ jobs:
          path: |
            nodejs/dist/*.node

-  node-linux:
+  node-linux-gnu:
    name: vectordb (${{ matrix.config.arch}}-unknown-linux-gnu)
    runs-on: ${{ matrix.config.runner }}
    # Only runs on tags that matches the make-release action
@@ -137,11 +137,63 @@ jobs:
      - name: Upload Linux Artifacts
        uses: actions/upload-artifact@v4
        with:
-          name: node-native-linux-${{ matrix.config.arch }}
+          name: node-native-linux-${{ matrix.config.arch }}-gnu
          path: |
            node/dist/lancedb-vectordb-linux*.tgz

-  nodejs-linux:
+  node-linux-musl:
+    name: vectordb (${{ matrix.config.arch}}-unknown-linux-musl)
+    runs-on: ubuntu-latest
+    container: alpine:edge
+    # Only runs on tags that match the make-release action
+    if: startsWith(github.ref, 'refs/tags/v')
+    strategy:
+      fail-fast: false
+      matrix:
+        config:
+          - arch: x86_64
+          - arch: aarch64
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Install common dependencies  # also starts saved_env, which later steps append to and source
+        run: |
+          apk add protobuf-dev curl clang mold grep npm bash
+          curl --proto '=https' --tlsv1.3 -sSf https://raw.githubusercontent.com/rust-lang/rustup/refs/heads/master/rustup-init.sh | sh -s -- -y --default-toolchain 1.80.0
+          echo "source $HOME/.cargo/env" >> saved_env
+          echo "export CC=clang" >> saved_env
+          echo "export RUSTFLAGS='-Ctarget-cpu=haswell -Ctarget-feature=-crt-static,+avx2,+fma,+f16c -Clinker=clang -Clink-arg=-fuse-ld=mold'" >> saved_env
+      - name: Configure aarch64 build
+        if: ${{ matrix.config.arch == 'aarch64' }}  # exports appended here come later in saved_env, so they override the x86_64 RUSTFLAGS
+        run: |
+          source "$HOME/.cargo/env"
+          rustup target add aarch64-unknown-linux-musl --toolchain 1.80.0
+          crt=$(realpath $(dirname $(rustup which rustc))/../lib/rustlib/aarch64-unknown-linux-musl/lib/self-contained)
+          sysroot_lib=/usr/aarch64-unknown-linux-musl/usr/lib
+          apk_url=https://dl-cdn.alpinelinux.org/alpine/latest-stable/main/aarch64/
+          curl -sSf $apk_url > apk_list
+          for pkg in gcc libgcc musl; do curl -sSf $apk_url$(cat apk_list | grep -oP '(?<=")'$pkg'-\d.*?(?=")') | tar zxf -; done
+          mkdir -p $sysroot_lib
+          echo 'GROUP ( libgcc_s.so.1 -lgcc )' > $sysroot_lib/libgcc_s.so
+          cp usr/lib/libgcc_s.so.1 $sysroot_lib
+          cp usr/lib/gcc/aarch64-alpine-linux-musl/*/libgcc.a $sysroot_lib
+          cp lib/ld-musl-aarch64.so.1 $sysroot_lib/libc.so
+          echo '!<arch>' > $sysroot_lib/libdl.a
+          (cd $crt && cp crti.o crtbeginS.o crtendS.o crtn.o -t $sysroot_lib)
+          echo "export CARGO_BUILD_TARGET=aarch64-unknown-linux-musl" >> saved_env
+          echo "export RUSTFLAGS='-Ctarget-feature=-crt-static,+neon,+fp16,+fhm,+dotprod -Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=--target=aarch64-unknown-linux-musl -Clink-arg=--sysroot=/usr/aarch64-unknown-linux-musl -Clink-arg=-lc'" >> saved_env
+      - name: Build Linux Artifacts
+        run: |
+          source ./saved_env
+          bash ci/manylinux_node/build_vectordb.sh ${{ matrix.config.arch }}
+      - name: Upload Linux Artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: node-native-linux-${{ matrix.config.arch }}-musl
+          path: |
+            node/dist/lancedb-vectordb-linux*.tgz
+
+  nodejs-linux-gnu:
    name: lancedb (${{ matrix.config.arch}}-unknown-linux-gnu
    runs-on: ${{ matrix.config.runner }}
    # Only runs on tags that matches the make-release action
@@ -178,7 +230,7 @@ jobs:
      - name: Upload Linux Artifacts
        uses: actions/upload-artifact@v4
        with:
-          name: nodejs-native-linux-${{ matrix.config.arch }}
+          name: nodejs-native-linux-${{ matrix.config.arch }}-gnu
          path: |
            nodejs/dist/*.node
      # The generic files are the same in all distros so we just pick
@@ -192,6 +244,62 @@ jobs:
            nodejs/dist/*
            !nodejs/dist/*.node

+  nodejs-linux-musl:
+    name: lancedb (${{ matrix.config.arch}}-unknown-linux-musl)
+    runs-on: ubuntu-latest
+    container: alpine:edge
+    # Only runs on tags that match the make-release action
+    if: startsWith(github.ref, 'refs/tags/v')
+    strategy:
+      fail-fast: false
+      matrix:
+        config:
+          - arch: x86_64
+          - arch: aarch64
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Install common dependencies  # also starts saved_env, which later steps append to and source
+        run: |
+          apk add protobuf-dev curl clang mold grep npm bash openssl-dev openssl-libs-static
+          curl --proto '=https' --tlsv1.3 -sSf https://raw.githubusercontent.com/rust-lang/rustup/refs/heads/master/rustup-init.sh | sh -s -- -y --default-toolchain 1.80.0
+          echo "source $HOME/.cargo/env" >> saved_env
+          echo "export CC=clang" >> saved_env
+          echo "export RUSTFLAGS='-Ctarget-cpu=haswell -Ctarget-feature=-crt-static,+avx2,+fma,+f16c -Clinker=clang -Clink-arg=-fuse-ld=mold'" >> saved_env
+          echo "export X86_64_UNKNOWN_LINUX_MUSL_OPENSSL_INCLUDE_DIR=/usr/include" >> saved_env
+          echo "export X86_64_UNKNOWN_LINUX_MUSL_OPENSSL_LIB_DIR=/usr/lib" >> saved_env
+      - name: Configure aarch64 build
+        if: ${{ matrix.config.arch == 'aarch64' }}  # exports appended here come later in saved_env, so they override the x86_64 RUSTFLAGS
+        run: |
+          source "$HOME/.cargo/env"
+          rustup target add aarch64-unknown-linux-musl --toolchain 1.80.0
+          crt=$(realpath $(dirname $(rustup which rustc))/../lib/rustlib/aarch64-unknown-linux-musl/lib/self-contained)
+          sysroot_lib=/usr/aarch64-unknown-linux-musl/usr/lib
+          apk_url=https://dl-cdn.alpinelinux.org/alpine/latest-stable/main/aarch64/
+          curl -sSf $apk_url > apk_list
+          for pkg in gcc libgcc musl openssl-dev openssl-libs-static; do curl -sSf $apk_url$(cat apk_list | grep -oP '(?<=")'$pkg'-\d.*?(?=")') | tar zxf -; done
+          mkdir -p $sysroot_lib
+          echo 'GROUP ( libgcc_s.so.1 -lgcc )' > $sysroot_lib/libgcc_s.so
+          cp usr/lib/libgcc_s.so.1 $sysroot_lib
+          cp usr/lib/gcc/aarch64-alpine-linux-musl/*/libgcc.a $sysroot_lib
+          cp lib/ld-musl-aarch64.so.1 $sysroot_lib/libc.so
+          echo '!<arch>' > $sysroot_lib/libdl.a
+          (cd $crt && cp crti.o crtbeginS.o crtendS.o crtn.o -t $sysroot_lib)
+          echo "export CARGO_BUILD_TARGET=aarch64-unknown-linux-musl" >> saved_env
+          echo "export RUSTFLAGS='-Ctarget-feature=-crt-static,+neon,+fp16,+fhm,+dotprod -Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=--target=aarch64-unknown-linux-musl -Clink-arg=--sysroot=/usr/aarch64-unknown-linux-musl -Clink-arg=-lc'" >> saved_env
+          echo "export AARCH64_UNKNOWN_LINUX_MUSL_OPENSSL_INCLUDE_DIR=$(realpath usr/include)" >> saved_env
+          echo "export AARCH64_UNKNOWN_LINUX_MUSL_OPENSSL_LIB_DIR=$(realpath usr/lib)" >> saved_env
+      - name: Build Linux Artifacts
+        run: |
+          source ./saved_env
+          bash ci/manylinux_node/build_lancedb.sh ${{ matrix.config.arch }}
+      - name: Upload Linux Artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: nodejs-native-linux-${{ matrix.config.arch }}-musl
+          path: |
+            nodejs/dist/*.node
+
  node-windows:
    name: vectordb ${{ matrix.target }}
    runs-on: windows-2022
@@ -226,108 +334,109 @@ jobs:
          path: |
            node/dist/lancedb-vectordb-win32*.tgz

-  node-windows-arm64:
-    name: vectordb win32-arm64-msvc
-    runs-on: windows-4x-arm
-    if: startsWith(github.ref, 'refs/tags/v')
-    steps:
-      - uses: actions/checkout@v4
-      - name: Install Git
-        run: |
-          Invoke-WebRequest -Uri "https://github.com/git-for-windows/git/releases/download/v2.44.0.windows.1/Git-2.44.0-64-bit.exe" -OutFile "git-installer.exe"
-          Start-Process -FilePath "git-installer.exe" -ArgumentList "/VERYSILENT", "/NORESTART" -Wait
-        shell: powershell
-      - name: Add Git to PATH
-        run: |
-          Add-Content $env:GITHUB_PATH "C:\Program Files\Git\bin"
-          $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
-        shell: powershell
-      - name: Configure Git symlinks
-        run: git config --global core.symlinks true
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
-        with:
-          python-version: "3.13"
-      - name: Install Visual Studio Build Tools
-        run: |
-          Invoke-WebRequest -Uri "https://aka.ms/vs/17/release/vs_buildtools.exe" -OutFile "vs_buildtools.exe"
-          Start-Process -FilePath "vs_buildtools.exe" -ArgumentList "--quiet", "--wait", "--norestart", "--nocache", `
-            "--installPath", "C:\BuildTools", `
-            "--add", "Microsoft.VisualStudio.Component.VC.Tools.ARM64", `
-            "--add", "Microsoft.VisualStudio.Component.VC.Tools.x86.x64", `
-            "--add", "Microsoft.VisualStudio.Component.Windows11SDK.22621", `
-            "--add", "Microsoft.VisualStudio.Component.VC.ATL", `
-            "--add", "Microsoft.VisualStudio.Component.VC.ATLMFC", `
-            "--add", "Microsoft.VisualStudio.Component.VC.Llvm.Clang" -Wait
-        shell: powershell
-      - name: Add Visual Studio Build Tools to PATH
-        run: |
-          $vsPath = "C:\BuildTools\VC\Tools\MSVC"
-          $latestVersion = (Get-ChildItem $vsPath | Sort-Object {[version]$_.Name} -Descending)[0].Name
-          Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\arm64"
-          Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\x64"
-          Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\arm64"
-          Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\x64"
-          Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\Llvm\x64\bin"
+  # TODO: re-enable once working https://github.com/lancedb/lancedb/pull/1831
+  # node-windows-arm64:
+  #   name: vectordb win32-arm64-msvc
+  #   runs-on: windows-4x-arm
+  #   if: startsWith(github.ref, 'refs/tags/v')
+  #   steps:
+  #     - uses: actions/checkout@v4
+  #     - name: Install Git
+  #       run: |
+  #         Invoke-WebRequest -Uri "https://github.com/git-for-windows/git/releases/download/v2.44.0.windows.1/Git-2.44.0-64-bit.exe" -OutFile "git-installer.exe"
+  #         Start-Process -FilePath "git-installer.exe" -ArgumentList "/VERYSILENT", "/NORESTART" -Wait
+  #       shell: powershell
+  #     - name: Add Git to PATH
+  #       run: |
+  #         Add-Content $env:GITHUB_PATH "C:\Program Files\Git\bin"
+  #         $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
+  #       shell: powershell
+  #     - name: Configure Git symlinks
+  #       run: git config --global core.symlinks true
+  #     - uses: actions/checkout@v4
+  #     - uses: actions/setup-python@v5
+  #       with:
+  #         python-version: "3.13"
+  #     - name: Install Visual Studio Build Tools
+  #       run: |
+  #         Invoke-WebRequest -Uri "https://aka.ms/vs/17/release/vs_buildtools.exe" -OutFile "vs_buildtools.exe"
+  #         Start-Process -FilePath "vs_buildtools.exe" -ArgumentList "--quiet", "--wait", "--norestart", "--nocache", `
+  #           "--installPath", "C:\BuildTools", `
+  #           "--add", "Microsoft.VisualStudio.Component.VC.Tools.ARM64", `
+  #           "--add", "Microsoft.VisualStudio.Component.VC.Tools.x86.x64", `
+  #           "--add", "Microsoft.VisualStudio.Component.Windows11SDK.22621", `
+  #           "--add", "Microsoft.VisualStudio.Component.VC.ATL", `
+  #           "--add", "Microsoft.VisualStudio.Component.VC.ATLMFC", `
+  #           "--add", "Microsoft.VisualStudio.Component.VC.Llvm.Clang" -Wait
+  #       shell: powershell
+  #     - name: Add Visual Studio Build Tools to PATH
+  #       run: |
+  #         $vsPath = "C:\BuildTools\VC\Tools\MSVC"
+  #         $latestVersion = (Get-ChildItem $vsPath | Sort-Object {[version]$_.Name} -Descending)[0].Name
+  #         Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\arm64"
+  #         Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\x64"
+  #         Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\arm64"
+  #         Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\x64"
+  #         Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\Llvm\x64\bin"

-          # Add MSVC runtime libraries to LIB
-          $env:LIB = "C:\BuildTools\VC\Tools\MSVC\$latestVersion\lib\arm64;" + 
-                     "C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\um\arm64;" +
-                     "C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\ucrt\arm64"
-          Add-Content $env:GITHUB_ENV "LIB=$env:LIB"
+  #         # Add MSVC runtime libraries to LIB
+  #         $env:LIB = "C:\BuildTools\VC\Tools\MSVC\$latestVersion\lib\arm64;" +
+  #                    "C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\um\arm64;" +
+  #                    "C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\ucrt\arm64"
+  #         Add-Content $env:GITHUB_ENV "LIB=$env:LIB"

-          # Add INCLUDE paths
-          $env:INCLUDE = "C:\BuildTools\VC\Tools\MSVC\$latestVersion\include;" +
-                        "C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\ucrt;" +
-                        "C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\um;" +
-                        "C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\shared"
-          Add-Content $env:GITHUB_ENV "INCLUDE=$env:INCLUDE"
-        shell: powershell
-      - name: Install Rust
-        run: |
-          Invoke-WebRequest https://win.rustup.rs/x86_64 -OutFile rustup-init.exe
-          .\rustup-init.exe -y --default-host aarch64-pc-windows-msvc
-        shell: powershell
-      - name: Add Rust to PATH
-        run: |
-          Add-Content $env:GITHUB_PATH "$env:USERPROFILE\.cargo\bin"
-        shell: powershell
+  #         # Add INCLUDE paths
+  #         $env:INCLUDE = "C:\BuildTools\VC\Tools\MSVC\$latestVersion\include;" +
+  #                       "C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\ucrt;" +
+  #                       "C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\um;" +
+  #                       "C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\shared"
+  #         Add-Content $env:GITHUB_ENV "INCLUDE=$env:INCLUDE"
+  #       shell: powershell
+  #     - name: Install Rust
+  #       run: |
+  #         Invoke-WebRequest https://win.rustup.rs/x86_64 -OutFile rustup-init.exe
+  #         .\rustup-init.exe -y --default-host aarch64-pc-windows-msvc
+  #       shell: powershell
+  #     - name: Add Rust to PATH
+  #       run: |
+  #         Add-Content $env:GITHUB_PATH "$env:USERPROFILE\.cargo\bin"
+  #       shell: powershell

-      - uses: Swatinem/rust-cache@v2
-        with:
-          workspaces: rust
-      - name: Install 7-Zip ARM
-        run: |
-          New-Item -Path 'C:\7zip' -ItemType Directory
-          Invoke-WebRequest https://7-zip.org/a/7z2408-arm64.exe -OutFile C:\7zip\7z-installer.exe
-          Start-Process -FilePath C:\7zip\7z-installer.exe -ArgumentList '/S' -Wait
-        shell: powershell
-      - name: Add 7-Zip to PATH
-        run: Add-Content $env:GITHUB_PATH "C:\Program Files\7-Zip"
-        shell: powershell
-      - name: Install Protoc v21.12
-        working-directory: C:\
-        run: |
-          if (Test-Path 'C:\protoc') {
-              Write-Host "Protoc directory exists, skipping installation"
-              return
-          }
-          New-Item -Path 'C:\protoc' -ItemType Directory
-          Set-Location C:\protoc
-          Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
-          & 'C:\Program Files\7-Zip\7z.exe' x protoc.zip
-        shell: powershell
-      - name: Add Protoc to PATH
-        run: Add-Content $env:GITHUB_PATH "C:\protoc\bin"
-        shell: powershell
-      - name: Build Windows native node modules
-        run: .\ci\build_windows_artifacts.ps1 aarch64-pc-windows-msvc
-      - name: Upload Windows ARM64 Artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          name: node-native-windows-arm64
-          path: |
-            node/dist/*.node
+  #     - uses: Swatinem/rust-cache@v2
+  #       with:
+  #         workspaces: rust
+  #     - name: Install 7-Zip ARM
+  #       run: |
+  #         New-Item -Path 'C:\7zip' -ItemType Directory
+  #         Invoke-WebRequest https://7-zip.org/a/7z2408-arm64.exe -OutFile C:\7zip\7z-installer.exe
+  #         Start-Process -FilePath C:\7zip\7z-installer.exe -ArgumentList '/S' -Wait
+  #       shell: powershell
+  #     - name: Add 7-Zip to PATH
+  #       run: Add-Content $env:GITHUB_PATH "C:\Program Files\7-Zip"
+  #       shell: powershell
+  #     - name: Install Protoc v21.12
+  #       working-directory: C:\
+  #       run: |
+  #         if (Test-Path 'C:\protoc') {
+  #             Write-Host "Protoc directory exists, skipping installation"
+  #             return
+  #         }
+  #         New-Item -Path 'C:\protoc' -ItemType Directory
+  #         Set-Location C:\protoc
+  #         Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
+  #         & 'C:\Program Files\7-Zip\7z.exe' x protoc.zip
+  #       shell: powershell
+  #     - name: Add Protoc to PATH
+  #       run: Add-Content $env:GITHUB_PATH "C:\protoc\bin"
+  #       shell: powershell
+  #     - name: Build Windows native node modules
+  #       run: .\ci\build_windows_artifacts.ps1 aarch64-pc-windows-msvc
+  #     - name: Upload Windows ARM64 Artifacts
+  #       uses: actions/upload-artifact@v4
+  #       with:
+  #         name: node-native-windows-arm64
+  #         path: |
+  #           node/dist/*.node

  nodejs-windows:
    name: lancedb ${{ matrix.target }}
@@ -363,102 +472,103 @@ jobs:
          path: |
            nodejs/dist/*.node

-  nodejs-windows-arm64:
-    name: lancedb win32-arm64-msvc
-    runs-on: windows-4x-arm
-    if: startsWith(github.ref, 'refs/tags/v')
-    steps:
-      - uses: actions/checkout@v4
-      - name: Install Git
-        run: |
-          Invoke-WebRequest -Uri "https://github.com/git-for-windows/git/releases/download/v2.44.0.windows.1/Git-2.44.0-64-bit.exe" -OutFile "git-installer.exe"
-          Start-Process -FilePath "git-installer.exe" -ArgumentList "/VERYSILENT", "/NORESTART" -Wait
-        shell: powershell
-      - name: Add Git to PATH
-        run: |
-          Add-Content $env:GITHUB_PATH "C:\Program Files\Git\bin"
-          $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
-        shell: powershell
-      - name: Configure Git symlinks
-        run: git config --global core.symlinks true
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
-        with:
-          python-version: "3.13"
-      - name: Install Visual Studio Build Tools
-        run: |
-          Invoke-WebRequest -Uri "https://aka.ms/vs/17/release/vs_buildtools.exe" -OutFile "vs_buildtools.exe"
-          Start-Process -FilePath "vs_buildtools.exe" -ArgumentList "--quiet", "--wait", "--norestart", "--nocache", `
-            "--installPath", "C:\BuildTools", `
-            "--add", "Microsoft.VisualStudio.Component.VC.Tools.ARM64", `
-            "--add", "Microsoft.VisualStudio.Component.VC.Tools.x86.x64", `
-            "--add", "Microsoft.VisualStudio.Component.Windows11SDK.22621", `
-            "--add", "Microsoft.VisualStudio.Component.VC.ATL", `
-            "--add", "Microsoft.VisualStudio.Component.VC.ATLMFC", `
-            "--add", "Microsoft.VisualStudio.Component.VC.Llvm.Clang" -Wait
-        shell: powershell
-      - name: Add Visual Studio Build Tools to PATH
-        run: |
-          $vsPath = "C:\BuildTools\VC\Tools\MSVC"
-          $latestVersion = (Get-ChildItem $vsPath | Sort-Object {[version]$_.Name} -Descending)[0].Name
-          Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\arm64"
-          Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\x64"
-          Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\arm64"
-          Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\x64"
-          Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\Llvm\x64\bin"
+  # TODO: re-enable once working https://github.com/lancedb/lancedb/pull/1831
+  # nodejs-windows-arm64:
+  #   name: lancedb win32-arm64-msvc
+  #   runs-on: windows-4x-arm
+  #   if: startsWith(github.ref, 'refs/tags/v')
+  #   steps:
+  #     - uses: actions/checkout@v4
+  #     - name: Install Git
+  #       run: |
+  #         Invoke-WebRequest -Uri "https://github.com/git-for-windows/git/releases/download/v2.44.0.windows.1/Git-2.44.0-64-bit.exe" -OutFile "git-installer.exe"
+  #         Start-Process -FilePath "git-installer.exe" -ArgumentList "/VERYSILENT", "/NORESTART" -Wait
+  #       shell: powershell
+  #     - name: Add Git to PATH
+  #       run: |
+  #         Add-Content $env:GITHUB_PATH "C:\Program Files\Git\bin"
+  #         $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
+  #       shell: powershell
+  #     - name: Configure Git symlinks
+  #       run: git config --global core.symlinks true
+  #     - uses: actions/checkout@v4
+  #     - uses: actions/setup-python@v5
+  #       with:
+  #         python-version: "3.13"
+  #     - name: Install Visual Studio Build Tools
+  #       run: |
+  #         Invoke-WebRequest -Uri "https://aka.ms/vs/17/release/vs_buildtools.exe" -OutFile "vs_buildtools.exe"
+  #         Start-Process -FilePath "vs_buildtools.exe" -ArgumentList "--quiet", "--wait", "--norestart", "--nocache", `
+  #           "--installPath", "C:\BuildTools", `
+  #           "--add", "Microsoft.VisualStudio.Component.VC.Tools.ARM64", `
+  #           "--add", "Microsoft.VisualStudio.Component.VC.Tools.x86.x64", `
+  #           "--add", "Microsoft.VisualStudio.Component.Windows11SDK.22621", `
+  #           "--add", "Microsoft.VisualStudio.Component.VC.ATL", `
+  #           "--add", "Microsoft.VisualStudio.Component.VC.ATLMFC", `
+  #           "--add", "Microsoft.VisualStudio.Component.VC.Llvm.Clang" -Wait
+  #       shell: powershell
+  #     - name: Add Visual Studio Build Tools to PATH
+  #       run: |
+  #         $vsPath = "C:\BuildTools\VC\Tools\MSVC"
+  #         $latestVersion = (Get-ChildItem $vsPath | Sort-Object {[version]$_.Name} -Descending)[0].Name
+  #         Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\arm64"
+  #         Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\x64"
+  #         Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\arm64"
+  #         Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\x64"
+  #         Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\Llvm\x64\bin"

-          $env:LIB = ""
-          Add-Content $env:GITHUB_ENV "LIB=C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\um\arm64;C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\ucrt\arm64"
-        shell: powershell
-      - name: Install Rust
-        run: |
-          Invoke-WebRequest https://win.rustup.rs/x86_64 -OutFile rustup-init.exe
-          .\rustup-init.exe -y --default-host aarch64-pc-windows-msvc
-        shell: powershell
-      - name: Add Rust to PATH
-        run: |
-          Add-Content $env:GITHUB_PATH "$env:USERPROFILE\.cargo\bin"
-        shell: powershell
+  #         $env:LIB = ""
+  #         Add-Content $env:GITHUB_ENV "LIB=C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\um\arm64;C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\ucrt\arm64"
+  #       shell: powershell
+  #     - name: Install Rust
+  #       run: |
+  #         Invoke-WebRequest https://win.rustup.rs/x86_64 -OutFile rustup-init.exe
+  #         .\rustup-init.exe -y --default-host aarch64-pc-windows-msvc
+  #       shell: powershell
+  #     - name: Add Rust to PATH
+  #       run: |
+  #         Add-Content $env:GITHUB_PATH "$env:USERPROFILE\.cargo\bin"
+  #       shell: powershell

-      - uses: Swatinem/rust-cache@v2
-        with:
-          workspaces: rust
-      - name: Install 7-Zip ARM
-        run: |
-          New-Item -Path 'C:\7zip' -ItemType Directory
-          Invoke-WebRequest https://7-zip.org/a/7z2408-arm64.exe -OutFile C:\7zip\7z-installer.exe
-          Start-Process -FilePath C:\7zip\7z-installer.exe -ArgumentList '/S' -Wait
-        shell: powershell
-      - name: Add 7-Zip to PATH
-        run: Add-Content $env:GITHUB_PATH "C:\Program Files\7-Zip"
-        shell: powershell
-      - name: Install Protoc v21.12
-        working-directory: C:\
-        run: |
-          if (Test-Path 'C:\protoc') {
-              Write-Host "Protoc directory exists, skipping installation"
-              return
-          }
-          New-Item -Path 'C:\protoc' -ItemType Directory
-          Set-Location C:\protoc
-          Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
-          & 'C:\Program Files\7-Zip\7z.exe' x protoc.zip
-        shell: powershell
-      - name: Add Protoc to PATH
-        run: Add-Content $env:GITHUB_PATH "C:\protoc\bin"
-        shell: powershell
-      - name: Build Windows native node modules
-        run: .\ci\build_windows_artifacts_nodejs.ps1 aarch64-pc-windows-msvc
-      - name: Upload Windows ARM64 Artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          name: nodejs-native-windows-arm64
-          path: |
-            nodejs/dist/*.node
+  #     - uses: Swatinem/rust-cache@v2
+  #       with:
+  #         workspaces: rust
+  #     - name: Install 7-Zip ARM
+  #       run: |
+  #         New-Item -Path 'C:\7zip' -ItemType Directory
+  #         Invoke-WebRequest https://7-zip.org/a/7z2408-arm64.exe -OutFile C:\7zip\7z-installer.exe
+  #         Start-Process -FilePath C:\7zip\7z-installer.exe -ArgumentList '/S' -Wait
+  #       shell: powershell
+  #     - name: Add 7-Zip to PATH
+  #       run: Add-Content $env:GITHUB_PATH "C:\Program Files\7-Zip"
+  #       shell: powershell
+  #     - name: Install Protoc v21.12
+  #       working-directory: C:\
+  #       run: |
+  #         if (Test-Path 'C:\protoc') {
+  #             Write-Host "Protoc directory exists, skipping installation"
+  #             return
+  #         }
+  #         New-Item -Path 'C:\protoc' -ItemType Directory
+  #         Set-Location C:\protoc
+  #         Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
+  #         & 'C:\Program Files\7-Zip\7z.exe' x protoc.zip
+  #       shell: powershell
+  #     - name: Add Protoc to PATH
+  #       run: Add-Content $env:GITHUB_PATH "C:\protoc\bin"
+  #       shell: powershell
+  #     - name: Build Windows native node modules
+  #       run: .\ci\build_windows_artifacts_nodejs.ps1 aarch64-pc-windows-msvc
+  #     - name: Upload Windows ARM64 Artifacts
+  #       uses: actions/upload-artifact@v4
+  #       with:
+  #         name: nodejs-native-windows-arm64
+  #         path: |
+  #           nodejs/dist/*.node

  release:
    name: vectordb NPM Publish
-    needs: [node, node-macos, node-linux, node-windows, node-windows-arm64]
+    needs: [node, node-macos, node-linux-gnu, node-linux-musl, node-windows]
    runs-on: ubuntu-latest
    # Only runs on tags that matches the make-release action
    if: startsWith(github.ref, 'refs/tags/v')
@@ -476,7 +586,7 @@ jobs:
        env:
          NODE_AUTH_TOKEN: ${{ secrets.LANCEDB_NPM_REGISTRY_TOKEN }}
        run: |
-          # Tag beta as "preview" instead of default "latest". See lancedb 
+          # Tag beta as "preview" instead of default "latest". See lancedb
          # npm publish step for more info.
          if [[ $GITHUB_REF =~ refs/tags/v(.*)-beta.* ]]; then
            PUBLISH_ARGS="--tag preview"
@@ -498,7 +608,7 @@ jobs:

  release-nodejs:
    name: lancedb NPM Publish
-    needs: [nodejs-macos, nodejs-linux, nodejs-windows, nodejs-windows-arm64]
+    needs: [nodejs-macos, nodejs-linux-gnu, nodejs-linux-musl, nodejs-windows]
    runs-on: ubuntu-latest
    # Only runs on tags that matches the make-release action
    if: startsWith(github.ref, 'refs/tags/v')
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -18,18 +18,18 @@ repository = "https://github.com/lancedb/lancedb"
 description = "Serverless, low-latency vector database for AI applications"
 keywords = ["lancedb", "lance", "database", "vector", "search"]
 categories = ["database-implementations"]
-rust-version = "1.80.0" # TODO: lower this once we upgrade Lance again.
+rust-version = "1.80.0"                                                     # TODO: lower this once we upgrade Lance again.

 [workspace.dependencies]
-lance = { "version" = "=0.19.2", "features" = [
+lance = { "version" = "=0.20.0", "features" = [
    "dynamodb",
-], git = "https://github.com/lancedb/lance.git", tag = "v0.19.2" }
-lance-index = { "version" = "=0.19.2", git = "https://github.com/lancedb/lance.git", tag = "v0.19.2" }
-lance-linalg = { "version" = "=0.19.2", git = "https://github.com/lancedb/lance.git", tag = "v0.19.2" }
-lance-table = { "version" = "=0.19.2", git = "https://github.com/lancedb/lance.git", tag = "v0.19.2" }
-lance-testing = { "version" = "=0.19.2", git = "https://github.com/lancedb/lance.git", tag = "v0.19.2" }
-lance-datafusion = { "version" = "=0.19.2", git = "https://github.com/lancedb/lance.git", tag = "v0.19.2" }
-lance-encoding = { "version" = "=0.19.2", git = "https://github.com/lancedb/lance.git", tag = "v0.19.2" }
+], git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
+lance-index = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
+lance-linalg = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
+lance-table = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
+lance-testing = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
+lance-datafusion = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
+lance-encoding = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
 # Note that this one does not include pyarrow
 arrow = { version = "52.2", optional = false }
 arrow-array = "52.2"
--- a/ci/manylinux_node/build_lancedb.sh
+++ b/ci/manylinux_node/build_lancedb.sh
@@ -11,7 +11,8 @@ fi
 export OPENSSL_STATIC=1
 export OPENSSL_INCLUDE_DIR=/usr/local/include/openssl

-source $HOME/.bashrc
+#Alpine doesn't have .bashrc
+FILE=$HOME/.bashrc && test -f $FILE && source $FILE

 cd nodejs
 npm ci
--- a/ci/manylinux_node/build_vectordb.sh
+++ b/ci/manylinux_node/build_vectordb.sh
@@ -5,13 +5,14 @@ ARCH=${1:-x86_64}

 if [ "$ARCH" = "x86_64" ]; then
    export OPENSSL_LIB_DIR=/usr/local/lib64/
-else 
+else
    export OPENSSL_LIB_DIR=/usr/local/lib/
 fi
 export OPENSSL_STATIC=1
 export OPENSSL_INCLUDE_DIR=/usr/local/include/openssl

-source $HOME/.bashrc
+#Alpine doesn't have .bashrc
+FILE=$HOME/.bashrc && test -f $FILE && source $FILE

 cd node
 npm ci
--- a/docs/mkdocs.yml
+++ b/docs/mkdocs.yml
@@ -138,6 +138,7 @@ nav:
              - Jina Reranker: reranking/jina.md
              - OpenAI Reranker: reranking/openai.md
              - AnswerDotAi Rerankers: reranking/answerdotai.md
+              - Voyage AI Rerankers: reranking/voyageai.md
              - Building Custom Rerankers: reranking/custom_reranker.md
              - Example: notebooks/lancedb_reranking.ipynb
          - Filtering: sql.md
@@ -165,6 +166,7 @@ nav:
                  - Jina Embeddings: embeddings/available_embedding_models/text_embedding_functions/jina_embedding.md
                  - AWS Bedrock Text Embedding Functions: embeddings/available_embedding_models/text_embedding_functions/aws_bedrock_embedding.md
                  - IBM watsonx.ai Embeddings: embeddings/available_embedding_models/text_embedding_functions/ibm_watsonx_ai_embedding.md
+                  - Voyage AI Embeddings: embeddings/available_embedding_models/text_embedding_functions/voyageai_embedding.md
              - Multimodal Embedding Functions:
                  - OpenClip embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/openclip_embedding.md
                  - Imagebind embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/imagebind_embedding.md
--- a/docs/package-lock.json
+++ b/docs/package-lock.json
@@ -19,7 +19,7 @@
    },
    "../node": {
      "name": "vectordb",
-      "version": "0.4.6",
+      "version": "0.12.0",
      "cpu": [
        "x64",
        "arm64"
@@ -31,9 +31,7 @@
        "win32"
      ],
      "dependencies": {
-        "@apache-arrow/ts": "^14.0.2",
        "@neon-rs/load": "^0.0.74",
-        "apache-arrow": "^14.0.2",
        "axios": "^1.4.0"
      },
      "devDependencies": {
@@ -46,6 +44,7 @@
        "@types/temp": "^0.9.1",
        "@types/uuid": "^9.0.3",
        "@typescript-eslint/eslint-plugin": "^5.59.1",
+        "apache-arrow-old": "npm:apache-arrow@13.0.0",
        "cargo-cp-artifact": "^0.1",
        "chai": "^4.3.7",
        "chai-as-promised": "^7.1.1",
@@ -62,15 +61,19 @@
        "ts-node-dev": "^2.0.0",
        "typedoc": "^0.24.7",
        "typedoc-plugin-markdown": "^3.15.3",
-        "typescript": "*",
+        "typescript": "^5.1.0",
        "uuid": "^9.0.0"
      },
      "optionalDependencies": {
-        "@lancedb/vectordb-darwin-arm64": "0.4.6",
-        "@lancedb/vectordb-darwin-x64": "0.4.6",
-        "@lancedb/vectordb-linux-arm64-gnu": "0.4.6",
-        "@lancedb/vectordb-linux-x64-gnu": "0.4.6",
-        "@lancedb/vectordb-win32-x64-msvc": "0.4.6"
+        "@lancedb/vectordb-darwin-arm64": "0.12.0",
+        "@lancedb/vectordb-darwin-x64": "0.12.0",
+        "@lancedb/vectordb-linux-arm64-gnu": "0.12.0",
+        "@lancedb/vectordb-linux-x64-gnu": "0.12.0",
+        "@lancedb/vectordb-win32-x64-msvc": "0.12.0"
+      },
+      "peerDependencies": {
+        "@apache-arrow/ts": "^14.0.2",
+        "apache-arrow": "^14.0.2"
      }
    },
    "../node/node_modules/apache-arrow": {
--- a/docs/src/ann_indexes.md
+++ b/docs/src/ann_indexes.md
@@ -277,7 +277,15 @@ Product quantization can lead to approximately `16 * sizeof(float32) / 1 = 64` t
 Higher number of partitions could lead to more efficient I/O during queries and better accuracy, but it takes much more time to train.
 On `SIFT-1M` dataset, our benchmark shows that keeping each partition 1K-4K rows lead to a good latency / recall.

-`num_sub_vectors` specifies how many Product Quantization (PQ) short codes to generate on each vector. Because
+`num_sub_vectors` specifies how many Product Quantization (PQ) short codes to generate on each vector. The number should be a factor of the vector dimension. Because
 PQ is a lossy compression of the original vector, a higher `num_sub_vectors` usually results in
-less space distortion, and thus yields better accuracy. However, a higher `num_sub_vectors` also causes heavier I/O and
-more PQ computation, and thus, higher latency. `dimension / num_sub_vectors` should be a multiple of 8 for optimum SIMD efficiency.
+less space distortion, and thus yields better accuracy. However, a higher `num_sub_vectors` also causes heavier I/O and more PQ computation, and thus, higher latency. `dimension / num_sub_vectors` should be a multiple of 8 for optimum SIMD efficiency.
+
+!!! note
+    If `num_sub_vectors` is set to be greater than the vector dimension, you will see errors like `attempt to divide by zero`.
+
+### How to choose `m` and `ef_construction` for `IVF_HNSW_*` index?
+
+`m` determines the number of connections a new node establishes with its closest neighbors upon entering the graph. Typically, `m` falls within the range of 5 to 48. Lower `m` values are suitable for low-dimensional data or scenarios where recall is less critical. Conversely, higher `m` values are beneficial for high-dimensional data or when high recall is required. In essence, a larger `m` results in a denser graph with increased connectivity, but at the expense of higher memory consumption.
+
+`ef_construction` balances build speed and accuracy. Higher values increase accuracy but slow down the build process. A typical range is 150 to 300. For good search results, a minimum value of 100 is recommended. In most cases, setting this value above 500 offers no additional benefit. Ensure that `ef_construction` is always set to a value equal to or greater than `ef` in the search phase.
--- a/docs/src/concepts/index_hnsw.md
+++ b/docs/src/concepts/index_hnsw.md
@@ -57,6 +57,13 @@ Then the greedy search routine operates as follows:

 ## Usage

+There are three key parameters to set when constructing an HNSW index:
+
+* `metric`: Use an `L2` euclidean distance metric. We also support `dot` and `cosine` distance.
+* `m`: The number of neighbors to select for each vector in the HNSW graph.
+* `ef_construction`: The number of candidates to evaluate during the construction of the HNSW graph.
+
+
 We can combine the above concepts to understand how to build and query an HNSW index in LanceDB.

 ### Construct index
--- a/docs/src/concepts/index_ivfpq.md
+++ b/docs/src/concepts/index_ivfpq.md
@@ -58,8 +58,10 @@ In Python, the index can be created as follows:
 # Make sure you have enough data in the table for an effective training step
 tbl.create_index(metric="L2", num_partitions=256, num_sub_vectors=96)
 ```
+!!! note
+    `num_partitions`=256 and `num_sub_vectors`=96 do not work for every dataset. Those values need to be adjusted for your particular dataset.

-The `num_partitions` is usually chosen to target a particular number of vectors per partition. `num_sub_vectors` is typically chosen based on the desired recall and the dimensionality of the vector. See the [FAQs](#faq) below for best practices on choosing these parameters.
+The `num_partitions` is usually chosen to target a particular number of vectors per partition. `num_sub_vectors` is typically chosen based on the desired recall and the dimensionality of the vector. See [here](../ann_indexes.md/#how-to-choose-num_partitions-and-num_sub_vectors-for-ivf_pq-index) for best practices on choosing these parameters.


 ### Query the index
--- a/docs/src/embeddings/available_embedding_models/text_embedding_functions/voyageai_embedding.md
+++ b/docs/src/embeddings/available_embedding_models/text_embedding_functions/voyageai_embedding.md
@@ -20,7 +20,7 @@ Supported parameters (to be passed in `create` method) are:

 | Parameter | Type | Default Value | Description |
 |---|---|--------|---------|
-| `name` | `str` | `"voyage-3"` | The model ID of the model to use. Supported base models for Text Embeddings: voyage-3, voyage-3-lite, voyage-finance-2, voyage-multilingual-2, voyage-law-2, voyage-code-2 |
+| `name` | `str` | `None` | The model ID of the model to use. Supported base models for Text Embeddings: voyage-3, voyage-3-lite, voyage-finance-2, voyage-multilingual-2, voyage-law-2, voyage-code-2 |
 | `input_type` | `str` | `None` | Type of the input text. Default to None. Other options: query, document. |
 | `truncation` | `bool` | `True` | Whether to truncate the input texts to fit within the context length. |

--- a/docs/src/embeddings/default_embedding_functions.md
+++ b/docs/src/embeddings/default_embedding_functions.md
@@ -53,6 +53,7 @@ These functions are registered by default to handle text embeddings.
 | [**Jina Embeddings**](available_embedding_models/text_embedding_functions/jina_embedding.md "jina") | 🔗 World-class embedding models to improve your search and RAG systems. You will need **jina api key**. | [<img src="https://raw.githubusercontent.com/lancedb/assets/main/docs/assets/logos/jina.png" alt="Jina Icon" width="90" height="35">](available_embedding_models/text_embedding_functions/jina_embedding.md) |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
 | [ **AWS Bedrock Functions**](available_embedding_models/text_embedding_functions/aws_bedrock_embedding.md "bedrock-text") | ☁️ AWS Bedrock supports multiple base models for generating text embeddings. You need to setup the AWS credentials to use this embedding function. | [<img src="https://raw.githubusercontent.com/lancedb/assets/main/docs/assets/logos/aws_bedrock.png" alt="AWS Bedrock Icon" width="120" height="35">](available_embedding_models/text_embedding_functions/aws_bedrock_embedding.md) |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
 | [**IBM Watsonx.ai**](available_embedding_models/text_embedding_functions/ibm_watsonx_ai_embedding.md "watsonx") | 💡 Generate text embeddings using IBM's watsonx.ai platform. **Note**: watsonx.ai library is an optional dependency. | [<img src="https://raw.githubusercontent.com/lancedb/assets/main/docs/assets/logos/watsonx.png" alt="Watsonx Icon" width="140" height="35">](available_embedding_models/text_embedding_functions/ibm_watsonx_ai_embedding.md) |
+| [**VoyageAI Embeddings**](available_embedding_models/text_embedding_functions/voyageai_embedding.md "voyageai") | 🌕 Voyage AI provides cutting-edge embedding and rerankers. This will help you get started with **VoyageAI** embedding models using LanceDB. Using voyageai API requires voyageai package. Install it via `pip`. | [<img src="https://www.voyageai.com/logo.svg" alt="VoyageAI Icon" width="140" height="35">](available_embedding_models/text_embedding_functions/voyageai_embedding.md) |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               



@@ -66,6 +67,7 @@ These functions are registered by default to handle text embeddings.
 [jina-key]: "jina"
 [aws-key]: "bedrock-text"
 [watsonx-key]: "watsonx"
+[voyageai-key]: "voyageai"


 ## Multi-modal Embedding Functions🖼️ 
--- a/docs/src/fts.md
+++ b/docs/src/fts.md
@@ -114,12 +114,45 @@ table.create_fts_index("text",

 LanceDB full text search supports to filter the search results by a condition, both pre-filtering and post-filtering are supported.

-This can be invoked via the familiar `where` syntax:
-
+This can be invoked via the familiar `where` syntax.
+ 
+With pre-filtering:
 === "Python"

    ```python
-    table.search("puppy").limit(10).where("meta='foo'").to_list()
+    table.search("puppy").limit(10).where("meta='foo'", prefilter=True).to_list()
+    ```
+
+=== "TypeScript"
+
+    ```typescript
+    await tbl
+    .search("puppy")
+    .select(["id", "doc"])
+    .limit(10)
+    .where("meta='foo'")
+    .prefilter(true)
+    .toArray();
+    ```
+
+=== "Rust"
+
+    ```rust
+    table
+        .query()
+        .full_text_search(FullTextSearchQuery::new("puppy".to_owned()))
+        .select(lancedb::query::Select::Columns(vec!["doc".to_owned()]))
+        .limit(10)
+        .only_if("meta='foo'")
+        .execute()
+        .await?;
+    ```
+
+With post-filtering:
+=== "Python"
+
+    ```python
+    table.search("puppy").limit(10).where("meta='foo'", prefilter=False).to_list()
    ```

 === "TypeScript"
@@ -130,6 +163,7 @@ This can be invoked via the familiar `where` syntax:
    .select(["id", "doc"])
    .limit(10)
    .where("meta='foo'")
+    .prefilter(false)
    .toArray();
    ```

@@ -140,6 +174,7 @@ This can be invoked via the familiar `where` syntax:
        .query()
        .full_text_search(FullTextSearchQuery::new(words[0].to_owned()))
        .select(lancedb::query::Select::Columns(vec!["doc".to_owned()]))
+        .postfilter()
        .limit(10)
        .only_if("meta='foo'")
        .execute()
@@ -160,3 +195,35 @@ To search for a phrase, the index must be created with `with_position=True`:
 table.create_fts_index("text", use_tantivy=False, with_position=True)
 ```
 This will allow you to search for phrases, but it will also significantly increase the index size and indexing time.
+
+
+## Incremental indexing
+
+LanceDB supports incremental indexing, which means you can add new records to the table without reindexing the entire table.
+
+This can make the query more efficient, especially when the table is large and the number of new records is relatively small.
+
+=== "Python"
+
+    ```python
+    table.add([{"vector": [3.1, 4.1], "text": "Frodo was a happy puppy"}])
+    table.optimize()
+    ```
+
+=== "TypeScript"
+
+    ```typescript
+    await tbl.add([{ vector: [3.1, 4.1], text: "Frodo was a happy puppy" }]);
+    await tbl.optimize();
+    ```
+
+=== "Rust"
+
+    ```rust
+    let more_data: Box<dyn RecordBatchReader + Send> = create_some_records()?;
+    tbl.add(more_data).execute().await?;
+    tbl.optimize(OptimizeAction::All).execute().await?;
+    ```
+!!! note
+
+    New data added after creating the FTS index will appear in search results while incremental indexing is still in progress, but with increased latency due to a flat search on the unindexed portion. LanceDB Cloud automates this merging process, minimizing the impact on search speed.
--- a/docs/src/fts_tantivy.md
+++ b/docs/src/fts_tantivy.md
@@ -153,9 +153,7 @@ table.create_fts_index(["title", "content"], use_tantivy=True, writer_heap_size=

 ## Current limitations

-1. Currently we do not yet support incremental writes.
-   If you add data after FTS index creation, it won't be reflected
-   in search results until you do a full reindex.
+1. New data added after creating the FTS index will appear in search results, but with increased latency due to a flat search on the unindexed portion. Re-indexing with `create_fts_index` will reduce latency. LanceDB Cloud automates this merging process, minimizing the impact on search speed. 

 2. We currently only support local filesystem paths for the FTS index.
   This is a tantivy limitation. We've implemented an object store plugin
--- a/docs/src/guides/tables.md
+++ b/docs/src/guides/tables.md
@@ -274,7 +274,7 @@ table = db.create_table(table_name, schema=Content)

 Sometimes your data model may contain nested objects.
 For example, you may want to store the document string
-and the document soure name as a nested Document object:
+and the document source name as a nested Document object:

 ```python
 class Document(BaseModel):
@@ -466,7 +466,7 @@ You can create an empty table for scenarios where you want to add data to the ta

 ## Adding to a table

-After a table has been created, you can always add more data to it usind the `add` method
+After a table has been created, you can always add more data to it using the `add` method

 === "Python"
    You can add any of the valid data structures accepted by LanceDB table, i.e, `dict`, `list[dict]`, `pd.DataFrame`, or `Iterator[pa.RecordBatch]`. Below are some examples.
@@ -535,7 +535,7 @@ After a table has been created, you can always add more data to it usind the `ad
    ```

    ??? "Ingesting Pydantic models with LanceDB embedding API"
-        When using LanceDB's embedding API, you can add Pydantic models directly to the table. LanceDB will automatically convert the `vector` field to a vector before adding it to the table. You need to specify the default value of `vector` feild as None to allow LanceDB to automatically vectorize the data.
+        When using LanceDB's embedding API, you can add Pydantic models directly to the table. LanceDB will automatically convert the `vector` field to a vector before adding it to the table. You need to specify the default value of `vector` field as None to allow LanceDB to automatically vectorize the data.

        ```python
        import lancedb
@@ -790,6 +790,27 @@ Use the `drop_table()` method on the database to remove a table.
      This permanently removes the table and is not recoverable, unlike deleting rows.
      If the table does not exist an exception is raised.

+## Handling bad vectors
+
+In LanceDB Python, you can use the `on_bad_vectors` parameter to choose how
+invalid vector values are handled. A vector is considered invalid for any of
+the following reasons:
+
+1. They are the wrong dimension
+2. They contain NaN values
+3. They are null but are on a non-nullable field
+
+By default, LanceDB will raise an error if it encounters a bad vector. You can
+also choose one of the following options:
+
+* `drop`: Ignore rows with bad vectors
+* `fill`: Replace bad values (NaNs) or missing values (too few dimensions) with
+    the fill value specified in the `fill_value` parameter. An input like
+    `[1.0, NaN, 3.0]` will be replaced with `[1.0, 0.0, 3.0]` if `fill_value=0.0`.
+* `null`: Replace bad vectors with null (only works if the column is nullable).
+    A bad vector `[1.0, NaN, 3.0]` will be replaced with `null` if the column is
+    nullable. If the vector column is non-nullable, then bad vectors will cause an
+    error

 ## Consistency

@@ -859,4 +880,4 @@ There are three possible settings for `read_consistency_interval`:

 Learn the best practices on creating an ANN index and getting the most out of it.

-[^1]: The `vectordb` package is a legacy package that is  deprecated in favor of `@lancedb/lancedb`.  The `vectordb` package will continue to receive bug fixes and security updates until September 2024.  We recommend all new projects use `@lancedb/lancedb`.  See the [migration guide](migration.md) for more information.
+[^1]: The `vectordb` package is a legacy package that is  deprecated in favor of `@lancedb/lancedb`.  The `vectordb` package will continue to receive bug fixes and security updates until September 2024.  We recommend all new projects use `@lancedb/lancedb`.  See the [migration guide](../migration.md) for more information.
--- a/docs/src/reranking/cohere.md
+++ b/docs/src/reranking/cohere.md
@@ -6,6 +6,9 @@ This re-ranker uses the [Cohere](https://cohere.ai/) API to rerank the search re
 !!! note
    Supported Query Types: Hybrid, Vector, FTS

+```shell
+pip install cohere
+```

 ```python
 import numpy
--- a/docs/src/reranking/index.md
+++ b/docs/src/reranking/index.md
@@ -9,6 +9,7 @@ LanceDB comes with some built-in rerankers. Some of the rerankers that are avail
 | `CrossEncoderReranker` | Uses a cross-encoder model to rerank search results | Vector, FTS, Hybrid |
 | `ColbertReranker` | Uses a colbert model to rerank search results | Vector, FTS, Hybrid |
 | `OpenaiReranker`(Experimental) | Uses OpenAI's chat model to rerank search results | Vector, FTS, Hybrid |
+| `VoyageAIReranker` | Uses voyageai Reranker API to rerank results | Vector, FTS, Hybrid |


 ## Using a Reranker
@@ -73,6 +74,7 @@ LanceDB comes with some built-in rerankers. Here are some of the rerankers that
 - [Jina Reranker](./jina.md)
 - [AnswerDotAI Rerankers](./answerdotai.md)
 - [Reciprocal Rank Fusion Reranker](./rrf.md)
+- [VoyageAI Reranker](./voyageai.md)

 ## Creating Custom Rerankers

--- a/java/core/pom.xml
+++ b/java/core/pom.xml
@@ -8,7 +8,7 @@
    <parent>
        <groupId>com.lancedb</groupId>
        <artifactId>lancedb-parent</artifactId>
-        <version>0.13.0-beta.1</version>
+        <version>0.13.0-final.0</version>
        <relativePath>../pom.xml</relativePath>
    </parent>

--- a/java/pom.xml
+++ b/java/pom.xml
@@ -6,7 +6,7 @@

    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-parent</artifactId>
-    <version>0.13.0-beta.1</version>
+    <version>0.13.0-final.0</version>
    <packaging>pom</packaging>

    <name>LanceDB Parent</name>
--- a/node/package-lock.json
+++ b/node/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "vectordb",
-  "version": "0.13.0-beta.1",
+  "version": "0.13.0",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "vectordb",
-      "version": "0.13.0-beta.1",
+      "version": "0.13.0",
      "cpu": [
        "x64",
        "arm64"
@@ -52,12 +52,12 @@
        "uuid": "^9.0.0"
      },
      "optionalDependencies": {
-        "@lancedb/vectordb-darwin-arm64": "0.13.0-beta.1",
-        "@lancedb/vectordb-darwin-x64": "0.13.0-beta.1",
-        "@lancedb/vectordb-linux-arm64-gnu": "0.13.0-beta.1",
-        "@lancedb/vectordb-linux-x64-gnu": "0.13.0-beta.1",
-        "@lancedb/vectordb-win32-arm64-msvc": "0.13.0-beta.1",
-        "@lancedb/vectordb-win32-x64-msvc": "0.13.0-beta.1"
+        "@lancedb/vectordb-darwin-arm64": "0.13.0",
+        "@lancedb/vectordb-darwin-x64": "0.13.0",
+        "@lancedb/vectordb-linux-arm64-gnu": "0.13.0",
+        "@lancedb/vectordb-linux-x64-gnu": "0.13.0",
+        "@lancedb/vectordb-win32-arm64-msvc": "0.13.0",
+        "@lancedb/vectordb-win32-x64-msvc": "0.13.0"
      },
      "peerDependencies": {
        "@apache-arrow/ts": "^14.0.2",
@@ -328,9 +328,9 @@
      }
    },
    "node_modules/@lancedb/vectordb-darwin-arm64": {
-      "version": "0.13.0-beta.1",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.13.0-beta.1.tgz",
-      "integrity": "sha512-beOrf6selCzzhLgDG8Nibma4nO/CSnA1wUKRmlJHEPtGcg7PW18z6MP/nfwQMpMR/FLRfTo8pPTbpzss47MiQQ==",
+      "version": "0.13.0",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.13.0.tgz",
+      "integrity": "sha512-8hdcjkRmgrdQYf1jN+DyZae40LIv8UUfnWy70Uid5qy63sSvRW/+MvIdqIPFr9QlLUXmpyyQuX0y3bZhUR99cQ==",
      "cpu": [
        "arm64"
      ],
@@ -340,9 +340,9 @@
      ]
    },
    "node_modules/@lancedb/vectordb-darwin-x64": {
-      "version": "0.13.0-beta.1",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.13.0-beta.1.tgz",
-      "integrity": "sha512-YdraGRF/RbJRkKh0v3xT03LUhq47T2GtCvJ5gZp8wKlh4pHa8LuhLU0DIdvmG/DT5vuQA+td8HDkBm/e3EOdNg==",
+      "version": "0.13.0",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.13.0.tgz",
+      "integrity": "sha512-fWzAY4l5SQtNfMYh80v+M66ugZHhdxbkpk5mNEv6Zsug3DL6kRj3Uv31/i0wgzY6F5G3LUlbjZerN+eTnDLwOw==",
      "cpu": [
        "x64"
      ],
@@ -352,9 +352,9 @@
      ]
    },
    "node_modules/@lancedb/vectordb-linux-arm64-gnu": {
-      "version": "0.13.0-beta.1",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.13.0-beta.1.tgz",
-      "integrity": "sha512-Pp0O/uhEqof1oLaWrNbv+Ym+q8kBkiCqaA5+2eAZ6a3e9U+Ozkvb0FQrHuyi9adJ5wKQ4NabyQE9BMf2bYpOnQ==",
+      "version": "0.13.0",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.13.0.tgz",
+      "integrity": "sha512-ltwAT9baOSuR5YiGykQXPC8/HGYF13vpI47qxhP9yfgiz9pA8EUn8p8YrBRzq7J4DIZ4b8JSVDXQnMIqEtB4Kg==",
      "cpu": [
        "arm64"
      ],
@@ -364,9 +364,9 @@
      ]
    },
    "node_modules/@lancedb/vectordb-linux-x64-gnu": {
-      "version": "0.13.0-beta.1",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.13.0-beta.1.tgz",
-      "integrity": "sha512-y8nxOye4egfWF5FGED9EfkmZ1O5HnRLU4a61B8m5JSpkivO9v2epTcbYN0yt/7ZFCgtqMfJ8VW4Mi7qQcz3KDA==",
+      "version": "0.13.0",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.13.0.tgz",
+      "integrity": "sha512-MiT/RBlMPGGRh7BX+MXwRuNiiUnKmuDcHH8nm88IH28T7TQxXIbA9w6UpSg5m9f3DgKQI2K8oLi29oKIB8ZwDQ==",
      "cpu": [
        "x64"
      ],
@@ -376,9 +376,9 @@
      ]
    },
    "node_modules/@lancedb/vectordb-win32-x64-msvc": {
-      "version": "0.13.0-beta.1",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.13.0-beta.1.tgz",
-      "integrity": "sha512-STMDP9dp0TBLkB3ro+16pKcGy6bmbhRuEZZZ1Tp5P75yTPeVh4zIgWkidMdU1qBbEYM7xacnsp9QAwgLnMU/Ow==",
+      "version": "0.13.0",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.13.0.tgz",
+      "integrity": "sha512-SovP/hwWYLJIy65DKbVuXlBPTb/nwvVpTO6dh9zRch+L5ek6JmVAkwsfeTS2p5bMa8VPujsCXYUAVuCDEJU8wg==",
      "cpu": [
        "x64"
      ],
@@ -1501,9 +1501,9 @@
      "dev": true
    },
    "node_modules/cross-spawn": {
-      "version": "7.0.3",
-      "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz",
-      "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==",
+      "version": "7.0.6",
+      "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz",
+      "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==",
      "dev": true,
      "dependencies": {
        "path-key": "^3.1.0",
--- a/node/package.json
+++ b/node/package.json
@@ -1,6 +1,6 @@
 {
  "name": "vectordb",
-  "version": "0.13.0-beta.1",
+  "version": "0.13.0",
  "description": " Serverless, low-latency vector database for AI applications",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
@@ -89,11 +89,13 @@
    }
  },
  "optionalDependencies": {
-    "@lancedb/vectordb-darwin-arm64": "0.13.0-beta.1",
-    "@lancedb/vectordb-darwin-x64": "0.13.0-beta.1",
-    "@lancedb/vectordb-linux-arm64-gnu": "0.13.0-beta.1",
-    "@lancedb/vectordb-linux-x64-gnu": "0.13.0-beta.1",
-    "@lancedb/vectordb-win32-x64-msvc": "0.13.0-beta.1",
-    "@lancedb/vectordb-win32-arm64-msvc": "0.13.0-beta.1"
+    "@lancedb/vectordb-darwin-x64": "0.13.0",
+    "@lancedb/vectordb-darwin-arm64": "0.13.0",
+    "@lancedb/vectordb-linux-x64-gnu": "0.13.0",
+    "@lancedb/vectordb-linux-arm64-gnu": "0.13.0",
+    "@lancedb/vectordb-linux-x64-musl": "0.13.0",
+    "@lancedb/vectordb-linux-arm64-musl": "0.13.0",
+    "@lancedb/vectordb-win32-x64-msvc": "0.13.0",
+    "@lancedb/vectordb-win32-arm64-msvc": "0.13.0"
  }
 }
--- a/nodejs/Cargo.toml
+++ b/nodejs/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "lancedb-nodejs"
 edition.workspace = true
-version = "0.13.0-beta.1"
+version = "0.13.0"
 license.workspace = true
 description.workspace = true
 repository.workspace = true
--- a/nodejs/test/table.test.ts
+++ b/nodejs/test/table.test.ts
@@ -187,6 +187,81 @@ describe.each([arrow13, arrow14, arrow15, arrow16, arrow17])(
      },
    );

+    // TODO: https://github.com/lancedb/lancedb/issues/1832
+    it.skip("should be able to omit nullable fields", async () => {
+      const db = await connect(tmpDir.name);
+      const schema = new arrow.Schema([
+        new arrow.Field(
+          "vector",
+          new arrow.FixedSizeList(
+            2,
+            new arrow.Field("item", new arrow.Float64()),
+          ),
+          true,
+        ),
+        new arrow.Field("item", new arrow.Utf8(), true),
+        new arrow.Field("price", new arrow.Float64(), false),
+      ]);
+      const table = await db.createEmptyTable("test", schema);
+
+      const data1 = { item: "foo", price: 10.0 };
+      await table.add([data1]);
+      const data2 = { vector: [3.1, 4.1], price: 2.0 };
+      await table.add([data2]);
+      const data3 = { vector: [5.9, 26.5], item: "bar", price: 3.0 };
+      await table.add([data3]);
+
+      let res = await table.query().limit(10).toArray();
+      const resVector = res.map((r) => r.get("vector").toArray());
+      expect(resVector).toEqual([null, data2.vector, data3.vector]);
+      const resItem = res.map((r) => r.get("item").toArray());
+      expect(resItem).toEqual(["foo", null, "bar"]);
+      const resPrice = res.map((r) => r.get("price").toArray());
+      expect(resPrice).toEqual([10.0, 2.0, 3.0]);
+
+      const data4 = { item: "foo" };
+      // We can't omit a column if it's not nullable
+      await expect(table.add([data4])).rejects.toThrow("Invalid user input");
+
+      // But we can alter columns to make them nullable
+      await table.alterColumns([{ path: "price", nullable: true }]);
+      await table.add([data4]);
+
+      res = (await table.query().limit(10).toArray()).map((r) => r.toJSON());
+      expect(res).toEqual([data1, data2, data3, data4]);
+    });
+
+    it("should be able to insert nullable data for non-nullable fields", async () => {
+      const db = await connect(tmpDir.name);
+      const schema = new arrow.Schema([
+        new arrow.Field("x", new arrow.Float64(), false),
+        new arrow.Field("id", new arrow.Utf8(), false),
+      ]);
+      const table = await db.createEmptyTable("test", schema);
+
+      const data1 = { x: 4.1, id: "foo" };
+      await table.add([data1]);
+      const res = (await table.query().toArray())[0];
+      expect(res.x).toEqual(data1.x);
+      expect(res.id).toEqual(data1.id);
+
+      const data2 = { x: null, id: "bar" };
+      await expect(table.add([data2])).rejects.toThrow(
+        "declared as non-nullable but contains null values",
+      );
+
+      // But we can alter columns to make them nullable
+      await table.alterColumns([{ path: "x", nullable: true }]);
+      await table.add([data2]);
+
+      const res2 = await table.query().toArray();
+      expect(res2.length).toBe(2);
+      expect(res2[0].x).toEqual(data1.x);
+      expect(res2[0].id).toEqual(data1.id);
+      expect(res2[1].x).toBeNull();
+      expect(res2[1].id).toEqual(data2.id);
+    });
+
    it("should return the table as an instance of an arrow table", async () => {
      const arrowTbl = await table.toArrow();
      expect(arrowTbl).toBeInstanceOf(ArrowTable);
@@ -402,6 +477,54 @@ describe("When creating an index", () => {
    expect(rst.numRows).toBe(1);
  });

+  it("should create and search IVF_HNSW indices", async () => {
+    await tbl.createIndex("vec", {
+      config: Index.hnswSq(),
+    });
+
+    // check index directory
+    const indexDir = path.join(tmpDir.name, "test.lance", "_indices");
+    expect(fs.readdirSync(indexDir)).toHaveLength(1);
+    const indices = await tbl.listIndices();
+    expect(indices.length).toBe(1);
+    expect(indices[0]).toEqual({
+      name: "vec_idx",
+      indexType: "IvfHnswSq",
+      columns: ["vec"],
+    });
+
+    // Search without specifying the column
+    let rst = await tbl
+      .query()
+      .limit(2)
+      .nearestTo(queryVec)
+      .distanceType("dot")
+      .toArrow();
+    expect(rst.numRows).toBe(2);
+
+    // Search using `vectorSearch`
+    rst = await tbl.vectorSearch(queryVec).limit(2).toArrow();
+    expect(rst.numRows).toBe(2);
+
+    // Search with specifying the column
+    const rst2 = await tbl
+      .query()
+      .limit(2)
+      .nearestTo(queryVec)
+      .column("vec")
+      .toArrow();
+    expect(rst2.numRows).toBe(2);
+    expect(rst.toString()).toEqual(rst2.toString());
+
+    // test offset
+    rst = await tbl.query().limit(2).offset(1).nearestTo(queryVec).toArrow();
+    expect(rst.numRows).toBe(1);
+
+    // test ef
+    rst = await tbl.query().limit(2).nearestTo(queryVec).ef(100).toArrow();
+    expect(rst.numRows).toBe(2);
+  });
+
  it("should be able to query unindexed data", async () => {
    await tbl.createIndex("vec");
    await tbl.add([
--- a/nodejs/examples/sentence-transformers.test.ts
+++ b/nodejs/examples/sentence-transformers.test.ts
@@ -6,12 +6,16 @@ import { withTempDirectory } from "./util.ts";
 import * as lancedb from "@lancedb/lancedb";
 import "@lancedb/lancedb/embedding/transformers";
 import { LanceSchema, getRegistry } from "@lancedb/lancedb/embedding";
+import { EmbeddingFunction } from "@lancedb/lancedb/embedding";
 import { Utf8 } from "apache-arrow";

 test("full text search", async () => {
  await withTempDirectory(async (databaseDir) => {
    const db = await lancedb.connect(databaseDir);
-    const func = await getRegistry().get("huggingface").create();
+    console.log(getRegistry());
+    const func = (await getRegistry()
+      .get("huggingface")
+      ?.create()) as EmbeddingFunction;

    const facts = [
      "Albert Einstein was a theoretical physicist.",
@@ -56,4 +60,4 @@ test("full text search", async () => {

    expect(actual[0]["text"]).toBe("The human body has 206 bones.");
  });
-});
+}, 100_000);
--- a/nodejs/lancedb/embedding/index.ts
+++ b/nodejs/lancedb/embedding/index.ts
@@ -19,9 +19,6 @@ import { EmbeddingFunctionConfig, getRegistry } from "./registry";

 export { EmbeddingFunction, TextEmbeddingFunction } from "./embedding_function";

-// We need to explicitly export '*' so that the `register` decorator actually registers the class.
-export * from "./openai";
-export * from "./transformers";
 export * from "./registry";

 /**
--- a/nodejs/lancedb/embedding/registry.ts
+++ b/nodejs/lancedb/embedding/registry.ts
@@ -17,8 +17,6 @@ import {
  type EmbeddingFunctionConstructor,
 } from "./embedding_function";
 import "reflect-metadata";
-import { OpenAIEmbeddingFunction } from "./openai";
-import { TransformersEmbeddingFunction } from "./transformers";

 type CreateReturnType<T> = T extends { init: () => Promise<void> }
  ? Promise<T>
@@ -73,10 +71,6 @@ export class EmbeddingFunctionRegistry {
    };
  }

-  get(name: "openai"): EmbeddingFunctionCreate<OpenAIEmbeddingFunction>;
-  get(
-    name: "huggingface",
-  ): EmbeddingFunctionCreate<TransformersEmbeddingFunction>;
  get<T extends EmbeddingFunction<unknown>>(
    name: string,
  ): EmbeddingFunctionCreate<T> | undefined;
--- a/nodejs/lancedb/query.ts
+++ b/nodejs/lancedb/query.ts
@@ -385,6 +385,20 @@ export class VectorQuery extends QueryBase<NativeVectorQuery> {
    return this;
  }

+  /**
+   * Set the number of candidates to consider during the search
+   *
+   * This argument is only used when the vector column has an HNSW index.
+   * If there is no index then this value is ignored.
+   *
+   * Increasing this value will increase the recall of your query but will
+   * also increase the latency of your query. The default value is 1.5*limit.
+   */
+  ef(ef: number): VectorQuery {
+    super.doCall((inner) => inner.ef(ef));
+    return this;
+  }
+
  /**
   * Set the vector column to query
   *
--- a/nodejs/lancedb/table.ts
+++ b/nodejs/lancedb/table.ts
@@ -87,6 +87,12 @@ export interface OptimizeOptions {
  deleteUnverified: boolean;
 }

+export interface Version {
+  version: number;
+  timestamp: Date;
+  metadata: Record<string, string>;
+}
+
 /**
 * A Table is a collection of Records in a LanceDB Database.
 *
@@ -360,6 +366,11 @@ export abstract class Table {
   */
  abstract checkoutLatest(): Promise<void>;

+  /**
+   * List all the versions of the table
+   */
+  abstract listVersions(): Promise<Version[]>;
+
  /**
   * Restore the table to the currently checked out version
   *
@@ -659,6 +670,14 @@ export class LocalTable extends Table {
    await this.inner.checkoutLatest();
  }

+  async listVersions(): Promise<Version[]> {
+    // The native timestamp is in microseconds; `Date` expects milliseconds.
+    return (await this.inner.listVersions()).map((v) => ({
+      ...v,
+      timestamp: new Date(v.timestamp / 1000),
+    }));
+  }
+
  async restore(): Promise<void> {
    await this.inner.restore();
  }
--- a/nodejs/npm/darwin-arm64/package.json
+++ b/nodejs/npm/darwin-arm64/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-darwin-arm64",
-	"version": "0.13.0-beta.1",
+	"version": "0.13.0",
 	"os": ["darwin"],
 	"cpu": ["arm64"],
 	"main": "lancedb.darwin-arm64.node",
--- a/nodejs/npm/darwin-x64/package.json
+++ b/nodejs/npm/darwin-x64/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-darwin-x64",
-	"version": "0.13.0-beta.1",
+	"version": "0.13.0",
 	"os": ["darwin"],
 	"cpu": ["x64"],
 	"main": "lancedb.darwin-x64.node",
--- a/nodejs/npm/linux-arm64-gnu/package.json
+++ b/nodejs/npm/linux-arm64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-gnu",
-	"version": "0.13.0-beta.1",
+	"version": "0.13.0",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-gnu.node",
--- a/nodejs/npm/linux-arm64-musl/README.md
+++ b/nodejs/npm/linux-arm64-musl/README.md
@@ -0,0 +1,3 @@
+# `@lancedb/lancedb-linux-arm64-musl`
+
+This is the **aarch64-unknown-linux-musl** binary for `@lancedb/lancedb`
--- a/nodejs/npm/linux-arm64-musl/package.json
+++ b/nodejs/npm/linux-arm64-musl/package.json
@@ -0,0 +1,13 @@
+{
+	"name": "@lancedb/lancedb-linux-arm64-musl",
+	"version": "0.13.0",
+	"os": ["linux"],
+	"cpu": ["arm64"],
+	"main": "lancedb.linux-arm64-musl.node",
+	"files": ["lancedb.linux-arm64-musl.node"],
+	"license": "Apache 2.0",
+	"engines": {
+		"node": ">= 18"
+	},
+	"libc": ["musl"]
+}
--- a/nodejs/npm/linux-x64-gnu/package.json
+++ b/nodejs/npm/linux-x64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-gnu",
-	"version": "0.13.0-beta.1",
+	"version": "0.13.0",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-gnu.node",
--- a/nodejs/npm/linux-x64-musl/README.md
+++ b/nodejs/npm/linux-x64-musl/README.md
@@ -0,0 +1,3 @@
+# `@lancedb/lancedb-linux-x64-musl`
+
+This is the **x86_64-unknown-linux-musl** binary for `@lancedb/lancedb`
--- a/nodejs/npm/linux-x64-musl/package.json
+++ b/nodejs/npm/linux-x64-musl/package.json
@@ -0,0 +1,13 @@
+{
+	"name": "@lancedb/lancedb-linux-x64-musl",
+	"version": "0.13.0",
+	"os": ["linux"],
+	"cpu": ["x64"],
+	"main": "lancedb.linux-x64-musl.node",
+	"files": ["lancedb.linux-x64-musl.node"],
+	"license": "Apache 2.0",
+	"engines": {
+		"node": ">= 18"
+	},
+	"libc": ["musl"]
+}
--- a/nodejs/npm/win32-arm64-msvc/package.json
+++ b/nodejs/npm/win32-arm64-msvc/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@lancedb/lancedb-win32-arm64-msvc",
-  "version": "0.13.0-beta.1",
+  "version": "0.13.0",
  "os": [
    "win32"
  ],
--- a/nodejs/npm/win32-x64-msvc/package.json
+++ b/nodejs/npm/win32-x64-msvc/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-win32-x64-msvc",
-	"version": "0.13.0-beta.1",
+	"version": "0.13.0",
 	"os": ["win32"],
 	"cpu": ["x64"],
 	"main": "lancedb.win32-x64-msvc.node",
--- a/nodejs/package-lock.json
+++ b/nodejs/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "@lancedb/lancedb",
-  "version": "0.13.0-beta.1",
+  "version": "0.13.0",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "@lancedb/lancedb",
-      "version": "0.13.0-beta.1",
+      "version": "0.13.0",
      "cpu": [
        "x64",
        "arm64"
@@ -6052,9 +6052,9 @@
      }
    },
    "node_modules/cross-spawn": {
-      "version": "7.0.3",
-      "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz",
-      "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==",
+      "version": "7.0.6",
+      "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz",
+      "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==",
      "devOptional": true,
      "dependencies": {
        "path-key": "^3.1.0",
--- a/nodejs/package.json
+++ b/nodejs/package.json
@@ -10,11 +10,13 @@
    "vector database",
    "ann"
  ],
-  "version": "0.13.0-beta.1",
+  "version": "0.13.0",
  "main": "dist/index.js",
  "exports": {
    ".": "./dist/index.js",
-    "./embedding": "./dist/embedding/index.js"
+    "./embedding": "./dist/embedding/index.js",
+    "./embedding/openai": "./dist/embedding/openai.js",
+    "./embedding/transformers": "./dist/embedding/transformers.js"
  },
  "types": "dist/index.d.ts",
  "napi": {
@@ -22,10 +24,12 @@
    "triples": {
      "defaults": false,
      "additional": [
-        "aarch64-apple-darwin",
-        "aarch64-unknown-linux-gnu",
        "x86_64-apple-darwin",
+        "aarch64-apple-darwin",
        "x86_64-unknown-linux-gnu",
+        "aarch64-unknown-linux-gnu",
+        "x86_64-unknown-linux-musl",
+        "aarch64-unknown-linux-musl",
        "x86_64-pc-windows-msvc"
      ]
    }
--- a/nodejs/src/query.rs
+++ b/nodejs/src/query.rs
@@ -167,6 +167,11 @@ impl VectorQuery {
        self.inner = self.inner.clone().nprobes(nprobe as usize);
    }

+    #[napi]
+    pub fn ef(&mut self, ef: u32) {
+        self.inner = self.inner.clone().ef(ef as usize);
+    }
+
    #[napi]
    pub fn bypass_vector_index(&mut self) {
        self.inner = self.inner.clone().bypass_vector_index()
--- a/nodejs/src/table.rs
+++ b/nodejs/src/table.rs
@@ -12,6 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+use std::collections::HashMap;
+
 use arrow_ipc::writer::FileWriter;
 use lancedb::ipc::ipc_file_to_batches;
 use lancedb::table::{
@@ -226,6 +228,28 @@ impl Table {
        self.inner_ref()?.checkout_latest().await.default_error()
    }

+    #[napi(catch_unwind)]
+    pub async fn list_versions(&self) -> napi::Result<Vec<Version>> {
+        self.inner_ref()?
+            .list_versions()
+            .await
+            .map(|versions| {
+                versions
+                    .iter()
+                    .map(|version| Version {
+                        version: version.version as i64,
+                        timestamp: version.timestamp.timestamp_micros(),
+                        metadata: version
+                            .metadata
+                            .iter()
+                            .map(|(k, v)| (k.clone(), v.clone()))
+                            .collect(),
+                    })
+                    .collect()
+            })
+            .default_error()
+    }
+
    #[napi(catch_unwind)]
    pub async fn restore(&self) -> napi::Result<()> {
        self.inner_ref()?.restore().await.default_error()
@@ -466,3 +490,10 @@ impl From<lancedb::index::IndexStatistics> for IndexStatistics {
        }
    }
 }
+
+#[napi(object)]
+pub struct Version {
+    pub version: i64,
+    pub timestamp: i64,
+    pub metadata: HashMap<String, String>,
+}
--- a/python/.bumpversion.toml
+++ b/python/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.16.0-beta.1"
+current_version = "0.16.1-beta.0"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-python"
-version = "0.16.0-beta.1"
+version = "0.16.1-beta.0"
 edition.workspace = true
 description = "Python bindings for LanceDB"
 license.workspace = true
@@ -15,7 +15,7 @@ crate-type = ["cdylib"]

 [dependencies]
 arrow = { version = "52.1", features = ["pyarrow"] }
-lancedb = { path = "../rust/lancedb" }
+lancedb = { path = "../rust/lancedb", default-features = false }
 env_logger.workspace = true
 pyo3 = { version = "0.21", features = ["extension-module", "abi3-py38", "gil-refs"] }
 # Using this fork for now: https://github.com/awestlake87/pyo3-asyncio/issues/119
@@ -33,6 +33,11 @@ pyo3-build-config = { version = "0.20.3", features = [
 ] }

 [features]
-default = ["remote"]
+default = ["default-tls", "remote"]
 fp16kernels = ["lancedb/fp16kernels"]
 remote = ["lancedb/remote"]
+
+# TLS
+default-tls = ["lancedb/default-tls"]
+native-tls = ["lancedb/native-tls"]
+rustls-tls = ["lancedb/rustls-tls"]
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -4,7 +4,7 @@ name = "lancedb"
 dependencies = [
    "deprecation",
    "nest-asyncio~=1.0",
-    "pylance==0.19.2",
+    "pylance==0.20.0b2",
    "tqdm>=4.27.0",
    "pydantic>=1.10",
    "packaging",
--- a/python/python/lancedb/query.py
+++ b/python/python/lancedb/query.py
@@ -131,6 +131,8 @@ class Query(pydantic.BaseModel):

    fast_search: bool = False

+    ef: Optional[int] = None
+

 class LanceQueryBuilder(ABC):
    """An abstract query builder. Subclasses are defined for vector search,
@@ -257,6 +259,7 @@ class LanceQueryBuilder(ABC):
        self._with_row_id = False
        self._vector = None
        self._text = None
+        self._ef = None

    @deprecation.deprecated(
        deprecated_in="0.3.1",
@@ -367,11 +370,13 @@ class LanceQueryBuilder(ABC):
        ----------
        limit: int
            The maximum number of results to return.
-            By default the query is limited to the first 10.
-            Call this method and pass 0, a negative value,
-            or None to remove the limit.
-            *WARNING* if you have a large dataset, removing
-            the limit can potentially result in reading a
+            The default query limit is 10 results.
+            For ANN/KNN queries, you must specify a limit.
+            Entering 0, a negative number, or None will reset
+            the limit to the default value of 10.
+            *WARNING* if you have a large dataset, setting
+            the limit to a large number, e.g. the table size,
+            can potentially result in reading a
            large amount of data into memory and cause
            out of memory issues.

@@ -638,6 +643,28 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
        self._nprobes = nprobes
        return self

+    def ef(self, ef: int) -> LanceVectorQueryBuilder:
+        """Set the number of candidates to consider during search.
+
+        Higher values will yield better recall (more likely to find vectors if
+        they exist) at the expense of latency.
+
+        This only applies to the HNSW-related index.
+        The default value is 1.5 * limit.
+
+        Parameters
+        ----------
+        ef: int
+            The number of candidates to consider during search.
+
+        Returns
+        -------
+        LanceVectorQueryBuilder
+            The LanceQueryBuilder object.
+        """
+        self._ef = ef
+        return self
+
    def refine_factor(self, refine_factor: int) -> LanceVectorQueryBuilder:
        """Set the refine factor to use, increasing the number of vectors sampled.

@@ -700,6 +727,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
            with_row_id=self._with_row_id,
            offset=self._offset,
            fast_search=self._fast_search,
+            ef=self._ef,
        )
        result_set = self._table._execute_query(query, batch_size)
        if self._reranker is not None:
@@ -1071,6 +1099,8 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
            self._vector_query.nprobes(self._nprobes)
        if self._refine_factor:
            self._vector_query.refine_factor(self._refine_factor)
+        if self._ef:
+            self._vector_query.ef(self._ef)

        with ThreadPoolExecutor() as executor:
            fts_future = executor.submit(self._fts_query.with_row_id(True).to_arrow)
@@ -1197,6 +1227,29 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
        self._nprobes = nprobes
        return self

+    def ef(self, ef: int) -> LanceHybridQueryBuilder:
+        """
+        Set the number of candidates to consider during search.
+
+        Higher values will yield better recall (more likely to find vectors if
+        they exist) at the expense of latency.
+
+        This only applies to the HNSW-related index.
+        The default value is 1.5 * limit.
+
+        Parameters
+        ----------
+        ef: int
+            The number of candidates to consider during search.
+
+        Returns
+        -------
+        LanceHybridQueryBuilder
+            The LanceHybridQueryBuilder object.
+        """
+        self._ef = ef
+        return self
+
    def metric(self, metric: Literal["L2", "cosine", "dot"]) -> LanceHybridQueryBuilder:
        """Set the distance metric to use.

@@ -1495,7 +1548,8 @@ class AsyncQuery(AsyncQueryBase):
        return pa.array(vec)

    def nearest_to(
-        self, query_vector: Optional[Union[VEC, Tuple, List[VEC]]] = None
+        self,
+        query_vector: Union[VEC, Tuple, List[VEC]],
    ) -> AsyncVectorQuery:
        """
        Find the nearest vectors to the given query vector.
@@ -1542,6 +1596,9 @@ class AsyncQuery(AsyncQueryBase):
        will be added to the results.  This column will contain the index of the
        query vector that the result is nearest to.
        """
+        if query_vector is None:
+            raise ValueError("query_vector can not be None")
+
        if (
            isinstance(query_vector, list)
            and len(query_vector) > 0
@@ -1618,7 +1675,7 @@ class AsyncVectorQuery(AsyncQueryBase):
        """
        Set the number of partitions to search (probe)

-        This argument is only used when the vector column has an IVF PQ index.
+        This argument is only used when the vector column has an IVF-based index.
        If there is no index then this value is ignored.

        The IVF stage of IVF PQ divides the input into partitions (clusters) of
@@ -1640,6 +1697,21 @@ class AsyncVectorQuery(AsyncQueryBase):
        self._inner.nprobes(nprobes)
        return self

+    def ef(self, ef: int) -> AsyncVectorQuery:
+        """
+        Set the number of candidates to consider during search
+
+        This argument is only used when the vector column has an HNSW index.
+        If there is no index then this value is ignored.
+
+        Increasing this value will increase the recall of your query but will also
+        increase the latency of your query.  The default value is 1.5 * limit.  This
+        default is good for many cases but the best value to use will depend on your
+        data and the recall that you need to achieve.
+        """
+        self._inner.ef(ef)
+        return self
+
    def refine_factor(self, refine_factor: int) -> AsyncVectorQuery:
        """
        A multiplier to control how many additional rows are taken during the refine
--- a/python/python/lancedb/remote/table.py
+++ b/python/python/lancedb/remote/table.py
@@ -78,6 +78,10 @@ class RemoteTable(Table):
            self.schema.metadata
        )

+    def list_versions(self):
+        """List all versions of the table"""
+        return self._loop.run_until_complete(self._table.list_versions())
+
    def to_arrow(self) -> pa.Table:
        """to_arrow() is not yet supported on LanceDB cloud."""
        raise NotImplementedError("to_arrow() is not yet supported on LanceDB cloud.")
@@ -86,6 +90,12 @@ class RemoteTable(Table):
        """to_pandas() is not yet supported on LanceDB cloud."""
        return NotImplementedError("to_pandas() is not yet supported on LanceDB cloud.")

+    def checkout(self, version):
+        return self._loop.run_until_complete(self._table.checkout(version))
+
+    def checkout_latest(self):
+        return self._loop.run_until_complete(self._table.checkout_latest())
+
    def list_indices(self):
        """List all the indices on the table"""
        return self._loop.run_until_complete(self._table.list_indices())
--- a/python/python/lancedb/rerankers/cohere.py
+++ b/python/python/lancedb/rerankers/cohere.py
@@ -41,7 +41,7 @@ class CohereReranker(Reranker):

    def __init__(
        self,
-        model_name: str = "rerank-english-v2.0",
+        model_name: str = "rerank-english-v3.0",
        column: str = "text",
        top_n: Union[int, None] = None,
        return_score="relevance",
--- a/python/python/lancedb/table.py
+++ b/python/python/lancedb/table.py
@@ -8,7 +8,7 @@ import inspect
 import time
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from datetime import timedelta
+from datetime import datetime, timedelta
 from functools import cached_property
 from typing import (
    TYPE_CHECKING,
@@ -1012,6 +1012,39 @@ class Table(ABC):
            The names of the columns to drop.
        """

+    @abstractmethod
+    def checkout(self, version):
+        """
+        Checks out a specific version of the Table
+
+        Any read operation on the table will now access the data at the checked out
+        `version`. As a consequence, calling this method will disable any read
+        consistency interval that was previously set.
+
+        This is a read-only operation that turns the table into a sort of "view"
+        or "detached head".  Other table instances will not be affected.  To make the
+        change permanent you can use the `restore` method.
+
+        Any operation that modifies the table will fail while the table is in a checked
+        out state.
+
+        To return the table to a normal state use `checkout_latest`.
+        """
+
+    @abstractmethod
+    def checkout_latest(self):
+        """
+        Ensures the table is pointing at the latest version
+
+        This can be used to manually update a table when the read_consistency_interval
+        is None.
+        It can also be used to undo a `checkout` operation.
+        """
+
+    @abstractmethod
+    def list_versions(self):
+        """List all versions of the table"""
+
    @cached_property
    def _dataset_uri(self) -> str:
        return _table_uri(self._conn.uri, self.name)
@@ -1567,7 +1600,7 @@ class LanceTable(Table):
            "append" and "overwrite".
        on_bad_vectors: str, default "error"
            What to do if any of the vectors are not the same size or contains NaNs.
-            One of "error", "drop", "fill".
+            One of "error", "drop", "fill", "null".
        fill_value: float, default 0.
            The value to use when filling vectors. Only used if on_bad_vectors="fill".

@@ -1851,7 +1884,7 @@ class LanceTable(Table):
            data but will validate against any schema that's specified.
        on_bad_vectors: str, default "error"
            What to do if any of the vectors are not the same size or contains NaNs.
-            One of "error", "drop", "fill".
+            One of "error", "drop", "fill", "null".
        fill_value: float, default 0.
            The value to use when filling vectors. Only used if on_bad_vectors="fill".
        embedding_functions: list of EmbeddingFunctionModel, default None
@@ -1959,6 +1992,7 @@ class LanceTable(Table):
                "metric": query.metric,
                "nprobes": query.nprobes,
                "refine_factor": query.refine_factor,
+                "ef": query.ef,
            }
        return ds.scanner(
            columns=query.columns,
@@ -2151,13 +2185,11 @@ def _sanitize_schema(
        vector column to fixed_size_list(float32) if necessary.
    on_bad_vectors: str, default "error"
        What to do if any of the vectors are not the same size or contains NaNs.
-        One of "error", "drop", "fill".
+        One of "error", "drop", "fill", "null".
    fill_value: float, default 0.
        The value to use when filling vectors. Only used if on_bad_vectors="fill".
    """
    if schema is not None:
-        if data.schema == schema:
-            return data
        # cast the columns to the expected types
        data = data.combine_chunks()
        for field in schema:
@@ -2177,6 +2209,7 @@ def _sanitize_schema(
                    vector_column_name=field.name,
                    on_bad_vectors=on_bad_vectors,
                    fill_value=fill_value,
+                    table_schema=schema,
                )
        return pa.Table.from_arrays(
            [data[name] for name in schema.names], schema=schema
@@ -2197,6 +2230,7 @@ def _sanitize_schema(
 def _sanitize_vector_column(
    data: pa.Table,
    vector_column_name: str,
+    table_schema: Optional[pa.Schema] = None,
    on_bad_vectors: str = "error",
    fill_value: float = 0.0,
 ) -> pa.Table:
@@ -2211,12 +2245,16 @@ def _sanitize_vector_column(
        The name of the vector column.
    on_bad_vectors: str, default "error"
        What to do if any of the vectors are not the same size or contains NaNs.
-        One of "error", "drop", "fill".
+        One of "error", "drop", "fill", "null".
    fill_value: float, default 0.0
        The value to use when filling vectors. Only used if on_bad_vectors="fill".
    """
    # ChunkedArray is annoying to work with, so we combine chunks here
    vec_arr = data[vector_column_name].combine_chunks()
+    if table_schema is not None:
+        field = table_schema.field(vector_column_name)
+    else:
+        field = None
    typ = data[vector_column_name].type
    if pa.types.is_list(typ) or pa.types.is_large_list(typ):
        # if it's a variable size list array,
@@ -2243,7 +2281,11 @@ def _sanitize_vector_column(
                data, fill_value, on_bad_vectors, vec_arr, vector_column_name
            )
    else:
-        if pc.any(pc.is_null(vec_arr.values, nan_is_null=True)).as_py():
+        if (
+            field is not None
+            and not field.nullable
+            and pc.any(pc.is_null(vec_arr.values)).as_py()
+        ) or (pc.any(pc.is_nan(vec_arr.values)).as_py()):
            data = _sanitize_nans(
                data, fill_value, on_bad_vectors, vec_arr, vector_column_name
            )
@@ -2287,6 +2329,12 @@ def _sanitize_jagged(data, fill_value, on_bad_vectors, vec_arr, vector_column_na
        )
    elif on_bad_vectors == "drop":
        data = data.filter(correct_ndims)
+    elif on_bad_vectors == "null":
+        data = data.set_column(
+            data.column_names.index(vector_column_name),
+            vector_column_name,
+            pc.if_else(correct_ndims, vec_arr, pa.scalar(None)),
+        )
    return data


@@ -2303,7 +2351,8 @@ def _sanitize_nans(
        raise ValueError(
            f"Vector column {vector_column_name} has NaNs. "
            "Set on_bad_vectors='drop' to remove them, or "
-            "set on_bad_vectors='fill' and fill_value=<value> to replace them."
+            "set on_bad_vectors='fill' and fill_value=<value> to replace them. "
+            "Or set on_bad_vectors='null' to replace them with null."
        )
    elif on_bad_vectors == "fill":
        if fill_value is None:
@@ -2323,6 +2372,17 @@ def _sanitize_nans(
        np_arr = np_arr.reshape(-1, vec_arr.type.list_size)
        not_nulls = np.any(np_arr, axis=1)
        data = data.filter(~not_nulls)
+    elif on_bad_vectors == "null":
+        # Replace every vector that contains a NaN with null; vectors without
+        # NaNs are kept unchanged.
+        nan_mask = np.isnan(vec_arr.values.to_numpy(zero_copy_only=False))
+        nan_mask = nan_mask.reshape(-1, vec_arr.type.list_size)
+        no_nans = ~np.any(nan_mask, axis=1)
+        data = data.set_column(
+            data.column_names.index(vector_column_name),
+            vector_column_name,
+            pc.if_else(no_nans, vec_arr, pa.scalar(None)),
+        )
    return data


@@ -2588,7 +2648,7 @@ class AsyncTable:
            "append" and "overwrite".
        on_bad_vectors: str, default "error"
            What to do if any of the vectors are not the same size or contains NaNs.
-            One of "error", "drop", "fill".
+            One of "error", "drop", "fill", "null".
        fill_value: float, default 0.
            The value to use when filling vectors. Only used if on_bad_vectors="fill".

@@ -2671,7 +2731,7 @@ class AsyncTable:

    def vector_search(
        self,
-        query_vector: Optional[Union[VEC, Tuple]] = None,
+        query_vector: Union[VEC, Tuple],
    ) -> AsyncVectorQuery:
        """
        Search the table with a given query vector.
@@ -2710,6 +2770,8 @@ class AsyncTable:
                async_query = async_query.refine_factor(query.refine_factor)
            if query.vector_column:
                async_query = async_query.column(query.vector_column)
+            if query.ef:
+                async_query = async_query.ef(query.ef)

        if not query.prefilter:
            async_query = async_query.postfilter()
@@ -2873,6 +2935,19 @@ class AsyncTable:
        """
        return await self._inner.version()

+    async def list_versions(self):
+        """
+        List all versions of the table
+
+        Each returned entry has its "timestamp" value (integer nanoseconds)
+        replaced in place by a naive ``datetime`` in local time.
+        """
+        versions = await self._inner.list_versions()
+        for v in versions:
+            # Split with integer divmod: float math (`// 1e9`, `% 1e9`) would
+            # first round the ~1e18 nanosecond value to the nearest float.
+            secs, nanos = divmod(v["timestamp"], 1_000_000_000)
+            v["timestamp"] = datetime.fromtimestamp(secs) + timedelta(
+                microseconds=nanos // 1_000
+            )
+
+        return versions
+
    async def checkout(self, version):
        """
        Checks out a specific version of the Table
--- a/python/python/tests/test_embeddings.py
+++ b/python/python/tests/test_embeddings.py
@@ -81,14 +81,15 @@ def test_embedding_function(tmp_path):


 def test_embedding_with_bad_results(tmp_path):
-    @register("mock-embedding")
-    class MockEmbeddingFunction(TextEmbeddingFunction):
+    @register("null-embedding")
+    class NullEmbeddingFunction(TextEmbeddingFunction):
        def ndims(self):
            return 128

        def generate_embeddings(
            self, texts: Union[List[str], np.ndarray]
        ) -> list[Union[np.array, None]]:
+            # Return None, which is bad if field is non-nullable
            return [
                None if i % 2 == 0 else np.random.randn(self.ndims())
                for i in range(len(texts))
@@ -96,13 +97,17 @@ def test_embedding_with_bad_results(tmp_path):

    db = lancedb.connect(tmp_path)
    registry = EmbeddingFunctionRegistry.get_instance()
-    model = registry.get("mock-embedding").create()
+    model = registry.get("null-embedding").create()

    class Schema(LanceModel):
        text: str = model.SourceField()
        vector: Vector(model.ndims()) = model.VectorField()

    table = db.create_table("test", schema=Schema, mode="overwrite")
+    with pytest.raises(ValueError):
+        # Default on_bad_vectors is "error"
+        table.add([{"text": "hello world"}])
+
    table.add(
        [{"text": "hello world"}, {"text": "bar"}],
        on_bad_vectors="drop",
@@ -112,13 +117,33 @@ def test_embedding_with_bad_results(tmp_path):
    assert len(table) == 1
    assert df.iloc[0]["text"] == "bar"

-    # table = db.create_table("test2", schema=Schema, mode="overwrite")
-    # table.add(
-    #     [{"text": "hello world"}, {"text": "bar"}],
-    # )
-    # assert len(table) == 2
-    # tbl = table.to_arrow()
-    # assert tbl["vector"].null_count == 1
+    @register("nan-embedding")
+    class NanEmbeddingFunction(TextEmbeddingFunction):
+        def ndims(self):
+            return 128
+
+        def generate_embeddings(
+            self, texts: Union[List[str], np.ndarray]
+        ) -> list[Union[np.array, None]]:
+            # Even rows are all-NaN (bad vectors); np.NAN was removed in NumPy 2.0
+            return [
+                [np.nan] * self.ndims() if i % 2 == 0 else np.random.randn(self.ndims())
+                for i in range(len(texts))
+            ]
+
+    db = lancedb.connect(tmp_path)
+    registry = EmbeddingFunctionRegistry.get_instance()
+    model = registry.get("nan-embedding").create()
+
+    table = db.create_table("test2", schema=Schema, mode="overwrite")
+    table.alter_columns(dict(path="vector", nullable=True))
+    table.add(
+        [{"text": "hello world"}, {"text": "bar"}],
+        on_bad_vectors="null",
+    )
+    assert len(table) == 2
+    tbl = table.to_arrow()
+    assert tbl["vector"].null_count == 1


 def test_with_existing_vectors(tmp_path):
--- a/python/python/tests/test_query.py
+++ b/python/python/tests/test_query.py
@@ -1,21 +1,9 @@
-#  Copyright 2023 LanceDB Developers
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright The LanceDB Authors

 import unittest.mock as mock
 from datetime import timedelta
-from typing import Optional

-import lance
 import lancedb
 from lancedb.index import IvfPq
 import numpy as np
@@ -23,41 +11,15 @@ import pandas.testing as tm
 import pyarrow as pa
 import pytest
 import pytest_asyncio
-from lancedb.db import LanceDBConnection
 from lancedb.pydantic import LanceModel, Vector
 from lancedb.query import AsyncQueryBase, LanceVectorQueryBuilder, Query
 from lancedb.table import AsyncTable, LanceTable


-class MockTable:
-    def __init__(self, tmp_path):
-        self.uri = tmp_path
-        self._conn = LanceDBConnection(self.uri)
-
-    def to_lance(self):
-        return lance.dataset(self.uri)
-
-    def _execute_query(self, query, batch_size: Optional[int] = None):
-        ds = self.to_lance()
-        return ds.scanner(
-            columns=query.columns,
-            filter=query.filter,
-            prefilter=query.prefilter,
-            nearest={
-                "column": query.vector_column,
-                "q": query.vector,
-                "k": query.k,
-                "metric": query.metric,
-                "nprobes": query.nprobes,
-                "refine_factor": query.refine_factor,
-            },
-            batch_size=batch_size,
-            offset=query.offset,
-        ).to_reader()
-
-
-@pytest.fixture
-def table(tmp_path) -> MockTable:
+@pytest.fixture(scope="module")
+def table(tmpdir_factory) -> lancedb.table.Table:
+    tmp_path = str(tmpdir_factory.mktemp("data"))
+    db = lancedb.connect(tmp_path)
    df = pa.table(
        {
            "vector": pa.array(
@@ -68,8 +30,7 @@ def table(tmp_path) -> MockTable:
            "float_field": pa.array([1.0, 2.0]),
        }
    )
-    lance.write_dataset(df, tmp_path)
-    return MockTable(tmp_path)
+    return db.create_table("test", df)


@pytest_asyncio.fixture
@@ -126,6 +87,12 @@ def test_query_builder(table):
    assert all(np.array(rs[0]["vector"]) == [1, 2])


+def test_with_row_id(table: lancedb.table.Table):
+    """A search with ``with_row_id(True)`` must expose a ``_rowid`` column."""
+    rs = table.search().with_row_id(True).to_arrow()
+    assert "_rowid" in rs.column_names
+    # The fixture table holds two rows, so the expected row ids are 0 and 1.
+    assert rs["_rowid"].to_pylist() == [0, 1]
+
+
 def test_vector_query_with_no_limit(table):
    with pytest.raises(ValueError):
        LanceVectorQueryBuilder(table, [0, 0], "vector").limit(0).select(
@@ -365,6 +332,12 @@ async def test_query_to_pandas_async(table_async: AsyncTable):
    assert df.shape == (0, 4)


+@pytest.mark.asyncio
+async def test_none_query(table_async: AsyncTable):
+    """Passing ``None`` as the query vector must raise ``ValueError``."""
+    with pytest.raises(ValueError):
+        await table_async.query().nearest_to(None).to_arrow()
+
+
@pytest.mark.asyncio
 async def test_fast_search_async(tmp_path):
    db = await lancedb.connect_async(tmp_path)
--- a/python/python/tests/test_remote_db.py
+++ b/python/python/tests/test_remote_db.py
@@ -103,6 +103,47 @@ async def test_async_remote_db():
        assert table_names == []


+@pytest.mark.asyncio
+async def test_async_checkout():
+    """Checking out a version must change the "version" sent to the server.
+
+    The mock server answers describe requests with a fixed payload and
+    answers every other request with a row count selected from the
+    "version" field of the JSON body: 1 -> 100, 2 -> 200, null -> 300.
+    """
+
+    def handler(request):
+        if request.path == "/v1/table/test/describe/":
+            request.send_response(200)
+            request.send_header("Content-Type", "application/json")
+            request.end_headers()
+            response = json.dumps({"version": 42, "schema": {"fields": []}})
+            request.wfile.write(response.encode())
+            return
+
+        content_len = int(request.headers.get("Content-Length"))
+        body = request.rfile.read(content_len)
+        body = json.loads(body)
+
+        # Pick the row count that identifies which version was requested.
+        count = 0
+        if body["version"] == 1:
+            count = 100
+        elif body["version"] == 2:
+            count = 200
+        elif body["version"] is None:
+            count = 300
+
+        request.send_response(200)
+        request.send_header("Content-Type", "application/json")
+        request.end_headers()
+        request.wfile.write(json.dumps(count).encode())
+
+    async with mock_lancedb_connection_async(handler) as db:
+        table = await db.open_table("test")
+        # No checkout yet: the request carries a null version.
+        assert await table.count_rows() == 300
+        await table.checkout(1)
+        assert await table.count_rows() == 100
+        await table.checkout(2)
+        assert await table.count_rows() == 200
+        # checkout_latest() must reset the version back to null.
+        await table.checkout_latest()
+        assert await table.count_rows() == 300
+
+
@pytest.mark.asyncio
 async def test_http_error():
    request_id_holder = {"request_id": None}
@@ -185,8 +226,10 @@ def test_query_sync_minimal():
            "k": 10,
            "prefilter": False,
            "refine_factor": None,
+            "ef": None,
            "vector": [1.0, 2.0, 3.0],
            "nprobes": 20,
+            "version": None,
        }

        return pa.table({"id": [1, 2, 3]})
@@ -204,6 +247,7 @@ def test_query_sync_empty_query():
            "filter": "true",
            "vector": [],
            "columns": ["id"],
+            "version": None,
        }

        return pa.table({"id": [1, 2, 3]})
@@ -223,11 +267,13 @@ def test_query_sync_maximal():
            "refine_factor": 10,
            "vector": [1.0, 2.0, 3.0],
            "nprobes": 5,
+            "ef": None,
            "filter": "id > 0",
            "columns": ["id", "name"],
            "vector_column": "vector2",
            "fast_search": True,
            "with_row_id": True,
+            "version": None,
        }

        return pa.table({"id": [1, 2, 3], "name": ["a", "b", "c"]})
@@ -266,6 +312,7 @@ def test_query_sync_fts():
            },
            "k": 10,
            "vector": [],
+            "version": None,
        }

        return pa.table({"id": [1, 2, 3]})
@@ -282,6 +329,7 @@ def test_query_sync_fts():
            "k": 42,
            "vector": [],
            "with_row_id": True,
+            "version": None,
        }

        return pa.table({"id": [1, 2, 3]})
@@ -307,6 +355,7 @@ def test_query_sync_hybrid():
                "k": 42,
                "vector": [],
                "with_row_id": True,
+                "version": None,
            }
            return pa.table({"_rowid": [1, 2, 3], "_score": [0.1, 0.2, 0.3]})
        else:
@@ -318,7 +367,9 @@ def test_query_sync_hybrid():
                "refine_factor": None,
                "vector": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                "nprobes": 20,
+                "ef": None,
                "with_row_id": True,
+                "version": None,
            }
            return pa.table({"_rowid": [1, 2, 3], "_distance": [0.1, 0.2, 0.3]})

--- a/python/python/tests/test_table.py
+++ b/python/python/tests/test_table.py
@@ -240,6 +240,121 @@ def test_add(db):
    _add(table, schema)


+def test_add_subschema(tmp_path):
+    db = lancedb.connect(tmp_path)
+    schema = pa.schema(
+        [
+            pa.field("vector", pa.list_(pa.float32(), 2), nullable=True),
+            pa.field("item", pa.string(), nullable=True),
+            pa.field("price", pa.float64(), nullable=False),
+        ]
+    )
+    table = db.create_table("test", schema=schema)
+
+    data = {"price": 10.0, "item": "foo"}
+    table.add([data])
+    data = {"price": 2.0, "vector": [3.1, 4.1]}
+    table.add([data])
+    data = {"price": 3.0, "vector": [5.9, 26.5], "item": "bar"}
+    table.add([data])
+
+    expected = pa.table(
+        {
+            "vector": [None, [3.1, 4.1], [5.9, 26.5]],
+            "item": ["foo", None, "bar"],
+            "price": [10.0, 2.0, 3.0],
+        },
+        schema=schema,
+    )
+    assert table.to_arrow() == expected
+
+    data = {"item": "foo"}
+    # We can't omit a column if it's not nullable
+    with pytest.raises(OSError, match="Invalid user input"):
+        table.add([data])
+
+    # We can add it if we make the column nullable
+    table.alter_columns(dict(path="price", nullable=True))
+    table.add([data])
+
+    expected_schema = pa.schema(
+        [
+            pa.field("vector", pa.list_(pa.float32(), 2), nullable=True),
+            pa.field("item", pa.string(), nullable=True),
+            pa.field("price", pa.float64(), nullable=True),
+        ]
+    )
+    expected = pa.table(
+        {
+            "vector": [None, [3.1, 4.1], [5.9, 26.5], None],
+            "item": ["foo", None, "bar", "foo"],
+            "price": [10.0, 2.0, 3.0, None],
+        },
+        schema=expected_schema,
+    )
+    assert table.to_arrow() == expected
+
+
+def test_add_nullability(tmp_path):
+    db = lancedb.connect(tmp_path)
+    schema = pa.schema(
+        [
+            pa.field("vector", pa.list_(pa.float32(), 2), nullable=False),
+            pa.field("id", pa.string(), nullable=False),
+        ]
+    )
+    table = db.create_table("test", schema=schema)
+
+    nullable_schema = pa.schema(
+        [
+            pa.field("vector", pa.list_(pa.float32(), 2), nullable=True),
+            pa.field("id", pa.string(), nullable=True),
+        ]
+    )
+    data = pa.table(
+        {
+            "vector": [[3.1, 4.1], [5.9, 26.5]],
+            "id": ["foo", "bar"],
+        },
+        schema=nullable_schema,
+    )
+    # We can add nullable schema if it doesn't actually contain nulls
+    table.add(data)
+
+    expected = data.cast(schema)
+    assert table.to_arrow() == expected
+
+    data = pa.table(
+        {
+            "vector": [None],
+            "id": ["baz"],
+        },
+        schema=nullable_schema,
+    )
+    # We can't add nullable schema if it contains nulls
+    with pytest.raises(Exception, match="Vector column vector has NaNs"):
+        table.add(data)
+
+    # But we can make it nullable
+    table.alter_columns(dict(path="vector", nullable=True))
+    table.add(data)
+
+    expected_schema = pa.schema(
+        [
+            pa.field("vector", pa.list_(pa.float32(), 2), nullable=True),
+            pa.field("id", pa.string(), nullable=False),
+        ]
+    )
+    expected = pa.table(
+        {
+            "vector": [[3.1, 4.1], [5.9, 26.5], None],
+            "id": ["foo", "bar", "baz"],
+        },
+        schema=expected_schema,
+    )
+    assert table.to_arrow() == expected
+
+
 def test_add_pydantic_model(db):
    # https://github.com/lancedb/lancedb/issues/562

--- a/python/src/query.rs
+++ b/python/src/query.rs
@@ -195,6 +195,10 @@ impl VectorQuery {
        self.inner = self.inner.clone().nprobes(nprobe as usize);
    }

+    /// Sets the `ef` search parameter on the wrapped vector query.
+    ///
+    /// The `u32` received from the caller is converted to `usize` and passed
+    /// to the inner query's `ef` builder method; the result replaces
+    /// `self.inner`.
+    pub fn ef(&mut self, ef: u32) {
+        self.inner = self.inner.clone().ef(ef as usize);
+    }
+
    pub fn bypass_vector_index(&mut self) {
        self.inner = self.inner.clone().bypass_vector_index()
    }
--- a/python/src/table.rs
+++ b/python/src/table.rs
@@ -8,7 +8,7 @@ use lancedb::table::{
 use pyo3::{
    exceptions::{PyRuntimeError, PyValueError},
    pyclass, pymethods,
-    types::{PyDict, PyDictMethods, PyString},
+    types::{IntoPyDict, PyDict, PyDictMethods, PyString},
    Bound, FromPyObject, PyAny, PyRef, PyResult, Python, ToPyObject,
 };
 use pyo3_asyncio_0_21::tokio::future_into_py;
@@ -246,6 +246,33 @@ impl Table {
        )
    }

+    /// Lists the table's versions as a Python list of dicts.
+    ///
+    /// Each dict carries `"version"`, `"timestamp"` (the value of
+    /// `timestamp_nanos_opt()`, or 0 when that returns `None`) and
+    /// `"metadata"` (a dict built from the version's metadata pairs).
+    /// Failures while building a dict are returned as Python errors rather
+    /// than panicking inside the async task.
+    pub fn list_versions(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
+        let inner = self_.inner_ref()?.clone();
+        future_into_py(self_.py(), async move {
+            let versions = inner.list_versions().await.infer_error()?;
+            Python::with_gil(|py| {
+                versions
+                    .iter()
+                    .map(|v| -> PyResult<pyo3::PyObject> {
+                        let dict = PyDict::new_bound(py);
+                        dict.set_item("version", v.version)?;
+                        dict.set_item(
+                            "timestamp",
+                            v.timestamp.timestamp_nanos_opt().unwrap_or_default(),
+                        )?;
+
+                        let tup: Vec<(&String, &String)> = v.metadata.iter().collect();
+                        dict.set_item("metadata", tup.into_py_dict(py))?;
+                        Ok(dict.to_object(py))
+                    })
+                    .collect::<PyResult<Vec<_>>>()
+            })
+        })
+    }
+
    pub fn checkout(self_: PyRef<'_, Self>, version: u64) -> PyResult<Bound<'_, PyAny>> {
        let inner = self_.inner_ref()?.clone();
        future_into_py(self_.py(), async move {
--- a/rust/ffi/node/Cargo.toml
+++ b/rust/ffi/node/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-node"
-version = "0.13.0-beta.1"
+version = "0.13.0"
 description = "Serverless, low-latency vector database for AI applications"
 license.workspace = true
 edition.workspace = true
--- a/rust/lancedb/Cargo.toml
+++ b/rust/lancedb/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb"
-version = "0.13.0-beta.1"
+version = "0.13.0"
 edition.workspace = true
 description = "LanceDB: A serverless, low-latency vector database for AI applications"
 license.workspace = true
@@ -46,10 +46,18 @@ serde = { version = "^1" }
 serde_json = { version = "1" }
 async-openai = { version = "0.20.0", optional = true }
 serde_with = { version = "3.8.1" }
+aws-sdk-bedrockruntime = { version = "1.27.0", optional = true }
 # For remote feature
-reqwest = { version = "0.12.0", features = ["gzip", "json", "stream"], optional = true }
-rand = { version = "0.8.3", features = ["small_rng"], optional = true}
-http = { version = "1",  optional = true } # Matching what is in reqwest
+reqwest = { version = "0.12.0", default-features = false, features = [
+    "charset",
+    "gzip",
+    "http2",
+    "json",
+    "macos-system-configuration",
+    "stream",
+], optional = true }
+rand = { version = "0.8.3", features = ["small_rng"], optional = true }
+http = { version = "1", optional = true } # Matching what is in reqwest
 uuid = { version = "1.7.0", features = ["v4"], optional = true }
 polars-arrow = { version = ">=0.37,<0.40.0", optional = true }
 polars = { version = ">=0.37,<0.40.0", optional = true }
@@ -72,11 +80,13 @@ aws-config = { version = "1.0" }
 aws-smithy-runtime = { version = "1.3" }
 http-body = "1" # Matching reqwest

+
 [features]
-default = []
+default = ["default-tls"]
 remote = ["dep:reqwest", "dep:http", "dep:rand", "dep:uuid"]
 fp16kernels = ["lance-linalg/fp16kernels"]
 s3-test = []
+bedrock = ["dep:aws-sdk-bedrockruntime"]
 openai = ["dep:async-openai", "dep:reqwest"]
 polars = ["dep:polars-arrow", "dep:polars"]
 sentence-transformers = [
@@ -87,6 +97,11 @@ sentence-transformers = [
    "dep:tokenizers"
 ]

+# TLS
+default-tls = ["reqwest?/default-tls"]
+native-tls = ["reqwest?/native-tls"]
+rustls-tls = ["reqwest?/rustls-tls"]
+
 [[example]]
 name = "openai"
 required-features = ["openai"]
@@ -94,3 +109,7 @@ required-features = ["openai"]
 [[example]]
 name = "sentence_transformers"
 required-features = ["sentence-transformers"]
+
+[[example]]
+name = "bedrock"
+required-features = ["bedrock"]
--- a/rust/lancedb/examples/bedrock.rs
+++ b/rust/lancedb/examples/bedrock.rs
@@ -0,0 +1,89 @@
+use std::{iter::once, sync::Arc};
+
+use arrow_array::{Float64Array, Int32Array, RecordBatch, RecordBatchIterator, StringArray};
+use arrow_schema::{DataType, Field, Schema};
+use aws_config::Region;
+use aws_sdk_bedrockruntime::Client;
+use futures::StreamExt;
+use lancedb::{
+    arrow::IntoArrow,
+    connect,
+    embeddings::{bedrock::BedrockEmbeddingFunction, EmbeddingDefinition, EmbeddingFunction},
+    query::{ExecutableQuery, QueryBase},
+    Result,
+};
+
+/// End-to-end example: registers a Bedrock embedding function, creates a table
+/// whose `embeddings` column is computed from `text`, then runs a vector
+/// search for a free-text query and prints the closest match.
+#[tokio::main]
+async fn main() -> Result<()> {
+    let tempdir = tempfile::tempdir().unwrap();
+    let tempdir = tempdir.path().to_str().unwrap();
+
+    // create Bedrock embedding function
+    let region: String = "us-east-1".to_string();
+    let config = aws_config::defaults(aws_config::BehaviorVersion::latest())
+        .region(Region::new(region))
+        .load()
+        .await;
+
+    let embedding = Arc::new(BedrockEmbeddingFunction::new(
+        Client::new(&config), // Bedrock runtime client built from the config above
+    ));
+
+    let db = connect(tempdir).execute().await?;
+    // Register under the name referenced by the EmbeddingDefinition below.
+    db.embedding_registry()
+        .register("bedrock", embedding.clone())?;
+
+    let table = db
+        .create_table("vectors", make_data())
+        .add_embedding(EmbeddingDefinition::new(
+            "text",
+            "bedrock",
+            Some("embeddings"),
+        ))?
+        .execute()
+        .await?;
+
+    // execute vector search
+    let query = Arc::new(StringArray::from_iter_values(once("something warm")));
+    let query_vector = embedding.compute_query_embeddings(query)?;
+    let mut results = table
+        .vector_search(query_vector)?
+        .limit(1)
+        .execute()
+        .await?;
+
+    // Read the `text` value of the first row in the first result batch.
+    let rb = results.next().await.unwrap()?;
+    let out = rb
+        .column_by_name("text")
+        .unwrap()
+        .as_any()
+        .downcast_ref::<StringArray>()
+        .unwrap();
+    let text = out.iter().next().unwrap().unwrap();
+    println!("Closest match: {}", text);
+    Ok(())
+}
+
+/// Builds the example input: a single record batch of four rows with the
+/// columns `id` (Int32), `text` (Utf8) and `price` (Float64), wrapped in a
+/// boxed `RecordBatchIterator`.
+fn make_data() -> impl IntoArrow {
+    let schema = Schema::new(vec![
+        Field::new("id", DataType::Int32, true),
+        Field::new("text", DataType::Utf8, false),
+        Field::new("price", DataType::Float64, false),
+    ]);
+
+    let id = Int32Array::from(vec![1, 2, 3, 4]);
+    let text = StringArray::from_iter_values(vec![
+        "Black T-Shirt",
+        "Leather Jacket",
+        "Winter Parka",
+        "Hooded Sweatshirt",
+    ]);
+    let price = Float64Array::from(vec![10.0, 50.0, 100.0, 30.0]);
+    let schema = Arc::new(schema);
+    // Column order must match the field order declared in the schema above.
+    let rb = RecordBatch::try_new(
+        schema.clone(),
+        vec![Arc::new(id), Arc::new(text), Arc::new(price)],
+    )
+    .unwrap();
+    Box::new(RecordBatchIterator::new(vec![Ok(rb)], schema))
+}
--- a/rust/lancedb/src/embeddings.rs
+++ b/rust/lancedb/src/embeddings.rs
@@ -17,6 +17,9 @@ pub mod openai;
 #[cfg(feature = "sentence-transformers")]
 pub mod sentence_transformers;

+#[cfg(feature = "bedrock")]
+pub mod bedrock;
+
 use lance::arrow::RecordBatchExt;
 use std::{
    borrow::Cow,
--- a/rust/lancedb/src/embeddings/bedrock.rs
+++ b/rust/lancedb/src/embeddings/bedrock.rs
@@ -0,0 +1,210 @@
+use aws_sdk_bedrockruntime::Client as BedrockClient;
+use std::{borrow::Cow, fmt::Formatter, str::FromStr, sync::Arc};
+
+use arrow::array::{AsArray, Float32Builder};
+use arrow_array::{Array, ArrayRef, FixedSizeListArray, Float32Array};
+use arrow_data::ArrayData;
+use arrow_schema::DataType;
+use serde_json::{json, Value};
+
+use super::EmbeddingFunction;
+use crate::{Error, Result};
+
+use tokio::runtime::Handle;
+use tokio::task::block_in_place;
+
+/// Embedding models this module can call through Bedrock.
+#[derive(Debug)]
+pub enum BedrockEmbeddingModel {
+    /// Sent to Bedrock as model id `amazon.titan-embed-text-v1`.
+    TitanEmbedding,
+    /// Sent to Bedrock as model id `cohere.embed-english-v3`.
+    CohereLarge,
+}
+
+impl BedrockEmbeddingModel {
+    /// Number of dimensions assumed for one embedding vector of this model.
+    fn ndims(&self) -> usize {
+        match self {
+            Self::TitanEmbedding => 1536,
+            Self::CohereLarge => 1024,
+        }
+    }
+
+    /// Model identifier passed to the `invoke_model` request.
+    fn model_id(&self) -> &str {
+        match self {
+            Self::TitanEmbedding => "amazon.titan-embed-text-v1",
+            Self::CohereLarge => "cohere.embed-english-v3",
+        }
+    }
+}
+
+/// Parses the short model names `titan-embed-text-v1` and
+/// `cohere-embed-english-v3`. Note these are not the same strings as the
+/// ids returned by `model_id`; any other input yields `Error::InvalidInput`.
+impl FromStr for BedrockEmbeddingModel {
+    type Err = Error;
+
+    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
+        match s {
+            "titan-embed-text-v1" => Ok(Self::TitanEmbedding),
+            "cohere-embed-english-v3" => Ok(Self::CohereLarge),
+            _ => Err(Error::InvalidInput {
+                message: "Invalid model. Available models are: 'titan-embed-text-v1', 'cohere-embed-english-v3'".to_string()
+            }),
+        }
+    }
+}
+
+/// Embedding function that computes vectors by invoking a Bedrock model.
+pub struct BedrockEmbeddingFunction {
+    // Which model to invoke; also determines the embedding dimension.
+    model: BedrockEmbeddingModel,
+    // Client used to send the `invoke_model` requests.
+    client: BedrockClient,
+}
+
+impl BedrockEmbeddingFunction {
+    /// Creates an embedding function that uses
+    /// `BedrockEmbeddingModel::TitanEmbedding`.
+    pub fn new(client: BedrockClient) -> Self {
+        Self {
+            model: BedrockEmbeddingModel::TitanEmbedding,
+            client,
+        }
+    }
+
+    /// Creates an embedding function that uses the given `model`.
+    pub fn with_model(client: BedrockClient, model: BedrockEmbeddingModel) -> Self {
+        Self { model, client }
+    }
+}
+
+impl EmbeddingFunction for BedrockEmbeddingFunction {
+    /// Always returns `"bedrock"`, regardless of the selected model.
+    fn name(&self) -> &str {
+        "bedrock"
+    }
+
+    /// The declared source type is `Utf8`.
+    fn source_type(&self) -> Result<Cow<DataType>> {
+        Ok(Cow::Owned(DataType::Utf8))
+    }
+
+    /// A fixed-size list of `Float32` whose size is the model's dimension.
+    fn dest_type(&self) -> Result<Cow<DataType>> {
+        let n_dims = self.model.ndims();
+        Ok(Cow::Owned(DataType::new_fixed_size_list(
+            DataType::Float32,
+            n_dims as i32,
+            false,
+        )))
+    }
+
+    /// Embeds every row of `source` and returns a `FixedSizeListArray` with
+    /// one list per input row.
+    fn compute_source_embeddings(&self, source: ArrayRef) -> Result<ArrayRef> {
+        let len = source.len();
+        let n_dims = self.model.ndims();
+        let inner = self.compute_inner(source)?;
+
+        let fsl = DataType::new_fixed_size_list(DataType::Float32, n_dims as i32, false);
+
+        // Wrap the flat float values as the child of a fixed-size list array;
+        // `build()` validates the layout and its error is propagated.
+        let array_data = ArrayData::builder(fsl)
+            .len(len)
+            .add_child_data(inner.into_data())
+            .build()?;
+
+        Ok(Arc::new(FixedSizeListArray::from(array_data)))
+    }
+
+    /// Embeds `input` and returns the flat `Float32Array` produced by
+    /// `compute_inner`, without wrapping it in a list array.
+    fn compute_query_embeddings(&self, input: Arc<dyn Array>) -> Result<Arc<dyn Array>> {
+        let arr = self.compute_inner(input)?;
+        Ok(Arc::new(arr))
+    }
+}
+
+/// Debug output shows only the selected model.
+impl std::fmt::Debug for BedrockEmbeddingFunction {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("BedrockEmbeddingFunction")
+            .field("model", &self.model)
+            // The client field is intentionally left out of the output.
+            .finish()
+    }
+}
+
+impl BedrockEmbeddingFunction {
+    /// Embeds every string in `source`, one Bedrock `invoke_model` call per
+    /// row, and returns all embedding values concatenated in row order.
+    ///
+    /// Returns `Error::InvalidInput` when `source` is nullable or is not a
+    /// `Utf8`/`LargeUtf8` array, and `Error::Runtime` when a request fails or
+    /// a response cannot be interpreted as an embedding of the expected size.
+    ///
+    /// The request is driven synchronously with `block_in_place` and
+    /// `Handle::current()`, so this must be called from within a
+    /// multi-threaded Tokio runtime; per the Tokio documentation those calls
+    /// panic otherwise.
+    fn compute_inner(&self, source: Arc<dyn Array>) -> Result<Float32Array> {
+        if source.is_nullable() {
+            return Err(Error::InvalidInput {
+                message: "Expected non-nullable data type".to_string(),
+            });
+        }
+
+        let texts = match source.data_type() {
+            DataType::Utf8 => source
+                .as_string::<i32>()
+                .into_iter()
+                .map(|s| s.expect("array is non-nullable").to_string())
+                .collect::<Vec<String>>(),
+            DataType::LargeUtf8 => source
+                .as_string::<i64>()
+                .into_iter()
+                .map(|s| s.expect("array is non-nullable").to_string())
+                .collect::<Vec<String>>(),
+            _ => {
+                return Err(Error::InvalidInput {
+                    message: "Expected Utf8 data type".to_string(),
+                })
+            }
+        };
+
+        let n_dims = self.model.ndims();
+        let mut builder = Float32Builder::with_capacity(texts.len() * n_dims);
+
+        for text in texts {
+            // NOTE(review): the Cohere request always uses "search_document",
+            // including when this is reached via compute_query_embeddings;
+            // confirm whether queries should use a different input_type.
+            let request_body = match self.model {
+                BedrockEmbeddingModel::TitanEmbedding => {
+                    json!({
+                        "inputText": text
+                    })
+                }
+                BedrockEmbeddingModel::CohereLarge => {
+                    json!({
+                        "texts": [text],
+                        "input_type": "search_document"
+                    })
+                }
+            };
+            let payload = serde_json::to_vec(&request_body).map_err(|e| Error::Runtime {
+                message: format!("Failed to serialize request: {}", e),
+            })?;
+
+            let client = self.client.clone();
+            let model_id = self.model.model_id().to_string();
+
+            // Surface service/transport failures as errors instead of
+            // panicking in the middle of an embedding computation.
+            let response = block_in_place(move || {
+                Handle::current().block_on(async move {
+                    client
+                        .invoke_model()
+                        .model_id(model_id)
+                        .body(aws_sdk_bedrockruntime::primitives::Blob::new(payload))
+                        .send()
+                        .await
+                })
+            })
+            .map_err(|e| Error::Runtime {
+                message: format!("Bedrock invoke_model request failed: {:?}", e),
+            })?;
+
+            let response_json: Value =
+                serde_json::from_slice(response.body.as_ref()).map_err(|e| Error::Runtime {
+                    message: format!("Failed to parse response: {}", e),
+                })?;
+
+            // Each model puts the vector under a different key.
+            let (values, missing_message) = match self.model {
+                BedrockEmbeddingModel::TitanEmbedding => {
+                    (&response_json["embedding"], "Missing embedding in response")
+                }
+                BedrockEmbeddingModel::CohereLarge => (
+                    &response_json["embeddings"][0],
+                    "Missing embeddings in response",
+                ),
+            };
+
+            let embedding = values
+                .as_array()
+                .ok_or_else(|| Error::Runtime {
+                    message: missing_message.to_string(),
+                })?
+                .iter()
+                .map(|v| {
+                    v.as_f64().map(|f| f as f32).ok_or_else(|| Error::Runtime {
+                        message: "Non-numeric value in embedding response".to_string(),
+                    })
+                })
+                .collect::<Result<Vec<f32>>>()?;
+
+            // A wrong-sized vector would silently misalign every later row.
+            if embedding.len() != n_dims {
+                return Err(Error::Runtime {
+                    message: format!(
+                        "Expected embedding of {} dimensions, got {}",
+                        n_dims,
+                        embedding.len()
+                    ),
+                });
+            }
+
+            builder.append_slice(&embedding);
+        }
+
+        Ok(builder.finish())
+    }
+}
--- a/rust/lancedb/src/query.rs
+++ b/rust/lancedb/src/query.rs
@@ -704,6 +704,9 @@ pub struct VectorQuery {
    // IVF PQ - ANN search.
    pub(crate) query_vector: Vec<Arc<dyn Array>>,
    pub(crate) nprobes: usize,
+    // The number of candidates to return during the refine step for HNSW,
+    // defaults to 1.5 * limit.
+    pub(crate) ef: Option<usize>,
    pub(crate) refine_factor: Option<u32>,
    pub(crate) distance_type: Option<DistanceType>,
    /// Default is true. Set to false to enforce a brute force search.
@@ -717,6 +720,7 @@ impl VectorQuery {
            column: None,
            query_vector: Vec::new(),
            nprobes: 20,
+            ef: None,
            refine_factor: None,
            distance_type: None,
            use_index: true,
@@ -776,6 +780,18 @@ impl VectorQuery {
        self
    }

+    /// Set the number of candidates to return during the refine step for HNSW
+    ///
+    /// This argument is only used when the vector column has an HNSW index.
+    /// If there is no index then this value is ignored.
+    ///
+    /// Increasing this value will increase the recall of your query but will
+    /// also increase the latency of your query.  The default value is 1.5*limit.
+    pub fn ef(mut self, ef: usize) -> Self {
+        self.ef = Some(ef);
+        self
+    }
+
    /// A multiplier to control how many additional rows are taken during the refine step
    ///
    /// This argument is only used when the vector column has an IVF PQ index.
--- a/rust/lancedb/src/remote/table.rs
+++ b/rust/lancedb/src/remote/table.rs
@@ -19,9 +19,10 @@ use http::header::CONTENT_TYPE;
 use http::StatusCode;
 use lance::arrow::json::JsonSchema;
 use lance::dataset::scanner::DatasetRecordBatchStream;
-use lance::dataset::{ColumnAlteration, NewColumnTransform};
+use lance::dataset::{ColumnAlteration, NewColumnTransform, Version};
 use lance_datafusion::exec::OneShotExec;
 use serde::{Deserialize, Serialize};
+use tokio::sync::RwLock;

 use crate::{
    connection::NoData,
@@ -43,17 +44,32 @@ pub struct RemoteTable<S: HttpSend = Sender> {
    #[allow(dead_code)]
    client: RestfulLanceDbClient<S>,
    name: String,
+
+    version: RwLock<Option<u64>>,
 }

 impl<S: HttpSend> RemoteTable<S> {
    pub fn new(client: RestfulLanceDbClient<S>, name: String) -> Self {
-        Self { client, name }
+        Self {
+            client,
+            name,
+            version: RwLock::new(None),
+        }
    }

    async fn describe(&self) -> Result<TableDescription> {
-        let request = self
+        let version = self.current_version().await;
+        self.describe_version(version).await
+    }
+
+    async fn describe_version(&self, version: Option<u64>) -> Result<TableDescription> {
+        let mut request = self
            .client
            .post(&format!("/v1/table/{}/describe/", self.name));
+
+        let body = serde_json::json!({ "version": version });
+        request = request.json(&body);
+
        let (request_id, response) = self.client.send(request, true).await?;

        let response = self.check_table_response(&request_id, response).await?;
@@ -196,6 +212,7 @@ impl<S: HttpSend> RemoteTable<S> {
        body["prefilter"] = query.base.prefilter.into();
        body["distance_type"] = serde_json::json!(query.distance_type.unwrap_or_default());
        body["nprobes"] = query.nprobes.into();
+        body["ef"] = query.ef.into();
        body["refine_factor"] = query.refine_factor.into();
        if let Some(vector_column) = query.column.as_ref() {
            body["vector_column"] = serde_json::Value::String(vector_column.clone());
@@ -250,6 +267,24 @@ impl<S: HttpSend> RemoteTable<S> {
            }
        }
    }
+
+    /// Reject the call when this table reference is pinned to a version.
+    ///
+    /// Returns `Ok(())` while no version is checked out, and
+    /// `Error::NotSupported` naming the pinned version otherwise.
+    async fn check_mutable(&self) -> Result<()> {
+        // Copy the pinned version out so the read lock is released right away.
+        let pinned = *self.version.read().await;
+        if let Some(version) = pinned {
+            return Err(Error::NotSupported {
+                message: format!(
+                    "Cannot mutate table reference fixed at version {}. Call checkout_latest() to get a mutable table reference.",
+                    version
+                )
+            });
+        }
+        Ok(())
+    }
+
+    /// Snapshot of the checked-out version: `Some(v)` when pinned via
+    /// `checkout`, `None` when no version is pinned.
+    async fn current_version(&self) -> Option<u64> {
+        *self.version.read().await
+    }
 }

 #[derive(Deserialize)]
@@ -277,7 +312,11 @@ mod test_utils {
            T: Into<reqwest::Body>,
        {
            let client = client_with_handler(handler);
-            Self { client, name }
+            Self {
+                client,
+                name,
+                version: RwLock::new(None),
+            }
        }
    }
 }
@@ -296,21 +335,62 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
    async fn version(&self) -> Result<u64> {
        self.describe().await.map(|desc| desc.version)
    }
-    async fn checkout(&self, _version: u64) -> Result<()> {
-        Err(Error::NotSupported {
-            message: "checkout is not supported on LanceDB cloud.".into(),
-        })
+    async fn checkout(&self, version: u64) -> Result<()> {
+        // Probe the requested version with a describe call before pinning it,
+        // so a non-existent version is reported immediately.
+        if let Err(err) = self.describe_version(Some(version)).await {
+            return Err(match err {
+                // Rewrite "table not found" so the message also names the
+                // version that could not be located.
+                Error::TableNotFound { name } => Error::TableNotFound {
+                    name: format!("{} (version: {})", name, version),
+                },
+                other => other,
+            });
+        }
+
+        // The version exists: pin it for subsequent requests.
+        *self.version.write().await = Some(version);
+        Ok(())
    }
    async fn checkout_latest(&self) -> Result<()> {
-        Err(Error::NotSupported {
-            message: "checkout is not supported on LanceDB cloud.".into(),
-        })
+        let mut write_guard = self.version.write().await;
+        *write_guard = None;
+        Ok(())
    }
    async fn restore(&self) -> Result<()> {
+        self.check_mutable().await?;
        Err(Error::NotSupported {
            message: "restore is not supported on LanceDB cloud.".into(),
        })
    }
+
+    /// Fetch the table's versions from the `version/list` endpoint.
+    ///
+    /// Fails with `Error::Http` when the response body cannot be parsed as
+    /// `{"versions": [...]}`; the error text includes the raw body.
+    async fn list_versions(&self) -> Result<Vec<Version>> {
+        // Shape of the JSON document returned by the endpoint.
+        #[derive(Deserialize)]
+        struct ListVersionsResponse {
+            versions: Vec<Version>,
+        }
+
+        let endpoint = format!("/v1/table/{}/version/list/", self.name);
+        let request = self.client.post(&endpoint);
+        let (request_id, response) = self.client.send(request, true).await?;
+        let response = self.check_table_response(&request_id, response).await?;
+
+        // Read the payload as text first so a parse failure can echo it back.
+        let raw = response.text().await.err_to_http(request_id.clone())?;
+        match serde_json::from_str::<ListVersionsResponse>(&raw) {
+            Ok(parsed) => Ok(parsed.versions),
+            Err(err) => Err(Error::Http {
+                source: format!(
+                    "Failed to parse list_versions response: {}, body: {}",
+                    err, raw
+                )
+                .into(),
+                request_id,
+                status_code: None,
+            }),
+        }
+    }
+
    async fn schema(&self) -> Result<SchemaRef> {
        let schema = self.describe().await?.schema;
        Ok(Arc::new(schema.try_into()?))
@@ -320,10 +400,13 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
            .client
            .post(&format!("/v1/table/{}/count_rows/", self.name));

+        let version = self.current_version().await;
+
        if let Some(filter) = filter {
-            request = request.json(&serde_json::json!({ "predicate": filter }));
+            request = request.json(&serde_json::json!({ "predicate": filter, "version": version }));
        } else {
-            request = request.json(&serde_json::json!({}));
+            let body = serde_json::json!({ "version": version });
+            request = request.json(&body);
        }

        let (request_id, response) = self.client.send(request, true).await?;
@@ -343,6 +426,7 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
        add: AddDataBuilder<NoData>,
        data: Box<dyn RecordBatchReader + Send>,
    ) -> Result<()> {
+        self.check_mutable().await?;
        let body = Self::reader_as_body(data)?;
        let mut request = self
            .client
@@ -371,7 +455,8 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
    ) -> Result<Arc<dyn ExecutionPlan>> {
        let request = self.client.post(&format!("/v1/table/{}/query/", self.name));

-        let body = serde_json::Value::Object(Default::default());
+        let version = self.current_version().await;
+        let body = serde_json::json!({ "version": version });
        let bodies = Self::apply_vector_query_params(body, query)?;

        let mut futures = Vec::with_capacity(bodies.len());
@@ -406,7 +491,8 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
            .post(&format!("/v1/table/{}/query/", self.name))
            .header(CONTENT_TYPE, JSON_CONTENT_TYPE);

-        let mut body = serde_json::Value::Object(Default::default());
+        let version = self.current_version().await;
+        let mut body = serde_json::json!({ "version": version });
        Self::apply_query_params(&mut body, query)?;
        // Empty vector can be passed if no vector search is performed.
        body["vector"] = serde_json::Value::Array(Vec::new());
@@ -420,6 +506,7 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
        Ok(DatasetRecordBatchStream::new(stream))
    }
    async fn update(&self, update: UpdateBuilder) -> Result<u64> {
+        self.check_mutable().await?;
        let request = self
            .client
            .post(&format!("/v1/table/{}/update/", self.name));
@@ -441,6 +528,7 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
        Ok(0) // TODO: support returning number of modified rows once supported in SaaS.
    }
    async fn delete(&self, predicate: &str) -> Result<()> {
+        self.check_mutable().await?;
        let body = serde_json::json!({ "predicate": predicate });
        let request = self
            .client
@@ -452,6 +540,7 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
    }

    async fn create_index(&self, mut index: IndexBuilder) -> Result<()> {
+        self.check_mutable().await?;
        let request = self
            .client
            .post(&format!("/v1/table/{}/create_index/", self.name));
@@ -530,6 +619,7 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
        params: MergeInsertBuilder,
        new_data: Box<dyn RecordBatchReader + Send>,
    ) -> Result<()> {
+        self.check_mutable().await?;
        let query = MergeInsertRequest::try_from(params)?;
        let body = Self::reader_as_body(new_data)?;
        let request = self
@@ -546,6 +636,7 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
        Ok(())
    }
    async fn optimize(&self, _action: OptimizeAction) -> Result<OptimizeStats> {
+        self.check_mutable().await?;
        Err(Error::NotSupported {
            message: "optimize is not supported on LanceDB cloud.".into(),
        })
@@ -555,16 +646,19 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
        _transforms: NewColumnTransform,
        _read_columns: Option<Vec<String>>,
    ) -> Result<()> {
+        self.check_mutable().await?;
        Err(Error::NotSupported {
            message: "add_columns is not yet supported.".into(),
        })
    }
    async fn alter_columns(&self, _alterations: &[ColumnAlteration]) -> Result<()> {
+        self.check_mutable().await?;
        Err(Error::NotSupported {
            message: "alter_columns is not yet supported.".into(),
        })
    }
    async fn drop_columns(&self, _columns: &[&str]) -> Result<()> {
+        self.check_mutable().await?;
        Err(Error::NotSupported {
            message: "drop_columns is not yet supported.".into(),
        })
@@ -572,9 +666,13 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {

    async fn list_indices(&self) -> Result<Vec<IndexConfig>> {
        // Make request to list the indices
-        let request = self
+        let mut request = self
            .client
            .post(&format!("/v1/table/{}/index/list/", self.name));
+        let version = self.current_version().await;
+        let body = serde_json::json!({ "version": version });
+        request = request.json(&body);
+
        let (request_id, response) = self.client.send(request, true).await?;
        let response = self.check_table_response(&request_id, response).await?;

@@ -624,10 +722,14 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
    }

    async fn index_stats(&self, index_name: &str) -> Result<Option<IndexStatistics>> {
-        let request = self.client.post(&format!(
+        let mut request = self.client.post(&format!(
            "/v1/table/{}/index/{}/stats/",
            self.name, index_name
        ));
+        let version = self.current_version().await;
+        let body = serde_json::json!({ "version": version });
+        request = request.json(&body);
+
        let (request_id, response) = self.client.send(request, true).await?;

        if response.status() == StatusCode::NOT_FOUND {
@@ -701,6 +803,7 @@ mod tests {
    use arrow::{array::AsArray, compute::concat_batches, datatypes::Int32Type};
    use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator};
    use arrow_schema::{DataType, Field, Schema};
+    use chrono::{DateTime, Utc};
    use futures::{future::BoxFuture, StreamExt, TryFutureExt};
    use lance_index::scalar::FullTextSearchQuery;
    use reqwest::Body;
@@ -805,7 +908,10 @@ mod tests {
                request.headers().get("Content-Type").unwrap(),
                JSON_CONTENT_TYPE
            );
-            assert_eq!(request.body().unwrap().as_bytes().unwrap(), br#"{}"#);
+            assert_eq!(
+                request.body().unwrap().as_bytes().unwrap(),
+                br#"{"version":null}"#
+            );

            http::Response::builder().status(200).body("42").unwrap()
        });
@@ -822,7 +928,7 @@ mod tests {
            );
            assert_eq!(
                request.body().unwrap().as_bytes().unwrap(),
-                br#"{"predicate":"a > 10"}"#
+                br#"{"predicate":"a > 10","version":null}"#
            );

            http::Response::builder().status(200).body("42").unwrap()
@@ -1121,7 +1227,9 @@ mod tests {
                "prefilter": true,
                "distance_type": "l2",
                "nprobes": 20,
+                "ef": Option::<usize>::None,
                "refine_factor": null,
+                "version": null,
            });
            // Pass vector separately to make sure it matches f32 precision.
            expected_body["vector"] = vec![0.1f32, 0.2, 0.3].into();
@@ -1166,7 +1274,9 @@ mod tests {
                "bypass_vector_index": true,
                "columns": ["a", "b"],
                "nprobes": 12,
+                "ef": Option::<usize>::None,
                "refine_factor": 2,
+                "version": null,
            });
            // Pass vector separately to make sure it matches f32 precision.
            expected_body["vector"] = vec![0.1f32, 0.2, 0.3].into();
@@ -1222,6 +1332,7 @@ mod tests {
                "k": 10,
                "vector": [],
                "with_row_id": true,
+                "version": null
            });
            assert_eq!(body, expected_body);

@@ -1407,6 +1518,51 @@ mod tests {
        assert_eq!(indices, expected);
    }

+    #[tokio::test]
+    async fn test_list_versions() {
+        // (version number, RFC 3339 timestamp) pairs served by the mock
+        // endpoint and expected back from the client, in the same order.
+        const FIXTURES: [(u64, &str); 2] =
+            [(1, "2024-01-01T00:00:00Z"), (2, "2024-02-01T00:00:00Z")];
+
+        let table = Table::new_with_handler("my_table", |request| {
+            assert_eq!(request.method(), "POST");
+            assert_eq!(request.url().path(), "/v1/table/my_table/version/list/");
+
+            let served: Vec<lance::dataset::Version> = FIXTURES
+                .iter()
+                .map(|(number, stamp)| lance::dataset::Version {
+                    version: *number,
+                    timestamp: stamp.parse().unwrap(),
+                    metadata: Default::default(),
+                })
+                .collect();
+            let payload = serde_json::json!({ "versions": served });
+            let payload = serde_json::to_string(&payload).unwrap();
+
+            http::Response::builder().status(200).body(payload).unwrap()
+        });
+
+        let versions = table.list_versions().await.unwrap();
+        assert_eq!(versions.len(), 2);
+        for (actual, (number, stamp)) in versions.iter().zip(FIXTURES.iter()) {
+            assert_eq!(actual.version, *number);
+            assert_eq!(actual.timestamp, stamp.parse::<DateTime<Utc>>().unwrap());
+        }
+    }
+
    #[tokio::test]
    async fn test_index_stats() {
        let table = Table::new_with_handler("my_table", |request| {
@@ -1451,4 +1607,195 @@ mod tests {
        let indices = table.index_stats("my_index").await.unwrap();
        assert!(indices.is_none());
    }
+
+    // Checks that, once a version is checked out, every read request sent by
+    // the table carries that version in its JSON body.
+    #[tokio::test]
+    async fn test_passes_version() {
+        let table = Table::new_with_handler("my_table", |request| {
+            // Every request reaching the mock must have a JSON body with
+            // "version": 42; anything else panics inside the handler.
+            let body = request.body().unwrap().as_bytes().unwrap();
+            let body: serde_json::Value = serde_json::from_slice(body).unwrap();
+            let version = body
+                .as_object()
+                .unwrap()
+                .get("version")
+                .unwrap()
+                .as_u64()
+                .unwrap();
+            assert_eq!(version, 42);
+
+            // Canned JSON response per endpoint; the query endpoint returns
+            // early below because it answers with an Arrow IPC file instead.
+            let response_body = match request.url().path() {
+                "/v1/table/my_table/describe/" => {
+                    serde_json::json!({
+                        "version": 42,
+                        "schema": { "fields": [] }
+                    })
+                }
+                "/v1/table/my_table/index/list/" => {
+                    serde_json::json!({
+                        "indexes": []
+                    })
+                }
+                "/v1/table/my_table/index/my_idx/stats/" => {
+                    serde_json::json!({
+                        "num_indexed_rows": 100000,
+                        "num_unindexed_rows": 0,
+                        "index_type": "IVF_PQ",
+                        "distance_type": "l2"
+                    })
+                }
+                "/v1/table/my_table/count_rows/" => {
+                    serde_json::json!(1000)
+                }
+                "/v1/table/my_table/query/" => {
+                    let expected_data = RecordBatch::try_new(
+                        Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),
+                        vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
+                    )
+                    .unwrap();
+                    let expected_data_ref = expected_data.clone();
+                    let response_body = write_ipc_file(&expected_data_ref);
+                    return http::Response::builder()
+                        .status(200)
+                        .header(CONTENT_TYPE, ARROW_FILE_CONTENT_TYPE)
+                        .body(response_body)
+                        .unwrap();
+                }
+
+                // Any other endpoint is a test failure.
+                path => panic!("Unexpected path: {}", path),
+            };
+
+            http::Response::builder()
+                .status(200)
+                .body(
+                    serde_json::to_string(&response_body)
+                        .unwrap()
+                        .as_bytes()
+                        .to_vec(),
+                )
+                .unwrap()
+        });
+
+        // Pin the table to version 42; this itself issues a describe request.
+        table.checkout(42).await.unwrap();
+
+        // ensure that version is passed to the /describe endpoint
+        let version = table.version().await.unwrap();
+        assert_eq!(version, 42);
+
+        // ensure it's passed to other read API calls
+        table.list_indices().await.unwrap();
+        table.index_stats("my_idx").await.unwrap();
+        table.count_rows(None).await.unwrap();
+        table
+            .query()
+            .nearest_to(vec![0.1, 0.2, 0.3])
+            .unwrap()
+            .execute()
+            .await
+            .unwrap();
+    }
+
+    // Checks that checking out a version the server does not know fails with
+    // `Error::TableNotFound` whose name mentions the requested version.
+    #[tokio::test]
+    async fn test_fails_if_checkout_version_doesnt_exist() {
+        // Mock server that only knows version 42; any other requested version
+        // is answered with a 404.
+        let table = Table::new_with_handler("my_table", |request| {
+            let body = request.body().unwrap().as_bytes().unwrap();
+            let body: serde_json::Value = serde_json::from_slice(body).unwrap();
+            let version = body
+                .as_object()
+                .unwrap()
+                .get("version")
+                .unwrap()
+                .as_u64()
+                .unwrap();
+            if version != 42 {
+                return http::Response::builder()
+                    .status(404)
+                    .body(format!("Table my_table (version: {}) not found", version))
+                    .unwrap();
+            }
+
+            let response_body = match request.url().path() {
+                "/v1/table/my_table/describe/" => {
+                    serde_json::json!({
+                        "version": 42,
+                        "schema": { "fields": [] }
+                    })
+                }
+                _ => panic!("Unexpected path"),
+            };
+
+            http::Response::builder()
+                .status(200)
+                .body(serde_json::to_string(&response_body).unwrap())
+                .unwrap()
+        });
+
+        let res = table.checkout(43).await;
+        // Match on a reference so `res` stays intact for the failure message;
+        // the result is only printed when the assertion fails.
+        assert!(
+            matches!(&res, Err(Error::TableNotFound { name }) if name == "my_table (version: 43)"),
+            "unexpected checkout result: {:?}",
+            res
+        );
+    }
+
+    // Checks that a table pinned to a version rejects every mutating call
+    // with `Error::NotSupported` before any request is sent.
+    #[tokio::test]
+    async fn test_timetravel_immutable() {
+        // Only the describe endpoint (used by `checkout`) is served; a request
+        // to any other path panics, proving the mutation was blocked locally.
+        let table = Table::new_with_handler::<String>("my_table", |request| {
+            let response_body = match request.url().path() {
+                "/v1/table/my_table/describe/" => {
+                    serde_json::json!({
+                        "version": 42,
+                        "schema": { "fields": [] }
+                    })
+                }
+                _ => panic!("Should not have made a request: {:?}", request),
+            };
+
+            http::Response::builder()
+                .status(200)
+                .body(serde_json::to_string(&response_body).unwrap())
+                .unwrap()
+        });
+
+        table.checkout(42).await.unwrap();
+
+        // Ensure that all mutable operations fail.
+        // update
+        let res = table
+            .update()
+            .column("a", "a + 1")
+            .column("b", "b - 1")
+            .only_if("b > 10")
+            .execute()
+            .await;
+        assert!(matches!(res, Err(Error::NotSupported { .. })));
+
+        // merge_insert
+        let batch = RecordBatch::try_new(
+            Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),
+            vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
+        )
+        .unwrap();
+        let data = Box::new(RecordBatchIterator::new(
+            [Ok(batch.clone())],
+            batch.schema(),
+        ));
+        let res = table.merge_insert(&["some_col"]).execute(data).await;
+        assert!(matches!(res, Err(Error::NotSupported { .. })));
+
+        // delete
+        let res = table.delete("id in (1, 2, 3)").await;
+        assert!(matches!(res, Err(Error::NotSupported { .. })));
+
+        // add
+        let data = RecordBatch::try_new(
+            Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),
+            vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
+        )
+        .unwrap();
+        let res = table
+            .add(RecordBatchIterator::new([Ok(data.clone())], data.schema()))
+            .execute()
+            .await;
+        assert!(matches!(res, Err(Error::NotSupported { .. })));
+
+        // create_index
+        let res = table
+            .create_index(&["a"], Index::IvfPq(Default::default()))
+            .execute()
+            .await;
+        assert!(matches!(res, Err(Error::NotSupported { .. })));
+    }
 }
--- a/rust/lancedb/src/table.rs
+++ b/rust/lancedb/src/table.rs
@@ -37,7 +37,7 @@ pub use lance::dataset::ColumnAlteration;
 pub use lance::dataset::NewColumnTransform;
 pub use lance::dataset::ReadParams;
 use lance::dataset::{
-    Dataset, UpdateBuilder as LanceUpdateBuilder, WhenMatched, WriteMode, WriteParams,
+    Dataset, UpdateBuilder as LanceUpdateBuilder, Version, WhenMatched, WriteMode, WriteParams,
 };
 use lance::dataset::{MergeInsertBuilder as LanceMergeInsertBuilder, WhenNotMatchedBySource};
 use lance::io::WrappingObjectStore;
@@ -426,6 +426,7 @@ pub(crate) trait TableInternal: std::fmt::Display + std::fmt::Debug + Send + Syn
    async fn checkout(&self, version: u64) -> Result<()>;
    async fn checkout_latest(&self) -> Result<()>;
    async fn restore(&self) -> Result<()>;
+    async fn list_versions(&self) -> Result<Vec<Version>>;
    async fn table_definition(&self) -> Result<TableDefinition>;
    fn dataset_uri(&self) -> &str;
 }
@@ -955,6 +956,11 @@ impl Table {
        self.inner.restore().await
    }

+    /// List all the versions of the table
+    ///
+    /// Delegates to the underlying table implementation and returns the
+    /// [`Version`] entries it reports.
+    pub async fn list_versions(&self) -> Result<Vec<Version>> {
+        self.inner.list_versions().await
+    }
+
    /// List all indices that have been created with [`Self::create_index`]
    pub async fn list_indices(&self) -> Result<Vec<IndexConfig>> {
        self.inner.list_indices().await
@@ -1319,7 +1325,7 @@ impl NativeTable {
        let (indices, mf) = futures::try_join!(dataset.load_indices(), dataset.latest_manifest())?;
        Ok(indices
            .iter()
-            .map(|i| VectorIndex::new_from_format(&mf, i))
+            .map(|i| VectorIndex::new_from_format(&(mf.0), i))
            .collect())
    }

@@ -1707,6 +1713,10 @@ impl TableInternal for NativeTable {
        self.dataset.reload().await
    }

+    /// Return the versions reported by the underlying dataset.
+    async fn list_versions(&self) -> Result<Vec<Version>> {
+        let dataset = self.dataset.get().await?;
+        let versions = dataset.versions().await?;
+        Ok(versions)
+    }
+
    async fn restore(&self) -> Result<()> {
        let version =
            self.dataset
@@ -1904,6 +1914,9 @@ impl TableInternal for NativeTable {
            query.base.offset.map(|offset| offset as i64),
        )?;
        scanner.nprobs(query.nprobes);
+        if let Some(ef) = query.ef {
+            scanner.ef(ef);
+        }
        scanner.use_index(query.use_index);
        scanner.prefilter(query.base.prefilter);
        match query.base.select {
Author	SHA1	Message	Date
Lance Release	96933d7df8	Bump version: 0.16.0 → 0.16.1-beta.0	2024-11-21 21:52:39 +00:00
Lei Xu	d369233b3d	feat: bump lance to 0.20.0b2 (#1865 ) Bump lance version. Upstream change log: https://github.com/lancedb/lance/releases/tag/v0.20.0-beta.2	2024-11-21 13:16:59 -08:00
QianZhu	43a670ed4b	fix: `limit` docstring change (#1860 )	2024-11-21 10:50:50 -08:00
Bert	cb9a00a28d	feat: add list_versions to typescript, rust and remote python sdks (#1850 ) Will require update to lance dependency to bring in this change which makes the version serializable https://github.com/lancedb/lance/pull/3143	2024-11-21 13:35:14 -05:00
Max Epstein	72af977a73	fix(CohereReranker): updated default model_name param to newest v3 (#1862 )	2024-11-21 09:02:49 -08:00
Bert	7cecb71df0	feat: support for checkout and checkout_latest in remote sdks (#1863 )	2024-11-21 11:28:46 -05:00
QianZhu	285071e5c8	docs: full-text search doc update (#1861 ) Co-authored-by: BubbleCal <bubble-cal@outlook.com>	2024-11-20 21:07:30 -08:00
QianZhu	114866fbcf	docs: OSS doc improvement (#1859 ) OSS doc improvement - HNSW index parameter explanation and others. --------- Co-authored-by: BubbleCal <bubble-cal@outlook.com>	2024-11-20 17:51:11 -08:00
Frank Liu	5387c0e243	docs: add Voyage models to sidebar (#1858 )	2024-11-20 14:20:14 -08:00
Mr. Doge	53d1535de1	ci: musl x64,arm64 (#1853 ) untested 4 artifacts at: https://github.com/FuPeiJiang/lancedb/actions/runs/11926579058 node-native-linux-aarch64-musl 22.6 MB node-native-linux-x86_64-musl 23.6 MB nodejs-native-linux-aarch64-musl 26.7 MB nodejs-native-linux-x86_64-musl 27 MB this follows the same process as: https://github.com/lancedb/lancedb/pull/1816#issuecomment-2484816669 Closes #1388 Closes #1107 --------- Co-authored-by: Will Jones <willjones127@gmail.com>	2024-11-20 10:53:19 -08:00
BubbleCal	b2f88f0b29	feat: support to sepcify ef search param (#1844 ) Signed-off-by: BubbleCal <bubble-cal@outlook.com>	2024-11-19 23:12:25 +08:00
fzowl	f2e3989831	docs: voyageai embedding in the index (#1813 ) The code to support VoyageAI embedding and rerank models was added in the https://github.com/lancedb/lancedb/pull/1799 PR. Some of the documentation changes was also made, here adding the VoyageAI embedding doc link to the index page. These are my first PRs in lancedb and while i checked the documentation/code structure, i might missed something important. Please let me know if any changes required!	2024-11-18 14:34:16 -08:00
Emmanuel Ferdman	83ae52938a	docs: update migration reference (#1837 ) # PR Summary PR fixes the `migration.md` reference in `docs/src/guides/tables.md`. On the way, it also fixes some typos found in that document. Signed-off-by: Emmanuel Ferdman <emmanuelferdman@gmail.com>	2024-11-18 14:33:32 -08:00
Lei Xu	267aa83bf8	feat(python): check vector query is not None (#1847 ) Fix the type hints of `nearest_to` method, and raise `ValueError` when the input is None	2024-11-18 14:15:22 -08:00
Will Jones	cc72050206	chore: update package locks (#1845 ) Also ran `npm audit`.	2024-11-18 13:44:06 -08:00
Will Jones	72543c8b9d	test(python): test `with_row_id` in sync query (#1835 ) Also remove weird `MockTable` fixture.	2024-11-18 11:32:52 -08:00
Will Jones	97d6210c33	ci: remove invalid references (#1834 ) Fix release job	2024-11-18 11:32:44 -08:00
Ho Kim	a3d0c27b0a	feat: add support for rustls (#1842 ) Hello, this is a simple PR that supports `rustls-tls` feature. The `reqwest`\`s default TLS `default-tls` is enabled by default, to dismiss the side-effect. The user can use `rustls-tls` like this: ```toml lancedb = { version = "*", default-features = false, features = ["rustls-tls"] } ```	2024-11-18 10:36:20 -08:00
BubbleCal	b23d8abcdd	docs: introduce incremental indexing for FTS (#1789 ) don't merge it before https://github.com/lancedb/lancedb/pull/1769 merged --------- Signed-off-by: BubbleCal <bubble-cal@outlook.com>	2024-11-18 20:21:28 +08:00
Rob Meng	e3ea5cf9b9	chore: bump lance to 0.19.3 (#1839 )	2024-11-16 14:57:52 -05:00
Lance Release	4f8b086175	Updating package-lock.json	2024-11-15 20:18:16 +00:00
Lance Release	72330fb759	Bump version: 0.13.0-beta.3 → 0.13.0	2024-11-15 20:17:59 +00:00
Lance Release	e3b2c5f438	Bump version: 0.13.0-beta.2 → 0.13.0-beta.3	2024-11-15 20:17:55 +00:00
Lance Release	66a881b33a	Bump version: 0.16.0-beta.2 → 0.16.0	2024-11-15 20:17:34 +00:00
Lance Release	a7515d6ee2	Bump version: 0.16.0-beta.1 → 0.16.0-beta.2	2024-11-15 20:17:34 +00:00
Will Jones	587c0824af	feat: flexible null handling and insert subschemas in Python (#1827 ) * Test that we can insert subschemas (omit nullable columns) in Python. * More work is needed to support this in Node. See: https://github.com/lancedb/lancedb/issues/1832 * Test that we can insert data with nullable schema but no nulls in non-nullable schema. * Add `"null"` option for `on_bad_vectors` where we fill with null if the vector is bad. * Make null values not considered bad if the field itself is nullable.	2024-11-15 11:33:00 -08:00
Will Jones	b38a4269d0	fix(node): make openai and huggingface optional dependencies (#1809 ) BREAKING CHANGE: openai and huggingface now have separate entrypoints. Closes [#1624](https://github.com/lancedb/lancedb/issues/1624)	2024-11-14 15:04:35 -08:00
Will Jones	119d88b9db	ci: disable Windows Arm64 until the release builds work (#1833 ) Started to actually fix this, but it was taking too long https://github.com/lancedb/lancedb/pull/1831	2024-11-14 15:04:23 -08:00
StevenSu	74f660d223	feat: add new feature, add amazon bedrock embedding function (#1788 ) Add amazon bedrock embedding function to rust sdk. 1. Add BedrockEmbeddingModel ( lancedb/src/embeddings/bedrock.rs) 2. Add example lancedb/examples/bedrock.rs	2024-11-14 11:04:59 -08:00
Lance Release	b2b0979b90	Updating package-lock.json	2024-11-14 04:42:38 +00:00
Lance Release	ee2a40b182	Bump version: 0.13.0-beta.1 → 0.13.0-beta.2	2024-11-14 04:42:19 +00:00