Compare commits

...

136 Commits

Author SHA1 Message Date
Lance Release
3c7dfe9f28 Bump version: 0.21.2-beta.0 → 0.21.3-beta.0 2025-03-28 16:03:17 +00:00
Lei Xu
f52d05d3fa feat: add columns using pyarrow schema (#2284) 2025-03-28 08:51:50 -07:00
vinoyang
c321cccc12 chore(java): make rust release to be a switch option (#2277) 2025-03-28 11:26:24 +08:00
LuQQiu
cba14a5743 feat: add restore remote api (#2282) 2025-03-27 16:33:52 -07:00
vinoyang
72057b743d chore(java): introduce spotless plugin (#2278) 2025-03-27 10:38:39 +08:00
LuQQiu
698f329598 feat: add explain plan remote api (#2263)
Add explain plan remote api
2025-03-26 11:22:40 -07:00
BubbleCal
79fa745130 feat: upgrade lance to v0.25.1-beta.3 (#2276)
Signed-off-by: BubbleCal <bubble-cal@outlook.com>
2025-03-26 23:14:27 +08:00
vinoyang
2ad71bdeca fix(java): make test work for jdk8 (#2269) 2025-03-25 10:57:49 -07:00
vinoyang
7c13615096 fix(java): add .gitignore file (#2270) 2025-03-25 10:56:08 -07:00
Wyatt Alt
f882f5b69a fix: update Query pydoc (#2273)
Removes reference of nonexistent method.
2025-03-25 08:50:23 -07:00
Benjamin Clavié
a68311a893 fix: answerdotai rerankers argument passing (#2117)
This fixes an issue for people wishing to use different kinds of
rerankers in lancedb via AnswerDotAI rerankers. Currently, the arguments
are passed positionally, but they don't match the [Reranker class
implementation](d604a8c47d/rerankers/reranker.py (L179)):
the second argument is expected to be an optional "lang" for default
models, while model_type should be passed explicitly.

The one-line change in this PR fixes it and enables the use of other
methods (e.g. LLMs-as-rerankers).
2025-03-24 12:31:59 +05:30
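A minimal sketch of the fix described above, assuming the `rerankers`
package's `Reranker` entry point; the model name is illustrative:

```python
from rerankers import Reranker

# Buggy positional call: "colbert" lands in the optional `lang` parameter.
ranker = Reranker("answerdotai/answerai-colbert-small-v1", "colbert")

# Fixed: model_type is passed explicitly as a keyword argument.
ranker = Reranker("answerdotai/answerai-colbert-small-v1", model_type="colbert")
```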
Ayush Chaurasia
846a5cea33 fix: handle light and dark mode logo (#2265) 2025-03-22 10:21:05 -07:00
QianZhu
e3dec647b5 docs: replace banner as an image (#2262) 2025-03-21 18:35:35 -07:00
QianZhu
c58104cecc docs: add banner for LanceDB Cloud in public beta (#2261) 2025-03-21 17:54:34 -07:00
QianZhu
b3b5362632 docs: replace Lancedb Cloud link (#2259)
* direct users to cloud.lancedb.com since LanceDB Cloud is in public
beta
* removed the `cast vector dimension` from alter columns as we don't
support it
2025-03-21 17:43:00 -07:00
Will Jones
abe06fee3d feat(python): warn on fork (#2258)
Closes #768
2025-03-21 17:18:10 -07:00
Will Jones
93a82fd371 ci: allow dry run on PR to Python release (#2245)
This just makes it easier to test in the future.
2025-03-21 16:14:32 -07:00
Will Jones
0d379e6ffa ci(node): setup URL so auth token is picked up (#2257)
Should fix failure seen here:
https://github.com/lancedb/lancedb/actions/runs/13999958170/job/39207039825
2025-03-21 16:14:24 -07:00
Lance Release
e1388bdfdd Updating package-lock.json 2025-03-21 20:46:53 +00:00
Lance Release
315a24c2bc Updating package-lock.json 2025-03-21 20:03:43 +00:00
Lance Release
6dd4cf6038 Updating package-lock.json 2025-03-21 20:03:27 +00:00
Lance Release
f97e751b3c Bump version: 0.18.1 → 0.18.2-beta.0 2025-03-21 20:02:59 +00:00
Lance Release
e803a626a1 Bump version: 0.21.1 → 0.21.2-beta.0 2025-03-21 20:02:25 +00:00
Weston Pace
9403254442 feat: add to_query_object method (#2239)
This PR adds a `to_query_object` method to the various query builders
(hybrid queries are not covered yet). This makes it possible to inspect
the query that is built.

In addition this PR does some normalization between the sync and async
query paths. A few custom defaults were removed in favor of None (with
the default getting set once, in rust).

Also, the synchronous to_batches method will now actually stream results

Also, the remote API now defaults to prefiltering
2025-03-21 13:01:51 -07:00
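A hypothetical usage sketch of the new method; the method name comes
from this PR, but the exact shape of the returned object is an
assumption:

```python
# Build a query against an open table `tbl`, then inspect it
# instead of executing it.
query = (
    tbl.search([0.1, 0.2, 0.3])
    .where("price > 10")
    .limit(5)
    .to_query_object()
)
print(query)  # e.g. inspect the vector, filter, and limit that were set
```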
Will Jones
b2a38ac366 fix: make pylance optional again (#2209)
The two remaining blockers were:

* A method `with_embeddings` that was deprecated a year ago
* A typecheck for `LanceDataset`
2025-03-21 11:26:32 -07:00
BubbleCal
bdb6c09c3b feat: support binary vector and IVF_FLAT in TypeScript (#2221)
resolve #2218

---------

Signed-off-by: BubbleCal <bubble-cal@outlook.com>
2025-03-21 10:57:08 -07:00
Will Jones
2bfdef2624 ci: refactor node releases (#2223)
This PR fixes build issues associated with `aws-lc-rs`, while
simplifying the build process. Previously, we used custom scripts for
the musl and Windows ARM builds. These were complicated and prone to
breaking. This PR switches to a setup that mirrors
https://github.com/napi-rs/package-template/blob/main/.github/workflows/CI.yml.

* linux glibc and musl builds now use the Docker images provided by the
napi project
* Windows ARM build now just cross compiles from Windows x64, which
turns out to work quite well.
2025-03-21 10:56:29 -07:00
Samuel Colvin
7982d5c082 fix: correct rust install docs (#2253)
I'm pretty sure you mean `cargo add lancedb` here; `cargo install
lancedb` fails right now.
2025-03-21 10:12:53 -07:00
BubbleCal
7ff6ec7fe3 feat: upgrade to lance v0.25.0-beta.5 (#2248)
- adds `loss` into the index stats for vector index
- now `optimize` can retrain the vector index

---------

Signed-off-by: BubbleCal <bubble-cal@outlook.com>
2025-03-21 10:12:23 -07:00
Ayush Chaurasia
ba1ded933a fix: add better check for empty results in hybrid search (#2252)
fixes: https://github.com/lancedb/lancedb/issues/2249
2025-03-21 13:05:05 +05:30
Will Jones
b595d8a579 fix(nodejs): workaround for apache-arrow null vector issue (#2244)
Fixes #2240
2025-03-20 08:07:10 -07:00
Will Jones
2a1d6d8abf ci: simplify windows builds (#2243)
We soon won't rely on cross-compiling from Linux to Windows, so we can
remove this check. Instead, check that we can cross-compile from Windows
between architectures.
2025-03-20 08:06:56 -07:00
Will Jones
440a466a13 ci: remove OpenSSL as dependency in favor of rustls (#2242)
`object_store` already hard codes `rustls` as the TLS implementation, so
we have been shipping a mix of `rustls` and `openssl`. For simplicity of
builds, we should consolidate to one, and that has to be `rustls`.
2025-03-20 08:06:45 -07:00
Ayush Chaurasia
b9afd9c860 docs: add late interaction, multi-vector guide & link example (#2231)
1/2 docs update for this week. Addresses issues from this docs epic -
https://github.com/lancedb/lancedb/issues/1476
2025-03-20 20:29:32 +05:30
Will Jones
a6b6f6a806 ci: drop vectordb support for musl, windows ARM (#2241)
vectordb is deprecated, and these platforms are particularly difficult
to maintain. Removing now to prevent further headaches.

We will keep these platforms supported on `@lancedb/lancedb`.
2025-03-19 12:23:46 -07:00
Ayush Chaurasia
ae1548b507 docs: add cloud & enterprise cta (#2235)
2/2 docs update this week
- Add cloud & enterprise CTA
- remove outdated projects/examples from landing page
2025-03-19 10:55:05 -07:00
Weston Pace
4e03ee82bc refactor: rework catalog/database options (#2213)
The `ConnectRequest` has a set of properties that only make sense for
listing databases / catalogs and a set of properties that only make
sense for remote databases.

This PR reduces all options to a single `HashMap<String, String>`. This
makes it easier to add new database / catalog implementations and makes
it clearer to users which options are applicable in which situations.

I don't believe there are any breaking changes here. The closest thing
is that I placed the `ConnectBuilder` methods `api_key`, `region`, and
`host_override` behind a `remote` feature gate. This is not strictly
needed and I could remove the feature gate but it seemed appropriate.
Since using these methods without the remote feature would have been
meaningless I don't feel this counts as a breaking change.

We could look at removing these methods entirely from the
`ConnectBuilder` (and encouraging users to use `RemoteDatabaseOptions`
instead) but I'm not sure how I feel about that.

Another approach we could take is to move these methods into a
`RemoteConnectBuilderExt` trait (and there could be a similar
`ListingConnectBuilderExt` trait to add methods for the listing database
/ catalog).

For now though my main goal is to simplify `ConnectRequest` as much as
possible (I see this being part of the key public API for database /
catalog integrations, similar to the `BaseTable`, `Catalog`, and
`Database` traits and I'd like it to be simple).
2025-03-18 10:13:59 -07:00
Weston Pace
46a6846d07 refactor: remove dataset reference from base table (#2226) 2025-03-17 06:27:33 -07:00
Will Jones
a207213358 fix: insert structs in non-alphabetical order (#2222)
Closes #2114

Starting in #1965, we no longer pass the table schema into
`pa.Table.from_pylist()`. This means PyArrow is choosing the order of
the struct subfields, and apparently it does them in alphabetical order.
This is fine in theory, since in Lance we support providing fields in
any order. However, before we pass it to Lance, we call
`pa.Table.cast()` to align column types to the table types.
`pa.Table.cast()` is strict about field order, so we need to create a
cast target schema that aligns with the input data. We were doing this
for the top-level fields, but not for nested fields. This PR adds
support for nested ones.
2025-03-13 14:46:05 -07:00
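A minimal PyArrow illustration of the problem, based on the analysis
above (the alphabetical subfield ordering and the strict cast behavior
are as this PR describes them):

```python
import pyarrow as pa

# PyArrow picks the struct subfield order itself here.
data = pa.Table.from_pylist([{"s": {"b": 1, "a": 2}}])
print(data.schema.field("s").type)

# A cast target whose struct subfields are ordered differently from the
# input data will fail, so the cast schema must be reordered to match.
target = pa.schema([("s", pa.struct([("b", pa.int64()), ("a", pa.int64())]))])
# data.cast(target)  # strict about nested field order
```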
BubbleCal
6c321c694a feat: upgrade lance to 0.25.0-beta2 (#2220)
Signed-off-by: BubbleCal <bubble-cal@outlook.com>
2025-03-13 14:12:54 -07:00
Bob Liu
5c00b2904c feat: add get dataset method on NativeTable (#2021)
I want to expose the dataset method from NativeTable so that I can use
more Lance methods, like order_by, that are not exposed in the lancedb
crate.
2025-03-13 11:15:28 -07:00
Gagan Bhullar
14677d7c18 fix: metric type inconsistency (#2122)
PR fixes #2113

---------

Co-authored-by: Will Jones <willjones127@gmail.com>
2025-03-12 10:28:37 -07:00
Martin Schorfmann
dd22a379b2 fix: use Self return type annotation for abstract query builder (#2127)
Hello LanceDB team,

While developing with `lancedb` as a library, I encountered a typing
problem affecting IDE hints and completions during development.

---

## Current Situation

Currently, the abstract base class `lancedb.query:LanceQueryBuilder`
uses method chaining to build up the search parameters, where the
methods have `LanceQueryBuilder` as a return type hint.

This leads to two issues:
1. Concrete subclasses of `LanceQueryBuilder` need to override
methods to modify the return type hint, even when they don't need to
change the implementation, just to ensure adequate IDE hints and
completions.
2. When using method chaining, the first method directly inherited from
the abstract `LanceQueryBuilder` causes the inferred type to switch back
to `LanceQueryBuilder`. So even when the type starts from
`lancedb.table:LanceTable.search(query_type="vector", ...)` and
therefore is correctly inferred as `LanceVectorQueryBuilder`, after
calling e.g. `LanceVectorQueryBuilder.limit(...)` it is seen as the
abstract `LanceQueryBuilder` from that point on.

### Example of current situation


![image](https://github.com/user-attachments/assets/09678727-8722-43bd-a8a2-67d9b5fc0db5)

## Proposed changes

I propose to change the return type hints of the corresponding methods
(including classmethod `create()`) in the abstract base class
`LanceQueryBuilder` from `LanceQueryBuilder` to `Self`.
`Self` is already imported in the module:

```py
if sys.version_info >= (3, 11):
    from typing import Self
else:
    from typing_extensions import Self
```
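
As a minimal sketch (not the full diff), the proposal amounts to
annotating the chaining methods with `Self`, using the import shown
above:

```py
from abc import ABC


class LanceQueryBuilder(ABC):
    def limit(self, limit: int) -> Self:  # previously -> "LanceQueryBuilder"
        self._limit = limit
        return self
```

With this, a chain that starts as `LanceVectorQueryBuilder` keeps its
concrete inferred type instead of falling back to the abstract base
class.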

### Further possible changes

Additionally, the implementing subclasses could also change the return
type hints to `Self` to potentially allow for further inheritance
easily.
> [!NOTE]
> **However this is not part of this pull request as of writing.**

### Example after proposed changes


![image](https://github.com/user-attachments/assets/a9aea636-e426-477a-86ee-2dad3af2876f)

---

Best regards
Martin
2025-03-12 10:08:25 -07:00
Will Jones
7747c9bcbf feat(node): parse arrow types in alterColumns() (#2208)
Previously, users could only specify new data types in `alterColumns` as
strings:

```ts
await tbl.alterColumns([
  {
    path: "price",
    dataType: "float",
  },
]);
```

But this has some problems:

1. It wasn't clear what the valid types were
2. It was impossible to specify nested types, like lists and vector
columns.

This PR changes it to take an Arrow data type, similar to how the Python
API works. This allows casting vector types:

```ts
await tbl.alterColumns([
  {
    path: "vector",
    dataType: new arrow.FixedSizeList(
      2,
      new arrow.Field("item", new arrow.Float16(), false),
    ),
  },
]);
```

Closes #2185
2025-03-12 09:57:36 -07:00
QianZhu
c9d6fc43a6 docs: use bypass_vector_index() instead of use_index=false (#2115) 2025-03-12 09:31:09 -07:00
Martin Schorfmann
581bcfbb88 docs: fix docstring of EmbeddingFunction (#2118)
Hello LanceDB team,

---

I have fixed a discrepancy in the class docstring of
`lancedb.embeddings.base:EmbeddingFunction` and made that docstring
internally consistent.

### Changes made

1. The docstring referred to the abstract method
`get_source_embeddings()`.
  This method does not exist in the repository at the current state.
I have changed the mention to refer to the actual abstract method
`compute_source_embeddings()`.
2. I also made the ordered list describing the methods to be
implemented by concrete embedding functions internally consistent.

---

Thank you for developing this useful library. 👍

Best regards
Martin
2025-03-12 09:30:01 -07:00
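For context, a hedged sketch of a concrete embedding function using the
abstract method names referenced in this fix; the `register` decorator
usage and the dummy values are assumptions for illustration:

```python
from lancedb.embeddings import EmbeddingFunction, register


@register("dummy")
class DummyEmbeddings(EmbeddingFunction):
    def ndims(self) -> int:
        return 2

    def compute_query_embeddings(self, query, *args, **kwargs):
        return [[0.0, 0.0]]

    def compute_source_embeddings(self, texts, *args, **kwargs):
        return [[0.0, 0.0] for _ in texts]
```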
vinoyang
3750639b5f feat(rust): add connect_catalog method to support connect catalog via url (#2177) 2025-03-12 05:19:03 -07:00
Lance Release
e744d54460 Updating package-lock.json 2025-03-11 14:00:55 +00:00
Lance Release
9d1ce4b5a5 Updating package-lock.json 2025-03-11 13:15:18 +00:00
Lance Release
729ce5e542 Updating package-lock.json 2025-03-11 13:15:03 +00:00
Lance Release
de6739e7ec Bump version: 0.18.1-beta.0 → 0.18.1 2025-03-11 13:14:49 +00:00
Lance Release
495216efdb Bump version: 0.18.0 → 0.18.1-beta.0 2025-03-11 13:14:44 +00:00
Lance Release
a3b45a4d00 Bump version: 0.21.1-beta.0 → 0.21.1 2025-03-11 13:14:30 +00:00
Lance Release
c316c2f532 Bump version: 0.21.0 → 0.21.1-beta.0 2025-03-11 13:14:29 +00:00
Weston Pace
3966b16b63 fix: restore pylance as mandatory dependency (#2204)
We attempted to make pylance optional in
https://github.com/lancedb/lancedb/pull/2156 but it appears this did not
quite work. Users are unable to use lancedb from a fresh install. This
reverts the optional-ness so we can get back in a working state while we
fix the issue.
2025-03-11 06:13:52 -07:00
Lance Release
5661cc15ac Updating package-lock.json 2025-03-10 23:53:56 +00:00
Lance Release
4e7220400f Updating package-lock.json 2025-03-10 23:13:52 +00:00
Lance Release
ae4928fe77 Updating package-lock.json 2025-03-10 23:13:36 +00:00
Lance Release
e80a405dee Bump version: 0.18.0-beta.1 → 0.18.0 2025-03-10 23:13:18 +00:00
Lance Release
a53e19e386 Bump version: 0.18.0-beta.0 → 0.18.0-beta.1 2025-03-10 23:13:13 +00:00
Lance Release
c0097c5f0a Bump version: 0.21.0-beta.2 → 0.21.0 2025-03-10 23:12:56 +00:00
Lance Release
c199708e64 Bump version: 0.21.0-beta.1 → 0.21.0-beta.2 2025-03-10 23:12:56 +00:00
Weston Pace
4a47150ae7 feat: upgrade to lance 0.24.1 (#2199) 2025-03-10 15:18:37 -07:00
Wyatt Alt
f86b20a564 fix: delete tables from DDB on drop_all_tables (#2194)
Prior to this commit, issuing drop_all_tables on a listing database with
an external manifest store would delete physical tables but leave
references behind in the manifest store. The table drop would succeed,
but subsequent creation of a table with the same name would fail with a
conflict.

With this patch, the external manifest store is updated to account for
the dropped tables so that dropped table names can be reused.
2025-03-10 15:00:53 -07:00
msu-reevo
cc81f3e1a5 fix(python): typing (#2167)
@wjones127 is there a standard way you guys set up your virtualenv? I
can either relist all the dependencies in the pyright precommit section,
or specify a venv, or the user has to be in the virtual environment when
they run git commit. If the venv location were standardized or a Python
manager like `uv` were used, it would be easier to avoid duplicating the
pyright dependency list.

Per your suggestion, in `pyproject.toml` I added all the passing
files to the `includes` section.

For ruff I upgraded the version and removed "TCH" which doesn't exist as
an option.

I added a `pyright_report.csv` which contains a list of all files sorted
by pyright errors ascending as a todo list to work on.

I fixed about 30 issues in `table.py` stemming from plain `str`s being
passed into methods that require one of a set of string `Literal`s, by
extracting those `Literal` types into `types.py`.

Can you verify in the rust bridge that the schema should be a property
and not a method here? If it's a method, then there's another place in
the code where `inner.schema` should be `inner.schema()`
``` python
class RecordBatchStream:
    @property
    def schema(self) -> pa.Schema: ...
```

Also, unless the `_lancedb.pyi` file is wrong, there is no `__anext__`
on `_inner` when it's not an `AsyncGenerator`; only `next` is defined:
``` python
    async def __anext__(self) -> pa.RecordBatch:
        return await self._inner.__anext__()
        if isinstance(self._inner, AsyncGenerator):
            batch = await self._inner.__anext__()
        else:
            batch = await self._inner.next()
        if batch is None:
            raise StopAsyncIteration
        return batch
```
in the else statement, `_inner` is a `RecordBatchStream`
```python
class RecordBatchStream:
    @property
    def schema(self) -> pa.Schema: ...
    async def next(self) -> Optional[pa.RecordBatch]: ...
```

---------

Co-authored-by: Will Jones <willjones127@gmail.com>
2025-03-10 09:01:23 -07:00
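For reference, a sketch of what the quoted `__anext__` presumably
intends once the dead code is removed, assuming `_inner` is the
`RecordBatchStream` shown above:

```python
async def __anext__(self) -> pa.RecordBatch:
    batch = await self._inner.next()  # RecordBatchStream only defines next()
    if batch is None:
        raise StopAsyncIteration
    return batch
```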
Weston Pace
bc49c4db82 feat: respect datafusion's batch size when running as a table provider (#2187)
Datafusion makes the batch size available as part of the `SessionState`.
We should use that to set the `max_batch_length` property in the
`QueryExecutionOptions`.
2025-03-07 05:53:36 -08:00
Weston Pace
d2eec46f17 feat: add support for streaming input to create_table (#2175)
This PR makes it possible to create a table using an asynchronous stream
of input data. Previously, only a synchronous iterator was supported. There
are a number of follow-ups not yet tackled:

* Support for embedding functions (the embedding functions wrapper needs
to be re-written to be async, should be an easy lift)
* Support for async input into the remote table (the make_ipc_batch
needs to change to accept async input, leaving undone for now because I
think we want to support actual streaming uploads into the remote table
soon)
* Support for async input into the add function (pretty essential, but
it is a fairly distinct code path, so saving for a different PR)
2025-03-06 11:55:00 -08:00
Lance Release
51437bc228 Bump version: 0.21.0-beta.0 → 0.21.0-beta.1 2025-03-06 19:23:06 +00:00
Bert
fa53cfcfd2 feat: support modifying field metadata in lancedb python (#2178) 2025-03-04 16:58:46 -05:00
vinoyang
374fe0ad95 feat(rust): introduce Catalog trait and implement ListingCatalog (#2148)
Co-authored-by: Weston Pace <weston.pace@gmail.com>
2025-03-03 20:22:24 -08:00
BubbleCal
35e5b84ba9 chore: upgrade lance to 0.24.0-beta.1 (#2171)
Signed-off-by: BubbleCal <bubble-cal@outlook.com>
2025-03-03 12:32:12 +08:00
Lei Xu
7c12d497b0 ci: bump python to 3.12 in GHA (#2169) 2025-03-01 17:24:02 -08:00
ayao227
dfe4ba8dad chore: add reo integration (#2149)
This PR adds reo integration to the lancedb documentation website.
2025-02-28 07:51:34 -08:00
Weston Pace
fa1b9ad5bd fix: don't use with_schema to remove schema metadata (#2162)
It seems that `RecordBatch::with_schema` is unable to remove schema
metadata from a batch. It fails with the error `target schema is not
superset of current schema`.

I'm not sure how the `test_metadata_erased` test is passing. Strangely,
the metadata was not present by the time the batch arrived at the
metadata eraser. I think maybe the schema metadata is only present in
the batch if there is a filter.

I've created a new unit test that makes sure the metadata is erased
when there is a filter as well.
2025-02-27 10:24:00 -08:00
BubbleCal
8877eb020d feat: record the server version for remote table (#2147)
Signed-off-by: BubbleCal <bubble-cal@outlook.com>
2025-02-27 15:55:59 +08:00
Will Jones
01e4291d21 feat(python): drop hard dependency on pylance (#2156)
Closes #1793
2025-02-26 15:53:45 -08:00
Lance Release
ab3ea76ad1 Updating package-lock.json 2025-02-26 21:23:39 +00:00
Lance Release
728ef8657d Updating package-lock.json 2025-02-26 20:11:37 +00:00
Lance Release
0b13901a16 Updating package-lock.json 2025-02-26 20:11:22 +00:00
Lance Release
84b110e0ef Bump version: 0.17.0 → 0.18.0-beta.0 2025-02-26 20:11:07 +00:00
Lance Release
e1836e54e3 Bump version: 0.20.0 → 0.21.0-beta.0 2025-02-26 20:10:54 +00:00
Weston Pace
4ba5326880 feat: reapply upgrade lance to v0.23.3-beta.1 (#2157)
This reverts commit 2f0c5baea2.

---------

Co-authored-by: Lu Qiu <luqiujob@gmail.com>
2025-02-26 11:44:11 -08:00
Lance Release
b036a69300 Updating package-lock.json 2025-02-26 19:32:22 +00:00
Will Jones
5b12a47119 feat!: revert query limit to be unbounded for scans (#2151)
In earlier PRs (#1886, #1191) we made the default limit 10 regardless of
the query type. This was confusing for users and in many cases a
breaking change. Users would have queries that used to return all
results, but instead only returned the first 10, causing silent bugs.

Part of the cause was consistency: the Python sync API seems to have
always had a limit of 10, while newer APIs (Python async and Nodejs)
didn't.

This PR sets the default limit only for searches (vector search, FTS),
while letting scans (even with filters) be unbounded. It does this
consistently for all SDKs.

Fixes #1983
Fixes #1852
Fixes #2141
2025-02-26 10:32:14 -08:00
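A hedged illustration of the resulting defaults, using the sync Python
API; exact row counts depend on the data:

```python
# Vector search: the default limit of 10 still applies.
rows = tbl.search([0.1, 0.2]).to_list()  # at most 10 rows unless .limit() is set

# Scan with a filter: now unbounded, returning every matching row.
rows = tbl.search().where("price > 100").to_list()
```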
Lance Release
769d483e50 Updating package-lock.json 2025-02-26 18:16:59 +00:00
Lance Release
9ecb11fe5a Updating package-lock.json 2025-02-26 18:16:42 +00:00
Lance Release
22bd8329f3 Bump version: 0.17.0-beta.0 → 0.17.0 2025-02-26 18:16:07 +00:00
Lance Release
a736fad149 Bump version: 0.16.1-beta.3 → 0.17.0-beta.0 2025-02-26 18:16:01 +00:00
Lance Release
072adc41aa Bump version: 0.20.0-beta.0 → 0.20.0 2025-02-26 18:15:23 +00:00
Lance Release
c6f25ef1f0 Bump version: 0.19.1-beta.3 → 0.20.0-beta.0 2025-02-26 18:15:23 +00:00
Weston Pace
2f0c5baea2 Revert "chore: upgrade lance to v0.23.3-beta.1 (#2153)"
This reverts commit a63dd66d41.
2025-02-26 10:14:29 -08:00
BubbleCal
a63dd66d41 chore: upgrade lance to v0.23.3-beta.1 (#2153)
This fixes a bug in SQ; see https://github.com/lancedb/lance/pull/3476
for more details.

---------

Signed-off-by: BubbleCal <bubble-cal@outlook.com>
Co-authored-by: Lu Qiu <luqiujob@gmail.com>
2025-02-26 09:52:28 -08:00
Weston Pace
d6b3ccb37b feat: upgrade lance to 0.23.2 (#2152)
This also changes the pylance pin from `==0.23.2` to `~=0.23.2` which
should allow the pylance dependency to float a little. The pylance
dependency is actually not used for much anymore and so it should be
tolerant of patch changes.
2025-02-26 09:02:51 -08:00
Weston Pace
c4f99e82e5 feat: push filters down into DF table provider (#2128) 2025-02-25 14:46:28 -08:00
andrew-pienso
979a2d3d9d docs: fixes is_open docstring on AsyncTable (#2150) 2025-02-25 09:11:25 -08:00
Will Jones
7ac5f74c80 feat!: add variable store to embeddings registry (#2112)
BREAKING CHANGE: embedding function implementations in Node now need to
call `resolveVariables()` in their constructors and should **not**
implement `toJSON()`.

This tries to address the handling of secrets. In Node, they are
currently lost. In Python, they are currently leaked into the table
schema metadata.

This PR introduces an in-memory variable store on the function registry.
It also allows embedding function definitions to label certain config
values as "sensitive", and the preprocessing logic will raise an error
if users try to pass in hard-coded values.

Closes #2110
Closes #521

---------

Co-authored-by: Weston Pace <weston.pace@gmail.com>
2025-02-24 15:52:19 -08:00
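A sketch of the Python side of this idea, based on the "Variables and
secrets" docs page added alongside this change; the exact `set_var` and
`$var:` syntax is an assumption here:

```python
from lancedb.embeddings import get_registry

registry = get_registry()
# Held in memory only, never written into the table schema metadata.
registry.set_var("openai_api_key", "sk-...")

# Reference the variable instead of hard-coding the sensitive value.
func = registry.get("openai").create(api_key="$var:openai_api_key")
```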
Will Jones
ecdee4d2b1 feat(python): add search() method to async API (#2049)
Reviving #1966.

Closes #1938

The `search()` method can apply embeddings for the user. This simplifies
hybrid search, so instead of writing:

```python
vector_query = embeddings.compute_query_embeddings("flower moon")[0]
await (
    async_tbl.query()
    .nearest_to(vector_query)
    .nearest_to_text("flower moon")
    .to_pandas()
)
```

You can write:

```python
await (await async_tbl.search("flower moon", query_type="hybrid")).to_pandas()
```

Unfortunately, we had to do a double-await here because `search()` needs
to be async. This is because it often needs to do IO to retrieve and run
an embedding function.
2025-02-24 14:19:25 -08:00
BubbleCal
f391ed828a fix: remote table doesn't apply the prefilter flag for FTS (#2145) 2025-02-24 21:37:43 +08:00
BubbleCal
a99a450f2b fix: flat FTS panic with prefilter and update lance (#2144)
This is fixed in Lance, so this upgrades Lance to 0.23.2-beta.1.
2025-02-24 14:34:00 +08:00
Lei Xu
6fa1f37506 docs: improve pydantic integration docs (#2136)
Addresses usage mistakes in
https://github.com/lancedb/lancedb/issues/2135.

* Add example of how to use `LanceModel` and `Vector` decorator
* Add test for pydantic doc
* Fix the example to use LanceModel directly instead of calling
`MyModel.to_arrow_schema()`.
* Add cross-reference link to pydantic doc site
* Configure mkdocs to watch code changes in python directory.
2025-02-21 12:48:37 -08:00
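A sketch in the spirit of the corrected example, using `LanceModel`
directly as the table schema; the connection path is illustrative:

```python
import lancedb
from lancedb.pydantic import LanceModel, Vector


class Words(LanceModel):
    text: str
    vector: Vector(2)


db = lancedb.connect("/tmp/lancedb")
table = db.create_table("words", schema=Words)  # no to_arrow_schema() needed
table.add([{"text": "hello world", "vector": [3.1, 4.1]}])
```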
BubbleCal
544382df5e fix: handle batch queries in a single request (#2139) 2025-02-21 13:23:39 +08:00
BubbleCal
784f00ef6d chore: update Cargo.lock (#2137) 2025-02-21 12:27:10 +08:00
Lance Release
96d7446f70 Updating package-lock.json 2025-02-20 04:51:26 +00:00
Lance Release
99ea78fb55 Updating package-lock.json 2025-02-20 03:38:44 +00:00
Lance Release
8eef4cdc28 Updating package-lock.json 2025-02-20 03:38:27 +00:00
Lance Release
0f102f02c3 Bump version: 0.16.1-beta.2 → 0.16.1-beta.3 2025-02-20 03:38:01 +00:00
Lance Release
a33a0670f6 Bump version: 0.19.1-beta.2 → 0.19.1-beta.3 2025-02-20 03:37:27 +00:00
BubbleCal
14c9ff46d1 feat: support multivector on remote table (#2045)
Signed-off-by: BubbleCal <bubble-cal@outlook.com>
2025-02-20 11:34:51 +08:00
Lei Xu
1865f7decf fix: support optional nested pydantic model (#2130)
Closes #2129
2025-02-17 20:43:13 -08:00
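A sketch of the pattern this fix enables; the field names are
illustrative:

```python
from typing import Optional

from pydantic import BaseModel
from lancedb.pydantic import LanceModel, Vector


class Address(BaseModel):
    city: str
    zip_code: str


class Person(LanceModel):
    name: str
    vector: Vector(2)
    address: Optional[Address] = None  # optional nested model, per this fix
```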
BubbleCal
a608621476 test: query with dist range and new rows (#2126)
We found a bug where the flat KNN plan node's stats are not in the same
order as the fields in the schema, which would cause an error when
querying with a distance range and new unindexed rows.

We've fixed this in Lance, so this adds a test to verify it works.

Signed-off-by: BubbleCal <bubble-cal@outlook.com>
2025-02-17 12:57:45 +08:00
BubbleCal
00514999ff feat: upgrade lance to 0.23.1-beta.4 (#2121)
This also upgrades object_store to 0.11.0 and snafu to 0.8.

Signed-off-by: BubbleCal <bubble-cal@outlook.com>
2025-02-16 14:53:26 +08:00
Lance Release
b3b597fef6 Updating package-lock.json 2025-02-13 04:40:10 +00:00
Lance Release
bf17144591 Updating package-lock.json 2025-02-13 04:39:54 +00:00
Lance Release
09e110525f Bump version: 0.16.1-beta.1 → 0.16.1-beta.2 2025-02-13 04:39:38 +00:00
Lance Release
40f0dbb64d Bump version: 0.19.1-beta.1 → 0.19.1-beta.2 2025-02-13 04:39:19 +00:00
BubbleCal
3b19e96ae7 fix: panic when field id doesn't equal the field index (#2116)
Signed-off-by: BubbleCal <bubble-cal@outlook.com>
2025-02-13 12:38:35 +08:00
Will Jones
78a17ad54c chore: improve dev instructions for Python (#2088)
Closes #2042
2025-02-12 14:08:52 -08:00
Lance Release
a8e6b491e2 Updating package-lock.json 2025-02-11 22:05:54 +00:00
Lance Release
cea541ca46 Updating package-lock.json 2025-02-11 20:56:22 +00:00
Lance Release
873ffc1042 Updating package-lock.json 2025-02-11 20:56:05 +00:00
Lance Release
83273ad997 Bump version: 0.16.1-beta.0 → 0.16.1-beta.1 2025-02-11 20:55:43 +00:00
Lance Release
d18d63c69d Bump version: 0.19.1-beta.0 → 0.19.1-beta.1 2025-02-11 20:55:23 +00:00
LuQQiu
c3e865e8d0 fix: fix index out of bound in load indices (#2108)
panicked at 'index out of bounds: the len is 24 but the index is
25': Lancedb/rust/lancedb/src/index/vector.rs:26

Calling load_indices() on the old manifest while using the newer
manifest to get column names could result in an index out of bounds if
some columns were removed in the new version.
This change reduces the possibility of an out-of-bounds access but does
not fully remove it.
It would be better if Lance could directly provide the column name
info, so no extra calls are needed to get column names, but that would
require modifying the public APIs.
2025-02-11 12:54:11 -08:00
Weston Pace
a7755cb313 docs: standardize node example prints (#2080)
Minor cleanup to help debug future CI failures
2025-02-11 08:26:29 -08:00
BubbleCal
3490f3456f chore: upgrade lance to 0.23.1-beta.2 (#2109) 2025-02-11 23:57:56 +08:00
Lance Release
0a1d0693e1 Updating package-lock.json 2025-02-07 20:06:22 +00:00
Lance Release
fd330b4b4b Updating package-lock.json 2025-02-07 19:28:01 +00:00
Lance Release
d4e9fc08e0 Updating package-lock.json 2025-02-07 19:27:44 +00:00
Lance Release
3626f2f5e1 Bump version: 0.16.0 → 0.16.1-beta.0 2025-02-07 19:27:26 +00:00
Lance Release
e64712cfa5 Bump version: 0.19.0 → 0.19.1-beta.0 2025-02-07 19:27:07 +00:00
Wyatt Alt
3e3118f85c feat: update lance dependency to 0.23.1-beta.1 (#2102) 2025-02-07 10:56:01 -08:00
Lance Release
592598a333 Updating package-lock.json 2025-02-07 18:50:53 +00:00
Lance Release
5ad21341c9 Updating package-lock.json 2025-02-07 17:34:04 +00:00
Lance Release
6e08caa091 Updating package-lock.json 2025-02-07 17:33:48 +00:00
Lance Release
7e259d8b0f Bump version: 0.16.0-beta.0 → 0.16.0 2025-02-07 17:33:13 +00:00
Lance Release
e84f747464 Bump version: 0.15.1-beta.3 → 0.16.0-beta.0 2025-02-07 17:33:08 +00:00
173 changed files with 8720 additions and 3040 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.15.1-beta.3"
current_version = "0.18.2-beta.0"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.
@@ -87,26 +87,11 @@ glob = "node/package.json"
replace = "\"@lancedb/vectordb-linux-x64-gnu\": \"{new_version}\""
search = "\"@lancedb/vectordb-linux-x64-gnu\": \"{current_version}\""
[[tool.bumpversion.files]]
glob = "node/package.json"
replace = "\"@lancedb/vectordb-linux-arm64-musl\": \"{new_version}\""
search = "\"@lancedb/vectordb-linux-arm64-musl\": \"{current_version}\""
[[tool.bumpversion.files]]
glob = "node/package.json"
replace = "\"@lancedb/vectordb-linux-x64-musl\": \"{new_version}\""
search = "\"@lancedb/vectordb-linux-x64-musl\": \"{current_version}\""
[[tool.bumpversion.files]]
glob = "node/package.json"
replace = "\"@lancedb/vectordb-win32-x64-msvc\": \"{new_version}\""
search = "\"@lancedb/vectordb-win32-x64-msvc\": \"{current_version}\""
[[tool.bumpversion.files]]
glob = "node/package.json"
replace = "\"@lancedb/vectordb-win32-arm64-msvc\": \"{new_version}\""
search = "\"@lancedb/vectordb-win32-arm64-msvc\": \"{current_version}\""
# Cargo files
# ------------
[[tool.bumpversion.files]]

View File

@@ -34,6 +34,10 @@ rustflags = ["-C", "target-cpu=haswell", "-C", "target-feature=+avx2,+fma,+f16c"
[target.x86_64-unknown-linux-musl]
rustflags = ["-C", "target-cpu=haswell", "-C", "target-feature=-crt-static,+avx2,+fma,+f16c"]
[target.aarch64-unknown-linux-musl]
linker = "aarch64-linux-musl-gcc"
rustflags = ["-C", "target-feature=-crt-static"]
[target.aarch64-apple-darwin]
rustflags = ["-C", "target-cpu=apple-m1", "-C", "target-feature=+neon,+fp16,+fhm,+dotprod"]
@@ -44,4 +48,4 @@ rustflags = ["-Ctarget-feature=+crt-static"]
# Experimental target for Arm64 Windows
[target.aarch64-pc-windows-msvc]
rustflags = ["-Ctarget-feature=+crt-static"]
rustflags = ["-Ctarget-feature=+crt-static"]

View File

@@ -36,8 +36,7 @@ runs:
args: ${{ inputs.args }}
before-script-linux: |
set -e
yum install -y openssl-devel \
&& curl -L https://github.com/protocolbuffers/protobuf/releases/download/v24.4/protoc-24.4-linux-$(uname -m).zip > /tmp/protoc.zip \
curl -L https://github.com/protocolbuffers/protobuf/releases/download/v24.4/protoc-24.4-linux-$(uname -m).zip > /tmp/protoc.zip \
&& unzip /tmp/protoc.zip -d /usr/local \
&& rm /tmp/protoc.zip
- name: Build Arm Manylinux Wheel
@@ -52,7 +51,7 @@ runs:
args: ${{ inputs.args }}
before-script-linux: |
set -e
yum install -y openssl-devel clang \
yum install -y clang \
&& curl -L https://github.com/protocolbuffers/protobuf/releases/download/v24.4/protoc-24.4-linux-aarch_64.zip > /tmp/protoc.zip \
&& unzip /tmp/protoc.zip -d /usr/local \
&& rm /tmp/protoc.zip

View File

@@ -43,7 +43,7 @@ jobs:
- uses: Swatinem/rust-cache@v2
- uses: actions-rust-lang/setup-rust-toolchain@v1
with:
toolchain: "1.79.0"
toolchain: "1.81.0"
cache-workspaces: "./java/core/lancedb-jni"
# Disable full debug symbol generation to speed up CI build and keep memory down
# "1" means line tables only, which is useful for panic tracebacks.
@@ -97,7 +97,7 @@ jobs:
- name: Dry run
if: github.event_name == 'pull_request'
run: |
mvn --batch-mode -DskipTests package
mvn --batch-mode -DskipTests -Drust.release.build=true package
- name: Set github
run: |
git config --global user.email "LanceDB Github Runner"
@@ -108,7 +108,7 @@ jobs:
echo "use-agent" >> ~/.gnupg/gpg.conf
echo "pinentry-mode loopback" >> ~/.gnupg/gpg.conf
export GPG_TTY=$(tty)
mvn --batch-mode -DskipTests -DpushChanges=false -Dgpg.passphrase=${{ secrets.GPG_PASSPHRASE }} deploy -P deploy-to-ossrh
mvn --batch-mode -DskipTests -Drust.release.build=true -DpushChanges=false -Dgpg.passphrase=${{ secrets.GPG_PASSPHRASE }} deploy -P deploy-to-ossrh
env:
SONATYPE_USER: ${{ secrets.SONATYPE_USER }}
SONATYPE_TOKEN: ${{ secrets.SONATYPE_TOKEN }}

File diff suppressed because it is too large.

View File

@@ -4,6 +4,10 @@ on:
push:
tags:
- 'python-v*'
pull_request:
# This should trigger a dry run (we skip the final publish step)
paths:
- .github/workflows/pypi-publish.yml
jobs:
linux:
@@ -46,6 +50,7 @@ jobs:
arm-build: ${{ matrix.config.platform == 'aarch64' }}
manylinux: ${{ matrix.config.manylinux }}
- uses: ./.github/workflows/upload_wheel
if: startsWith(github.ref, 'refs/tags/python-v')
with:
pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
fury_token: ${{ secrets.FURY_TOKEN }}
@@ -75,6 +80,7 @@ jobs:
python-minor-version: 8
args: "--release --strip --target ${{ matrix.config.target }} --features fp16kernels"
- uses: ./.github/workflows/upload_wheel
if: startsWith(github.ref, 'refs/tags/python-v')
with:
pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
fury_token: ${{ secrets.FURY_TOKEN }}
@@ -96,10 +102,12 @@ jobs:
args: "--release --strip"
vcpkg_token: ${{ secrets.VCPKG_GITHUB_PACKAGES }}
- uses: ./.github/workflows/upload_wheel
if: startsWith(github.ref, 'refs/tags/python-v')
with:
pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
fury_token: ${{ secrets.FURY_TOKEN }}
gh-release:
if: startsWith(github.ref, 'refs/tags/python-v')
runs-on: ubuntu-latest
permissions:
contents: write

View File

@@ -13,6 +13,11 @@ concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
env:
# Color output for pytest is off by default.
PYTEST_ADDOPTS: "--color=yes"
FORCE_COLOR: "1"
jobs:
lint:
name: "Lint"
@@ -33,13 +38,14 @@ jobs:
python-version: "3.12"
- name: Install ruff
run: |
pip install ruff==0.8.4
pip install ruff==0.9.9
- name: Format check
run: ruff format --check .
- name: Lint
run: ruff check .
doctest:
name: "Doctest"
type-check:
name: "Type Check"
timeout-minutes: 30
runs-on: "ubuntu-22.04"
defaults:
@@ -54,7 +60,36 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.11"
python-version: "3.12"
- name: Install protobuf compiler
run: |
sudo apt update
sudo apt install -y protobuf-compiler
pip install toml
- name: Install dependencies
run: |
python ../ci/parse_requirements.py pyproject.toml --extras dev,tests,embeddings > requirements.txt
pip install -r requirements.txt
- name: Run pyright
run: pyright
doctest:
name: "Doctest"
timeout-minutes: 30
runs-on: "ubuntu-24.04"
defaults:
run:
shell: bash
working-directory: python
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
lfs: true
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.12"
cache: "pip"
- name: Install protobuf
run: |
@@ -75,8 +110,8 @@ jobs:
timeout-minutes: 30
strategy:
matrix:
python-minor-version: ["9", "11"]
runs-on: "ubuntu-22.04"
python-minor-version: ["9", "12"]
runs-on: "ubuntu-24.04"
defaults:
run:
shell: bash
@@ -101,6 +136,10 @@ jobs:
- uses: ./.github/workflows/run_tests
with:
integration: true
- name: Test without pylance
run: |
pip uninstall -y pylance
pytest -vv python/tests/test_table.py
# Make sure wheels are not included in the Rust cache
- name: Delete wheels
run: rm -rf target/wheels
@@ -127,7 +166,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.11"
python-version: "3.12"
- uses: Swatinem/rust-cache@v2
with:
workspaces: python
@@ -157,7 +196,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.11"
python-version: "3.12"
- uses: Swatinem/rust-cache@v2
with:
workspaces: python
@@ -168,7 +207,7 @@ jobs:
run: rm -rf target/wheels
pydantic1x:
timeout-minutes: 30
runs-on: "ubuntu-22.04"
runs-on: "ubuntu-24.04"
defaults:
run:
shell: bash

View File

@@ -61,7 +61,12 @@ jobs:
CXX: clang++
steps:
- uses: actions/checkout@v4
# Remote cargo.lock to force a fresh build
# Building without a lock file often requires the latest Rust version since downstream
# dependencies may have updated their minimum Rust version.
- uses: actions-rust-lang/setup-rust-toolchain@v1
with:
toolchain: "stable"
# Remove cargo.lock to force a fresh build
- name: Remove Cargo.lock
run: rm -f Cargo.lock
- uses: rui314/setup-mold@v1
@@ -152,151 +157,33 @@ jobs:
windows:
runs-on: windows-2022
strategy:
matrix:
target:
- x86_64-pc-windows-msvc
- aarch64-pc-windows-msvc
defaults:
run:
working-directory: rust/lancedb
steps:
- uses: actions/checkout@v4
- uses: Swatinem/rust-cache@v2
with:
workspaces: rust
- name: Install Protoc v21.12
working-directory: C:\
run: choco install --no-progress protoc
- name: Build
run: |
New-Item -Path 'C:\protoc' -ItemType Directory
Set-Location C:\protoc
Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
7z x protoc.zip
Add-Content $env:GITHUB_PATH "C:\protoc\bin"
shell: powershell
rustup target add ${{ matrix.target }}
$env:VCPKG_ROOT = $env:VCPKG_INSTALLATION_ROOT
cargo build --features remote --tests --locked --target ${{ matrix.target }}
- name: Run tests
# Can only run tests when target matches host
if: ${{ matrix.target == 'x86_64-pc-windows-msvc' }}
run: |
$env:VCPKG_ROOT = $env:VCPKG_INSTALLATION_ROOT
cargo test --features remote --locked
windows-arm64-cross:
# We cross compile in Node releases, so we want to make sure
# this can run successfully.
runs-on: ubuntu-latest
container: alpine:edge
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Install dependencies
run: |
set -e
apk add protobuf-dev curl clang lld llvm19 grep npm bash msitools sed
curl --proto '=https' --tlsv1.3 -sSf https://raw.githubusercontent.com/rust-lang/rustup/refs/heads/master/rustup-init.sh | sh -s -- -y
source $HOME/.cargo/env
rustup target add aarch64-pc-windows-msvc
mkdir -p sysroot
cd sysroot
sh ../ci/sysroot-aarch64-pc-windows-msvc.sh
- name: Check
env:
CC: clang
AR: llvm-ar
C_INCLUDE_PATH: /usr/aarch64-pc-windows-msvc/usr/include
CARGO_BUILD_TARGET: aarch64-pc-windows-msvc
RUSTFLAGS: -Ctarget-feature=+crt-static,+neon,+fp16,+fhm,+dotprod -Clinker=lld -Clink-arg=/LIBPATH:/usr/aarch64-pc-windows-msvc/usr/lib -Clink-arg=arm64rt.lib
run: |
source $HOME/.cargo/env
cargo check --features remote --locked
windows-arm64:
runs-on: windows-4x-arm
steps:
- name: Install Git
run: |
Invoke-WebRequest -Uri "https://github.com/git-for-windows/git/releases/download/v2.44.0.windows.1/Git-2.44.0-64-bit.exe" -OutFile "git-installer.exe"
Start-Process -FilePath "git-installer.exe" -ArgumentList "/VERYSILENT", "/NORESTART" -Wait
shell: powershell
- name: Add Git to PATH
run: |
Add-Content $env:GITHUB_PATH "C:\Program Files\Git\bin"
$env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
shell: powershell
- name: Configure Git symlinks
run: git config --global core.symlinks true
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "3.13"
- name: Install Visual Studio Build Tools
run: |
Invoke-WebRequest -Uri "https://aka.ms/vs/17/release/vs_buildtools.exe" -OutFile "vs_buildtools.exe"
Start-Process -FilePath "vs_buildtools.exe" -ArgumentList "--quiet", "--wait", "--norestart", "--nocache", `
"--installPath", "C:\BuildTools", `
"--add", "Microsoft.VisualStudio.Component.VC.Tools.ARM64", `
"--add", "Microsoft.VisualStudio.Component.VC.Tools.x86.x64", `
"--add", "Microsoft.VisualStudio.Component.Windows11SDK.22621", `
"--add", "Microsoft.VisualStudio.Component.VC.ATL", `
"--add", "Microsoft.VisualStudio.Component.VC.ATLMFC", `
"--add", "Microsoft.VisualStudio.Component.VC.Llvm.Clang" -Wait
shell: powershell
- name: Add Visual Studio Build Tools to PATH
run: |
$vsPath = "C:\BuildTools\VC\Tools\MSVC"
$latestVersion = (Get-ChildItem $vsPath | Sort-Object {[version]$_.Name} -Descending)[0].Name
Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\arm64"
Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\MSVC\$latestVersion\bin\Hostx64\x64"
Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\arm64"
Add-Content $env:GITHUB_PATH "C:\Program Files (x86)\Windows Kits\10\bin\10.0.22621.0\x64"
Add-Content $env:GITHUB_PATH "C:\BuildTools\VC\Tools\Llvm\x64\bin"
# Add MSVC runtime libraries to LIB
$env:LIB = "C:\BuildTools\VC\Tools\MSVC\$latestVersion\lib\arm64;" +
"C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\um\arm64;" +
"C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\ucrt\arm64"
Add-Content $env:GITHUB_ENV "LIB=$env:LIB"
# Add INCLUDE paths
$env:INCLUDE = "C:\BuildTools\VC\Tools\MSVC\$latestVersion\include;" +
"C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\ucrt;" +
"C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\um;" +
"C:\Program Files (x86)\Windows Kits\10\Include\10.0.22621.0\shared"
Add-Content $env:GITHUB_ENV "INCLUDE=$env:INCLUDE"
shell: powershell
- name: Install Rust
run: |
Invoke-WebRequest https://win.rustup.rs/x86_64 -OutFile rustup-init.exe
.\rustup-init.exe -y --default-host aarch64-pc-windows-msvc
shell: powershell
- name: Add Rust to PATH
run: |
Add-Content $env:GITHUB_PATH "$env:USERPROFILE\.cargo\bin"
shell: powershell
- uses: Swatinem/rust-cache@v2
with:
workspaces: rust
- name: Install 7-Zip ARM
run: |
New-Item -Path 'C:\7zip' -ItemType Directory
Invoke-WebRequest https://7-zip.org/a/7z2408-arm64.exe -OutFile C:\7zip\7z-installer.exe
Start-Process -FilePath C:\7zip\7z-installer.exe -ArgumentList '/S' -Wait
shell: powershell
- name: Add 7-Zip to PATH
run: Add-Content $env:GITHUB_PATH "C:\Program Files\7-Zip"
shell: powershell
- name: Install Protoc v21.12
working-directory: C:\
run: |
if (Test-Path 'C:\protoc') {
Write-Host "Protoc directory exists, skipping installation"
return
}
New-Item -Path 'C:\protoc' -ItemType Directory
Set-Location C:\protoc
Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
& 'C:\Program Files\7-Zip\7z.exe' x protoc.zip
shell: powershell
- name: Add Protoc to PATH
run: Add-Content $env:GITHUB_PATH "C:\protoc\bin"
shell: powershell
- name: Run tests
run: |
$env:VCPKG_ROOT = $env:VCPKG_INSTALLATION_ROOT
cargo test --target aarch64-pc-windows-msvc --features remote --locked
msrv:
# Check the minimum supported Rust version
name: MSRV Check - Rust v${{ matrix.msrv }}

View File

@@ -1,21 +1,27 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.2.0
hooks:
- id: check-yaml
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/astral-sh/ruff-pre-commit
- id: check-yaml
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.8.4
rev: v0.9.9
hooks:
- id: ruff
- repo: local
hooks:
- id: local-biome-check
name: biome check
entry: npx @biomejs/biome@1.8.3 check --config-path nodejs/biome.json nodejs/
language: system
types: [text]
files: "nodejs/.*"
exclude: nodejs/lancedb/native.d.ts|nodejs/dist/.*|nodejs/examples/.*
- id: ruff
# - repo: https://github.com/RobertCraigie/pyright-python
# rev: v1.1.395
# hooks:
# - id: pyright
# args: ["--project", "python"]
# additional_dependencies: [pyarrow-stubs]
- repo: local
hooks:
- id: local-biome-check
name: biome check
entry: npx @biomejs/biome@1.8.3 check --config-path nodejs/biome.json nodejs/
language: system
types: [text]
files: "nodejs/.*"
exclude: nodejs/lancedb/native.d.ts|nodejs/dist/.*|nodejs/examples/.*

Cargo.lock (generated, 1867 changed lines)

File diff suppressed because it is too large.

View File

@@ -21,33 +21,32 @@ categories = ["database-implementations"]
rust-version = "1.78.0"
[workspace.dependencies]
lance = { "version" = "=0.23.0", "features" = [
lance = { "version" = "=0.25.1", "features" = [
"dynamodb",
]}
lance-io = "=0.23.0"
lance-index = "=0.23.0"
lance-linalg = "=0.23.0"
lance-table = "=0.23.0"
lance-testing = "=0.23.0"
lance-datafusion = "=0.23.0"
lance-encoding = "=0.23.0"
], tag = "v0.25.1-beta.3", git = "https://github.com/lancedb/lance.git" }
lance-io = { version = "=0.25.1", tag = "v0.25.1-beta.3", git = "https://github.com/lancedb/lance.git" }
lance-index = { version = "=0.25.1", tag = "v0.25.1-beta.3", git = "https://github.com/lancedb/lance.git" }
lance-linalg = { version = "=0.25.1", tag = "v0.25.1-beta.3", git = "https://github.com/lancedb/lance.git" }
lance-table = { version = "=0.25.1", tag = "v0.25.1-beta.3", git = "https://github.com/lancedb/lance.git" }
lance-testing = { version = "=0.25.1", tag = "v0.25.1-beta.3", git = "https://github.com/lancedb/lance.git" }
lance-datafusion = { version = "=0.25.1", tag = "v0.25.1-beta.3", git = "https://github.com/lancedb/lance.git" }
lance-encoding = { version = "=0.25.1", tag = "v0.25.1-beta.3", git = "https://github.com/lancedb/lance.git" }
# Note that this one does not include pyarrow
arrow = { version = "53.2", optional = false }
arrow-array = "53.2"
arrow-data = "53.2"
arrow-ipc = "53.2"
arrow-ord = "53.2"
arrow-schema = "53.2"
arrow-arith = "53.2"
arrow-cast = "53.2"
arrow = { version = "54.1", optional = false }
arrow-array = "54.1"
arrow-data = "54.1"
arrow-ipc = "54.1"
arrow-ord = "54.1"
arrow-schema = "54.1"
arrow-arith = "54.1"
arrow-cast = "54.1"
async-trait = "0"
chrono = "0.4.35"
datafusion = { version = "44.0", default-features = false }
datafusion-catalog = "44.0"
datafusion-common = { version = "44.0", default-features = false }
datafusion-execution = "44.0"
datafusion-expr = "44.0"
datafusion-physical-plan = "44.0"
datafusion = { version = "45.0", default-features = false }
datafusion-catalog = "45.0"
datafusion-common = { version = "45.0", default-features = false }
datafusion-execution = "45.0"
datafusion-expr = "45.0"
datafusion-physical-plan = "45.0"
env_logger = "0.11"
half = { "version" = "=2.4.1", default-features = false, features = [
"num-traits",
@@ -55,14 +54,24 @@ half = { "version" = "=2.4.1", default-features = false, features = [
futures = "0"
log = "0.4"
moka = { version = "0.12", features = ["future"] }
object_store = "0.10.2"
object_store = "0.11.0"
pin-project = "1.0.7"
snafu = "0.7.4"
snafu = "0.8"
url = "2"
num-traits = "0.2"
rand = "0.8"
regex = "1.10"
lazy_static = "1"
semver = "1.0.25"
# Temporary pins to work around downstream issues
# https://github.com/apache/arrow-rs/commit/2fddf85afcd20110ce783ed5b4cdeb82293da30b
chrono = "=0.4.39"
# https://github.com/RustCrypto/formats/issues/1684
base64ct = "=1.6.0"
# Workaround for: https://github.com/eira-fransham/crunchy/issues/13
crunchy = "=0.2.2"
# Workaround for: https://github.com/Lokathor/bytemuck/issues/306
bytemuck_derive = ">=1.8.1, <1.9.0"

View File

@@ -1,9 +1,17 @@
<a href="https://cloud.lancedb.com" target="_blank">
<img src="https://github.com/user-attachments/assets/92dad0a2-2a37-4ce1-b783-0d1b4f30a00c" alt="LanceDB Cloud Public Beta" width="100%" style="max-width: 100%;">
</a>
<div align="center">
<p align="center">
<img width="275" alt="LanceDB Logo" src="https://github.com/lancedb/lancedb/assets/5846846/37d7c7ad-c2fd-4f56-9f16-fffb0d17c73a">
<picture>
<source media="(prefers-color-scheme: dark)" srcset="https://github.com/user-attachments/assets/ac270358-333e-4bea-a132-acefaa94040e">
<source media="(prefers-color-scheme: light)" srcset="https://github.com/user-attachments/assets/b864d814-0d29-4784-8fd9-807297c758c0">
<img alt="LanceDB Logo" src="https://github.com/user-attachments/assets/b864d814-0d29-4784-8fd9-807297c758c0" width=300>
</picture>
**Developer-friendly, database for multimodal AI**
**Search More, Manage Less**
<a href='https://github.com/lancedb/vectordb-recipes/tree/main' target="_blank"><img alt='LanceDB' src='https://img.shields.io/badge/VectorDB_Recipes-100000?style=for-the-badge&logo=LanceDB&logoColor=white&labelColor=645cfb&color=645cfb'/></a>
<a href='https://lancedb.github.io/lancedb/' target="_blank"><img alt='lancdb' src='https://img.shields.io/badge/DOCS-100000?style=for-the-badge&logo=lancdb&logoColor=white&labelColor=645cfb&color=645cfb'/></a>

View File

@@ -1,21 +0,0 @@
#!/bin/bash
set -e
ARCH=${1:-x86_64}
# We pass down the current user so that when we later mount the local files
# into the container, the files are accessible by the current user.
pushd ci/manylinux_node
docker build \
-t lancedb-node-manylinux-$ARCH \
--build-arg="ARCH=$ARCH" \
--build-arg="DOCKER_USER=$(id -u)" \
--progress=plain \
.
popd
# We turn on memory swap to avoid OOM killer
docker run \
-v $(pwd):/io -w /io \
--memory-swap=-1 \
lancedb-node-manylinux-$ARCH \
bash ci/manylinux_node/build_lancedb.sh $ARCH

View File

@@ -1,34 +0,0 @@
# Builds the macOS artifacts (nodejs binaries).
# Usage: ./ci/build_macos_artifacts_nodejs.sh [target]
# Targets supported: x86_64-apple-darwin aarch64-apple-darwin
set -e
prebuild_rust() {
# Building here for the sake of easier debugging.
pushd rust/lancedb
echo "Building rust library for $1"
export RUST_BACKTRACE=1
cargo build --release --target $1
popd
}
build_node_binaries() {
pushd nodejs
echo "Building nodejs library for $1"
export RUST_TARGET=$1
npm run build-release
popd
}
if [ -n "$1" ]; then
targets=$1
else
targets="x86_64-apple-darwin aarch64-apple-darwin"
fi
echo "Building artifacts for targets: $targets"
for target in $targets
do
prebuild_rust $target
build_node_binaries $target
done

View File

@@ -1,5 +1,5 @@
# Many linux dockerfile with Rust, Node, and Lance dependencies installed.
# This container allows building the node modules native libraries in an
# This container allows building the node modules native libraries in an
# environment with a very old glibc, so that we are compatible with a wide
# range of linux distributions.
ARG ARCH=x86_64
@@ -9,10 +9,6 @@ FROM quay.io/pypa/manylinux_2_28_${ARCH}
ARG ARCH=x86_64
ARG DOCKER_USER=default_user
# Install static openssl
COPY install_openssl.sh install_openssl.sh
RUN ./install_openssl.sh ${ARCH} > /dev/null
# Protobuf is also installed as root.
COPY install_protobuf.sh install_protobuf.sh
RUN ./install_protobuf.sh ${ARCH}
@@ -21,7 +17,7 @@ ENV DOCKER_USER=${DOCKER_USER}
# Create a group and user, but only if it doesn't exist
RUN echo ${ARCH} && id -u ${DOCKER_USER} >/dev/null 2>&1 || adduser --user-group --create-home --uid ${DOCKER_USER} build_user
# We switch to the user to install Rust and Node, since those like to be
# We switch to the user to install Rust and Node, since those like to be
# installed at the user level.
USER ${DOCKER_USER}

View File

@@ -1,19 +0,0 @@
#!/bin/bash
# Builds the nodejs module for manylinux. Invoked by ci/build_linux_artifacts_nodejs.sh.
set -e
ARCH=${1:-x86_64}
if [ "$ARCH" = "x86_64" ]; then
export OPENSSL_LIB_DIR=/usr/local/lib64/
else
export OPENSSL_LIB_DIR=/usr/local/lib/
fi
export OPENSSL_STATIC=1
export OPENSSL_INCLUDE_DIR=/usr/local/include/openssl
#Alpine doesn't have .bashrc
FILE=$HOME/.bashrc && test -f $FILE && source $FILE
cd nodejs
npm ci
npm run build-release

View File

@@ -4,14 +4,6 @@ set -e
ARCH=${1:-x86_64}
TARGET_TRIPLE=${2:-x86_64-unknown-linux-gnu}
if [ "$ARCH" = "x86_64" ]; then
export OPENSSL_LIB_DIR=/usr/local/lib64/
else
export OPENSSL_LIB_DIR=/usr/local/lib/
fi
export OPENSSL_STATIC=1
export OPENSSL_INCLUDE_DIR=/usr/local/include/openssl
#Alpine doesn't have .bashrc
FILE=$HOME/.bashrc && test -f $FILE && source $FILE

View File

@@ -1,26 +0,0 @@
#!/bin/bash
# Builds openssl from source so we can statically link to it
# this is to avoid the error we get with the system installation:
# /usr/bin/ld: <library>: version node not found for symbol SSLeay@@OPENSSL_1.0.1
# /usr/bin/ld: failed to set dynamic section sizes: Bad value
set -e
git clone -b OpenSSL_1_1_1v \
--single-branch \
https://github.com/openssl/openssl.git
pushd openssl
if [[ $1 == x86_64* ]]; then
ARCH=linux-x86_64
else
# gnu target
ARCH=linux-aarch64
fi
./Configure no-shared $ARCH
make
make install

ci/parse_requirements.py (new file, 41 lines)
View File

@@ -0,0 +1,41 @@
import argparse

import toml


def parse_dependencies(pyproject_path, extras=None):
    with open(pyproject_path, "r") as file:
        pyproject = toml.load(file)

    dependencies = pyproject.get("project", {}).get("dependencies", [])
    for dependency in dependencies:
        print(dependency)

    optional_dependencies = pyproject.get("project", {}).get(
        "optional-dependencies", {}
    )
    if extras:
        for extra in extras.split(","):
            for dep in optional_dependencies.get(extra, []):
                print(dep)


def main():
    parser = argparse.ArgumentParser(
        description="Generate requirements.txt from pyproject.toml"
    )
    parser.add_argument("path", type=str, help="Path to pyproject.toml")
    parser.add_argument(
        "--extras",
        type=str,
        help="Comma-separated list of extras to include",
        default="",
    )
    args = parser.parse_args()
    parse_dependencies(args.path, args.extras)


if __name__ == "__main__":
    main()

View File

@@ -4,6 +4,9 @@ repo_url: https://github.com/lancedb/lancedb
edit_uri: https://github.com/lancedb/lancedb/tree/main/docs/src
repo_name: lancedb/lancedb
docs_dir: src
watch:
- src
- ../python/python
theme:
name: "material"
@@ -63,6 +66,7 @@ plugins:
- https://arrow.apache.org/docs/objects.inv
- https://pandas.pydata.org/docs/objects.inv
- https://lancedb.github.io/lance/objects.inv
- https://docs.pydantic.dev/latest/objects.inv
- mkdocs-jupyter
- render_swagger:
allow_arbitrary_locations: true
@@ -105,8 +109,8 @@ nav:
- 📚 Concepts:
- Vector search: concepts/vector_search.md
- Indexing:
- IVFPQ: concepts/index_ivfpq.md
- HNSW: concepts/index_hnsw.md
- IVFPQ: concepts/index_ivfpq.md
- HNSW: concepts/index_hnsw.md
- Storage: concepts/storage.md
- Data management: concepts/data_management.md
- 🔨 Guides:
@@ -120,6 +124,9 @@ nav:
- Overview: hybrid_search/hybrid_search.md
- Comparing Rerankers: hybrid_search/eval.md
- Airbnb financial data example: notebooks/hybrid_search.ipynb
- Late interaction with MultiVector search:
- Overview: guides/multi-vector.md
- Example: notebooks/Multivector_on_LanceDB.ipynb
- RAG:
- Vanilla RAG: rag/vanilla_rag.md
- Multi-head RAG: rag/multi_head_rag.md
@@ -130,8 +137,8 @@ nav:
- Adaptive RAG: rag/adaptive_rag.md
- SFR RAG: rag/sfr_rag.md
- Advanced Techniques:
- HyDE: rag/advanced_techniques/hyde.md
- FLARE: rag/advanced_techniques/flare.md
- HyDE: rag/advanced_techniques/hyde.md
- FLARE: rag/advanced_techniques/flare.md
- Reranking:
- Quickstart: reranking/index.md
- Cohere Reranker: reranking/cohere.md
@@ -146,7 +153,7 @@ nav:
- Building Custom Rerankers: reranking/custom_reranker.md
- Example: notebooks/lancedb_reranking.ipynb
- Filtering: sql.md
- Versioning & Reproducibility:
- Versioning & Reproducibility:
- sync API: notebooks/reproducibility.ipynb
- async API: notebooks/reproducibility_async.ipynb
- Configuring Storage: guides/storage.md
@@ -178,6 +185,7 @@ nav:
- Imagebind embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/imagebind_embedding.md
- Jina Embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/jina_multimodal_embedding.md
- User-defined embedding functions: embeddings/custom_embedding_function.md
- Variables and secrets: embeddings/variables_and_secrets.md
- "Example: Multi-lingual semantic search": notebooks/multi_lingual_example.ipynb
- "Example: MultiModal CLIP Embeddings": notebooks/DisappearingEmbeddingFunction.ipynb
- 🔌 Integrations:
@@ -228,20 +236,13 @@ nav:
- 👾 JavaScript (vectordb): javascript/modules.md
- 👾 JavaScript (lancedb): js/globals.md
- 🦀 Rust: https://docs.rs/lancedb/latest/lancedb/
- ☁️ LanceDB Cloud:
- Overview: cloud/index.md
- API reference:
- 🐍 Python: python/saas-python.md
- 👾 JavaScript: javascript/modules.md
- REST API: cloud/rest.md
- FAQs: cloud/cloud_faq.md
- Quick start: basic.md
- Concepts:
- Vector search: concepts/vector_search.md
- Indexing:
- IVFPQ: concepts/index_ivfpq.md
- HNSW: concepts/index_hnsw.md
- IVFPQ: concepts/index_ivfpq.md
- HNSW: concepts/index_hnsw.md
- Storage: concepts/storage.md
- Data management: concepts/data_management.md
- Guides:
@@ -255,6 +256,9 @@ nav:
- Overview: hybrid_search/hybrid_search.md
- Comparing Rerankers: hybrid_search/eval.md
- Airbnb financial data example: notebooks/hybrid_search.ipynb
- Late interaction with MultiVector search:
- Overview: guides/multi-vector.md
- Document search Example: notebooks/Multivector_on_LanceDB.ipynb
- RAG:
- Vanilla RAG: rag/vanilla_rag.md
- Multi-head RAG: rag/multi_head_rag.md
@@ -265,8 +269,8 @@ nav:
- Adaptive RAG: rag/adaptive_rag.md
- SFR RAG: rag/sfr_rag.md
- Advanced Techniques:
- HyDE: rag/advanced_techniques/hyde.md
- FLARE: rag/advanced_techniques/flare.md
- HyDE: rag/advanced_techniques/hyde.md
- FLARE: rag/advanced_techniques/flare.md
- Reranking:
- Quickstart: reranking/index.md
- Cohere Reranker: reranking/cohere.md
@@ -280,7 +284,7 @@ nav:
- Building Custom Rerankers: reranking/custom_reranker.md
- Example: notebooks/lancedb_reranking.ipynb
- Filtering: sql.md
- Versioning & Reproducibility:
- Versioning & Reproducibility:
- sync API: notebooks/reproducibility.ipynb
- async API: notebooks/reproducibility_async.ipynb
- Configuring Storage: guides/storage.md
@@ -311,6 +315,7 @@ nav:
- Imagebind embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/imagebind_embedding.md
- Jina Embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/jina_multimodal_embedding.md
- User-defined embedding functions: embeddings/custom_embedding_function.md
- Variables and secrets: embeddings/variables_and_secrets.md
- "Example: Multi-lingual semantic search": notebooks/multi_lingual_example.ipynb
- "Example: MultiModal CLIP Embeddings": notebooks/DisappearingEmbeddingFunction.ipynb
- Integrations:
@@ -349,21 +354,14 @@ nav:
- 🦀 Rust:
- Overview: examples/examples_rust.md
- Studies:
- studies/overview.md
- ↗Improve retrievers with hybrid search and reranking: https://blog.lancedb.com/hybrid-search-and-reranking-report/
- studies/overview.md
- ↗Improve retrievers with hybrid search and reranking: https://blog.lancedb.com/hybrid-search-and-reranking-report/
- API reference:
- Overview: api_reference.md
- Python: python/python.md
- Javascript (vectordb): javascript/modules.md
- Javascript (lancedb): js/globals.md
- Rust: https://docs.rs/lancedb/latest/lancedb/index.html
- LanceDB Cloud:
- Overview: cloud/index.md
- API reference:
- 🐍 Python: python/saas-python.md
- 👾 JavaScript: javascript/modules.md
- REST API: cloud/rest.md
- FAQs: cloud/cloud_faq.md
extra_css:
- styles/global.css
@@ -371,6 +369,7 @@ extra_css:
extra_javascript:
- "extra_js/init_ask_ai_widget.js"
- "extra_js/reo.js"
extra:
analytics:

View File

@@ -171,7 +171,7 @@ paths:
distance_type:
type: string
description: |
The distance metric to use for search. L2, Cosine, Dot and Hamming are supported. Default is L2.
The distance metric to use for search. l2, Cosine, Dot and Hamming are supported. Default is l2.
bypass_vector_index:
type: boolean
description: |
@@ -450,7 +450,7 @@ paths:
type: string
nullable: false
description: |
The metric type to use for the index. L2, Cosine, Dot are supported.
The metric type to use for the index. l2, Cosine, Dot are supported.
index_type:
type: string
responses:

View File

@@ -69,7 +69,7 @@ Lance supports `IVF_PQ` index type by default.
The following IVF_PQ parameters can be specified:
- **distance_type**: The distance metric to use. By default it uses euclidean distance "`L2`".
- **distance_type**: The distance metric to use. By default it uses euclidean distance "`l2`".
We also support "cosine" and "dot" distances.
- **num_partitions**: The number of partitions in the index. The default is the square root
of the number of rows.
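For illustration, creating the index with these parameters via the Python API might look like this (a minimal sketch; `tbl` is an already-opened table, and the values are only examples):
```python
tbl.create_index(
    metric="l2",          # or "cosine" / "dot"
    num_partitions=256,   # defaults to the square root of the row count
    num_sub_vectors=96,   # sub-vectors created during Product Quantization
)
```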

View File

@@ -3,6 +3,7 @@ import * as vectordb from "vectordb";
// --8<-- [end:import]
(async () => {
console.log("ann_indexes.ts: start");
// --8<-- [start:ingest]
const db = await vectordb.connect("data/sample-lancedb");
@@ -49,5 +50,5 @@ import * as vectordb from "vectordb";
.execute();
// --8<-- [end:search3]
console.log("Ann indexes: done");
console.log("ann_indexes.ts: done");
})();

View File

@@ -107,7 +107,6 @@ const example = async () => {
// --8<-- [start:search]
const query = await tbl.search([100, 100]).limit(2).execute();
// --8<-- [end:search]
console.log(query);
// --8<-- [start:delete]
await tbl.delete('item = "fizz"');
@@ -119,8 +118,9 @@ const example = async () => {
};
async function main() {
console.log("basic_legacy.ts: start");
await example();
console.log("Basic example: done");
console.log("basic_legacy.ts: done");
}
main();

View File

@@ -2,7 +2,7 @@
LanceDB Cloud is a SaaS (software-as-a-service) solution that runs serverless in the cloud, clearly separating storage from compute. It's designed to be highly scalable without breaking the bank. LanceDB Cloud is currently in private beta with general availability coming soon, but you can apply for early access with the private beta release by signing up below.
[Try out LanceDB Cloud](https://noteforms.com/forms/lancedb-mailing-list-cloud-kty1o5?notionforms=1&utm_source=notionforms){ .md-button .md-button--primary }
[Try out LanceDB Cloud (Public Beta)](https://cloud.lancedb.com){ .md-button .md-button--primary }
## Architecture

View File

@@ -59,7 +59,7 @@ Then the greedy search routine operates as follows:
There are three key parameters to set when constructing an HNSW index:
* `metric`: Use an `L2` euclidean distance metric. We also support `dot` and `cosine` distance.
* `metric`: Use an `l2` euclidean distance metric. We also support `dot` and `cosine` distance.
* `m`: The number of neighbors to select for each vector in the HNSW graph.
* `ef_construction`: The number of candidates to evaluate during the construction of the HNSW graph.
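A hypothetical sketch of supplying these parameters from Python; the exact keyword names for the HNSW variant (`index_type`, `m`, `ef_construction`) are assumptions here, so check the API reference for your version:
```python
tbl.create_index(
    metric="l2",
    index_type="IVF_HNSW_SQ",  # assumed name of an HNSW index variant
    m=20,                      # neighbors kept per vector in the graph
    ef_construction=300,       # candidates evaluated while building the graph
)
```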

View File

@@ -47,7 +47,7 @@ We can combine the above concepts to understand how to build and query an IVF-PQ
There are three key parameters to set when constructing an IVF-PQ index:
* `metric`: Use an `L2` euclidean distance metric. We also support `dot` and `cosine` distance.
* `metric`: Use an `l2` euclidean distance metric. We also support `dot` and `cosine` distance.
* `num_partitions`: The number of partitions in the IVF portion of the index.
* `num_sub_vectors`: The number of sub-vectors that will be created during Product Quantization (PQ).
@@ -56,7 +56,7 @@ In Python, the index can be created as follows:
```python
# Create and train the index for a 1536-dimensional vector
# Make sure you have enough data in the table for an effective training step
tbl.create_index(metric="L2", num_partitions=256, num_sub_vectors=96)
tbl.create_index(metric="l2", num_partitions=256, num_sub_vectors=96)
```
!!! note
`num_partitions`=256 and `num_sub_vectors`=96 do not work for every dataset; those values need to be adjusted for your particular dataset.
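Querying the trained index then looks roughly like this (a sketch; `query_vector` is a placeholder 1536-dimensional vector, and `nprobes`/`refine_factor` trade recall against latency):
```python
results = (
    tbl.search(query_vector)
    .nprobes(20)         # how many IVF partitions to scan
    .refine_factor(10)   # re-rank extra candidates with exact distances
    .limit(5)
    .to_pandas()
)
```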

View File

@@ -55,6 +55,14 @@ Let's implement `SentenceTransformerEmbeddings` class. All you need to do is imp
This is a stripped down version of our implementation of `SentenceTransformerEmbeddings` that removes certain optimizations and default settings.
!!! danger "Use sensitive keys to prevent leaking secrets"
To prevent leaking secrets, such as API keys, you should add any sensitive
parameters of an embedding function to the output of the
[sensitive_keys()][lancedb.embeddings.base.EmbeddingFunction.sensitive_keys] /
[getSensitiveKeys()](../../js/namespaces/embedding/classes/EmbeddingFunction/#getsensitivekeys)
method. This prevents users from accidentally instantiating the embedding
function with hard-coded secrets.
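For illustration, a minimal Python sketch of such an override (the class and option names are made up; only the `sensitive_keys()` override is the point):
```python
from lancedb.embeddings import TextEmbeddingFunction, register

@register("my-embedder")  # illustrative registry name
class MyEmbeddings(TextEmbeddingFunction):
    api_key: str = "$var:my_api_key"  # resolved at runtime, never stored raw

    def sensitive_keys(self):
        return ["api_key"]  # hard-coded values for this key are rejected

    # generate_embeddings() and ndims() omitted for brevity
```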
Now you can use this embedding function to create your table schema, and that's it! You can then ingest data and run queries without manually vectorizing the inputs.
=== "Python"

View File

@@ -54,7 +54,7 @@ As mentioned, after creating embeddings, each data point is represented as a vector.
Points that are close to each other in vector space are considered similar (or appear in similar contexts), and points that are far away are considered dissimilar. To quantify this closeness, we use a distance metric, which can be measured in the following ways (a short numeric sketch follows the list):
1. **Euclidean Distance (L2)**: It calculates the straight-line distance between two points (vectors) in a multidimensional space.
1. **Euclidean Distance (l2)**: It calculates the straight-line distance between two points (vectors) in a multidimensional space.
2. **Cosine Similarity**: It measures the cosine of the angle between two vectors, providing a normalized measure of similarity based on their direction.
3. **Dot product**: It is calculated as the sum of the products of their corresponding components. To measure relatedness it considers both the magnitude and direction of the vectors.
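A short numeric sketch of the three metrics with plain NumPy (toy vectors):
```python
import numpy as np

a, b = np.array([1.0, 2.0, 2.0]), np.array([2.0, 3.0, 6.0])

l2 = np.linalg.norm(a - b)                                      # sqrt(18) ≈ 4.243
cosine_sim = (a @ b) / (np.linalg.norm(a) * np.linalg.norm(b))  # 20/21 ≈ 0.952
dot = a @ b                                                     # 20.0
```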

View File

@@ -0,0 +1,53 @@
# Variables and Secrets
Most embedding configuration options are saved in the table's metadata. However,
this isn't always appropriate. For example, API keys should never be stored in the
metadata. Additionally, other configuration options might be best set at runtime,
such as the `device` configuration that controls whether to use GPU or CPU for
inference. If you hardcoded this to GPU, you wouldn't be able to run the code on
a server without one.
To handle these cases, you can set variables on the embedding registry and
reference them in the embedding configuration. These variables will be available
during the runtime of your program, but not saved in the table's metadata. When
the table is loaded from a different process, the variables must be set again.
To set a variable, use the `set_var()` / `setVar()` method on the embedding registry.
To reference a variable, use the syntax `$var:variable_name`. If there is a default
value, you can use the syntax `$var:variable_name:default_value`.
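For example, a minimal sketch in Python (the variable name and value are placeholders):
```python
from lancedb.embeddings import get_registry

registry = get_registry()
# Lives only in this process; it is never written to the table's metadata.
registry.set_var("my_api_key", "sk-placeholder")

# An embedding configuration can then reference "$var:my_api_key",
# or "$var:device:cpu" to fall back to a default when the variable is unset.
```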
## Using variables to set secrets
Sensitive configuration, such as API keys, must either be set as environment
variables or using variables on the embedding registry. If you pass in a hardcoded
value, LanceDB will raise an error. Instead, if you want to set an API key via
configuration, use a variable:
=== "Python"
```python
--8<-- "python/python/tests/docs/test_embeddings_optional.py:register_secret"
```
=== "Typescript"
```typescript
--8<-- "nodejs/examples/embedding.test.ts:register_secret"
```
## Using variables to set the device parameter
Many embedding functions that run locally have a `device` parameter that controls
whether to use GPU or CPU for inference. Because not all computers have a GPU,
it's helpful to be able to set the `device` parameter at runtime, rather than
have it hard coded in the embedding configuration. To make it work even if the
variable isn't set, you could provide a default value of `cpu` in the embedding
configuration.
Some embedding libraries even have a method to detect which devices are available,
which could be used to dynamically set the device at runtime. For example, in Python
you can check if a CUDA GPU is available using `torch.cuda.is_available()`.
```python
--8<-- "python/python/tests/docs/test_embeddings_optional.py:register_device"
```
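For example, a sketch that picks the device at runtime with PyTorch (assumes `torch` is installed):
```python
import torch
from lancedb.embeddings import get_registry

registry = get_registry()
# "cuda" when a GPU is visible, otherwise "cpu"
registry.set_var("device", "cuda" if torch.cuda.is_available() else "cpu")
```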

View File

@@ -8,15 +8,5 @@ LanceDB provides language APIs, allowing you to embed a database in your languag
* 👾 [JavaScript](examples_js.md) examples
* 🦀 Rust examples (coming soon)
## Python Applications powered by LanceDB
| Project Name | Description |
| --- | --- |
| **Ultralytics Explorer 🚀**<br>[![Ultralytics](https://img.shields.io/badge/Ultralytics-Docs-green?labelColor=0f3bc4&style=flat-square&logo=https://cdn.prod.website-files.com/646dd1f1a3703e451ba81ecc/64994922cf2a6385a4bf4489_UltralyticsYOLO_mark_blue.svg&link=https://docs.ultralytics.com/datasets/explorer/)](https://docs.ultralytics.com/datasets/explorer/)<br>[![Open In Collab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ultralytics/ultralytics/blob/main/docs/en/datasets/explorer/explorer.ipynb) | - 🔍 **Explore CV Datasets**: Semantic search, SQL queries, vector similarity, natural language.<br>- 🖥️ **GUI & Python API**: Seamless dataset interaction.<br>- ⚡ **Efficient & Scalable**: Leverages LanceDB for large datasets.<br>- 📊 **Detailed Analysis**: Easily analyze data patterns.<br>- 🌐 **Browser GUI Demo**: Create embeddings, search images, run queries. |
| **Website Chatbot🤖**<br>[![GitHub](https://img.shields.io/badge/github-%23121011.svg?style=for-the-badge&logo=github&logoColor=white)](https://github.com/lancedb/lancedb-vercel-chatbot)<br>[![Deploy with Vercel](https://vercel.com/button)](https://vercel.com/new/clone?repository-url=https%3A%2F%2Fgithub.com%2Flancedb%2Flancedb-vercel-chatbot&env=OPENAI_API_KEY&envDescription=OpenAI%20API%20Key%20for%20chat%20completion.&project-name=lancedb-vercel-chatbot&repository-name=lancedb-vercel-chatbot&demo-title=LanceDB%20Chatbot%20Demo&demo-description=Demo%20website%20chatbot%20with%20LanceDB.&demo-url=https%3A%2F%2Flancedb.vercel.app&demo-image=https%3A%2F%2Fi.imgur.com%2FazVJtvr.png) | - 🌐 **Chatbot from Sitemap/Docs**: Create a chatbot using site or document context.<br>- 🚀 **Embed LanceDB in Next.js**: Lightweight, on-prem storage.<br>- 🧠 **AI-Powered Context Retrieval**: Efficiently access relevant data.<br>- 🔧 **Serverless & Native JS**: Seamless integration with Next.js.<br>- ⚡ **One-Click Deploy on Vercel**: Quick and easy setup. |
## Nodejs Applications powered by LanceDB
| Project Name | Description |
| --- | --- |
| **Langchain Writing Assistant ✍**<br>[![Github](../assets/github.svg)](https://github.com/lancedb/vectordb-recipes/tree/main/applications/node/lanchain_writing_assistant) | - **📂 Data Source Integration**: Use your own data by specifying a data source file, and the app instantly processes it to provide insights. <br>- **🧠 Intelligent Suggestions**: Powered by LangChain.js and LanceDB, it improves writing productivity and accuracy. <br>- **💡 Enhanced Writing Experience**: It delivers real-time contextual insights and factual suggestions while the user writes. |
!!! tip "Hosted LanceDB"
If you want S3 cost-efficiency and local performance via a simple serverless API, check out **LanceDB Cloud**. For private deployments, high performance at extreme scale, or if you have strict security requirements, talk to us about **LanceDB Enterprise**. [Learn more](https://docs.lancedb.com/)

1
docs/src/extra_js/reo.js Normal file
View File

@@ -0,0 +1 @@
!function(){var e,t,n;e="9627b71b382d201",t=function(){Reo.init({clientID:"9627b71b382d201"})},(n=document.createElement("script")).src="https://static.reo.dev/"+e+"/reo.js",n.defer=!0,n.onload=t,document.head.appendChild(n)}();

View File

@@ -0,0 +1,85 @@
# Late interaction & MultiVector embedding type
Late interaction is a technique used in retrieval that calculates the relevance of a query to a document by comparing their multi-vector representations. The key differences between late interaction and other popular methods are illustrated below:
![late interaction vs other methods](https://raw.githubusercontent.com/lancedb/assets/b035a0ceb2c237734e0d393054c146d289792339/docs/assets/integration/colbert-blog-interaction.svg)
[Illustration from https://jina.ai/news/what-is-colbert-and-late-interaction-and-why-they-matter-in-search/]
<b>No interaction:</b> Refers to independently embedding the query and document, which are then compared to calculate similarity without any interaction between them. This is typically used in vector search operations.
<b>Partial interaction:</b> Refers to an approach where the similarity computation happens primarily between query vectors and document vectors, without extensive interaction between the individual components of each. An example of this is dual-encoder models like BERT.
<b>Early full interaction:</b> Refers to techniques like cross-encoders that process the query and documents in pairs, with full interaction across various stages of encoding. This is a powerful but relatively slow technique. Because it requires processing the query and documents in pairs, document embeddings can't be pre-computed for fast retrieval, which is why cross-encoders are typically used as reranking models combined with vector search. Learn more about [LanceDB Reranking support](https://lancedb.github.io/lancedb/reranking/).
<b>Late interaction:</b> Calculates document and query embeddings independently; the interaction, or evaluation, happens during the retrieval process. This is typically used in retrieval models like ColBERT. Unlike early interaction, it speeds up retrieval without compromising the depth of semantic analysis.
## Internals of ColBERT
Let's take a look at the steps involved in performing late interaction based retrieval using ColBERT:
• ColBERT employs BERT-based encoders for both queries `(fQ)` and documents `(fD)`
• A single BERT model is shared between query and document encoders and special tokens distinguish input types: `[Q]` for queries and `[D]` for documents
**Query Encoder (fQ):**
• Query q is tokenized into WordPiece tokens: `q1, q2, ..., ql`. `[Q]` token is prepended right after BERT's `[CLS]` token
• If the query length < Nq, it's padded with `[MASK]` tokens up to Nq.
• The padded sequence goes through BERT's transformer architecture.
• Final embeddings are L2-normalized.
**Document Encoder (fD):**
• Document d is tokenized into tokens `d1, d2, ..., dm`. A `[D]` token is prepended after the `[CLS]` token.
• Unlike queries, documents are NOT padded with `[MASK]` tokens.
• Document tokens are processed through BERT and the same linear layer.
**Late Interaction:**
• Late interaction estimates the relevance score `S(q,d)` using the embeddings `Eq` and `Ed`; the interaction happens after independent encoding.
• For each query embedding, the maximum similarity is computed against all document embeddings.
• The similarity measure can be cosine similarity or squared L2 distance.
**MaxSim Calculation:**
```
S(q, d) := Σ_{i ∈ |Eq|} max_{j ∈ |Ed|} ( Eq_i · Ed_j^T )
```
This finds the best-matching document embedding for each query embedding, capturing relevance based on the strongest local matches between contextual embeddings.
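A minimal NumPy sketch of the MaxSim score above (dot-product similarity; the shapes are illustrative):
```python
import numpy as np

def maxsim(Eq: np.ndarray, Ed: np.ndarray) -> float:
    # Eq: (num_query_tokens, dim), Ed: (num_doc_tokens, dim)
    sims = Eq @ Ed.T                      # pairwise token similarities
    return float(sims.max(axis=1).sum())  # best doc match per query token, summed

score = maxsim(np.random.randn(4, 128), np.random.randn(9, 128))
```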
## LanceDB MultiVector type
LanceDB supports a multivector type, which is useful when you have multiple vectors for a single item (e.g. with ColBERT and ColPali).
You can index a column with the multivector type and search on it; the query can be a single vector or multiple vectors. For now, only the cosine metric is supported for multivector search. The vector value type can be float16, float32, or float64. LanceDB integrates the [ConteXtualized Token Retriever (XTR)](https://arxiv.org/abs/2304.01982), which introduces a simple yet novel objective function that encourages the model to retrieve the most important document tokens first.
```python
import lancedb
import numpy as np
import pyarrow as pa

db = lancedb.connect("data/multivector_demo")

# Each row stores a list of 256-dimensional vectors (a multivector).
schema = pa.schema(
    [
        pa.field("id", pa.int64()),
        # float16, float32, and float64 are supported
        pa.field("vector", pa.list_(pa.list_(pa.float32(), 256))),
    ]
)
data = [
    {
        "id": i,
        "vector": np.random.random(size=(2, 256)).tolist(),
    }
    for i in range(1024)
]
tbl = db.create_table("my_table", data=data, schema=schema)

# only cosine similarity is supported for multi-vectors
tbl.create_index(metric="cosine")

# query with a single vector; use float32 to match the table's value type
query = np.random.random(256).astype(np.float32)
tbl.search(query).to_arrow()

# query with multiple vectors
query = np.random.random(size=(2, 256))
tbl.search(query).to_arrow()
```
Find more about vector search in LanceDB [here](https://lancedb.github.io/lancedb/search/#multivector-type).

View File

@@ -4,6 +4,9 @@ LanceDB is an open-source vector database for AI that's designed to store, manag
Both the database and the underlying data format are designed from the ground up to be **easy-to-use**, **scalable** and **cost-effective**.
!!! tip "Hosted LanceDB"
If you want S3 cost-efficiency and local performance via a simple serverless API, check out **LanceDB Cloud**. For private deployments, high performance at extreme scale, or if you have strict security requirements, talk to us about **LanceDB Enterprise**. [Learn more](https://docs.lancedb.com/)
![](assets/lancedb_and_lance.png)
## Truly multi-modal
@@ -20,7 +23,7 @@ LanceDB **OSS** is an **open-source**, batteries-included embedded vector databa
LanceDB **Cloud** is a SaaS (software-as-a-service) solution that runs serverless in the cloud, clearly separating storage from compute. It's designed to be highly scalable without breaking the bank. LanceDB Cloud is currently in private beta with general availability coming soon, but you can apply for early access with the private beta release by signing up below.
[Try out LanceDB Cloud](https://noteforms.com/forms/lancedb-mailing-list-cloud-kty1o5?notionforms=1&utm_source=notionforms){ .md-button .md-button--primary }
[Try out LanceDB Cloud (Public Beta) Now](https://cloud.lancedb.com){ .md-button .md-button--primary }
## Why use LanceDB?

View File

@@ -108,7 +108,7 @@ This method creates a scalar (for non-vector cols) or a vector index on a table.
|:---|:---|:---|:---|
|`vector_col`|`Optional[str]`| Provide if you want to create index on a vector column. |`None`|
|`col_name`|`Optional[str]`| Provide if you want to create index on a non-vector column. |`None`|
|`metric`|`Optional[str]` |Provide the metric to use for vector index. choice of metrics: 'L2', 'dot', 'cosine'. |`L2`|
|`metric`|`Optional[str]` |Provide the metric to use for vector index. choice of metrics: 'l2', 'dot', 'cosine'. |`l2`|
|`num_partitions`|`Optional[int]`|Number of partitions to use for the index.|`256`|
|`num_sub_vectors`|`Optional[int]` |Number of sub-vectors to use for the index.|`96`|
|`index_cache_size`|`Optional[int]` |Size of the index cache.|`None`|

View File

@@ -125,7 +125,7 @@ The exhaustive list of parameters for `LanceDBVectorStore` vector store are :
```
- **_table_exists(self, tbl_name: `Optional[str]` = `None`) -> `bool`** : Returns `True` if `tbl_name` exists in database.
- __create_index(
self, scalar: `Optional[bool]` = False, col_name: `Optional[str]` = None, num_partitions: `Optional[int]` = 256, num_sub_vectors: `Optional[int]` = 96, index_cache_size: `Optional[int]` = None, metric: `Optional[str]` = "L2",
self, scalar: `Optional[bool]` = False, col_name: `Optional[str]` = None, num_partitions: `Optional[int]` = 256, num_sub_vectors: `Optional[int]` = 96, index_cache_size: `Optional[int]` = None, metric: `Optional[str]` = "l2",
) -> `None`__ : Creates a scalar (for non-vector cols) or a vector index on a table.
Make sure your vector column has enough data before creating an index on it.

View File

@@ -10,7 +10,7 @@ Distance metrics type.
- [Cosine](MetricType.md#cosine)
- [Dot](MetricType.md#dot)
- [L2](MetricType.md#l2)
- [l2](MetricType.md#l2)
## Enumeration Members

View File

@@ -85,7 +85,7 @@ ___
`Optional` **metric\_type**: [`MetricType`](../enums/MetricType.md)
Metric type, L2 or Cosine
Metric type, l2 or Cosine
#### Defined in

View File

@@ -15,11 +15,9 @@ npm install @lancedb/lancedb
This will download the appropriate native library for your platform. We currently
support:
- Linux (x86_64 and aarch64)
- Linux (x86_64 and aarch64 on glibc and musl)
- MacOS (Intel and ARM/M1/M2)
- Windows (x86_64 only)
We do not yet support musl-based Linux (such as Alpine Linux) or aarch64 Windows.
- Windows (x86_64 and aarch64)
## Usage

View File

@@ -126,6 +126,37 @@ the vectors.
***
### ivfFlat()
```ts
static ivfFlat(options?): Index
```
Create an IvfFlat index
This index groups vectors into partitions of similar vectors. Each partition keeps track of
a centroid which is the average value of all vectors in the group.
During a query the centroids are compared with the query vector to find the closest
partitions. The vectors in these partitions are then searched to find
the closest vectors.
The partitioning process is called IVF and the `num_partitions` parameter controls how
many groups to create.
Note that training an IVF FLAT index on a large dataset is a slow operation and
currently is also a memory intensive operation.
#### Parameters
* **options?**: `Partial`&lt;[`IvfFlatOptions`](../interfaces/IvfFlatOptions.md)&gt;
#### Returns
[`Index`](Index.md)
***
### ivfPq()
```ts

View File

@@ -0,0 +1,19 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / packBits
# Function: packBits()
```ts
function packBits(data): number[]
```
## Parameters
* **data**: `number`[]
## Returns
`number`[]

View File

@@ -39,6 +39,7 @@
- [IndexConfig](interfaces/IndexConfig.md)
- [IndexOptions](interfaces/IndexOptions.md)
- [IndexStatistics](interfaces/IndexStatistics.md)
- [IvfFlatOptions](interfaces/IvfFlatOptions.md)
- [IvfPqOptions](interfaces/IvfPqOptions.md)
- [OpenTableOptions](interfaces/OpenTableOptions.md)
- [OptimizeOptions](interfaces/OptimizeOptions.md)
@@ -66,3 +67,4 @@
- [connect](functions/connect.md)
- [makeArrowTable](functions/makeArrowTable.md)
- [packBits](functions/packBits.md)

View File

@@ -16,7 +16,7 @@ must be provided.
### dataType?
```ts
optional dataType: string;
optional dataType: string | DataType<Type, any>;
```
A new data type for the column. If not provided then the data type will not be changed.

View File

@@ -24,18 +24,18 @@ The following distance types are available:
"l2" - Euclidean distance. This is a very common distance metric that
accounts for both magnitude and direction when determining the distance
between vectors. L2 distance has a range of [0, ∞).
between vectors. l2 distance has a range of [0, ∞).
"cosine" - Cosine distance. Cosine distance is a distance metric
calculated from the cosine similarity between two vectors. Cosine
similarity is a measure of similarity between two non-zero vectors of an
inner product space. It is defined to equal the cosine of the angle
between them. Unlike L2, the cosine distance is not affected by the
between them. Unlike l2, the cosine distance is not affected by the
magnitude of the vectors. Cosine distance has a range of [0, 2].
"dot" - Dot product. Dot distance is the dot product of two vectors. Dot
distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
L2 norm is 1), then dot distance is equivalent to the cosine distance.
l2 norm is 1), then dot distance is equivalent to the cosine distance.
***

View File

@@ -24,18 +24,18 @@ The following distance types are available:
"l2" - Euclidean distance. This is a very common distance metric that
accounts for both magnitude and direction when determining the distance
between vectors. L2 distance has a range of [0, ∞).
between vectors. l2 distance has a range of [0, ∞).
"cosine" - Cosine distance. Cosine distance is a distance metric
calculated from the cosine similarity between two vectors. Cosine
similarity is a measure of similarity between two non-zero vectors of an
inner product space. It is defined to equal the cosine of the angle
between them. Unlike L2, the cosine distance is not affected by the
between them. Unlike l2, the cosine distance is not affected by the
magnitude of the vectors. Cosine distance has a range of [0, 2].
"dot" - Dot product. Dot distance is the dot product of two vectors. Dot
distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
L2 norm is 1), then dot distance is equivalent to the cosine distance.
l2 norm is 1), then dot distance is equivalent to the cosine distance.
***

View File

@@ -30,6 +30,17 @@ The type of the index
***
### loss?
```ts
optional loss: number;
```
The KMeans loss value of the index;
it is only present for vector indices.
***
### numIndexedRows
```ts

View File

@@ -0,0 +1,112 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / IvfFlatOptions
# Interface: IvfFlatOptions
Options to create an `IVF_FLAT` index
## Properties
### distanceType?
```ts
optional distanceType: "l2" | "cosine" | "dot" | "hamming";
```
Distance type to use to build the index.
Default value is "l2".
This is used when training the index to calculate the IVF partitions
(vectors are grouped in partitions with similar vectors according to this
distance type).
The distance type used to train an index MUST match the distance type used
to search the index. Failure to do so will yield inaccurate results.
The following distance types are available:
"l2" - Euclidean distance. This is a very common distance metric that
accounts for both magnitude and direction when determining the distance
between vectors. l2 distance has a range of [0, ∞).
"cosine" - Cosine distance. Cosine distance is a distance metric
calculated from the cosine similarity between two vectors. Cosine
similarity is a measure of similarity between two non-zero vectors of an
inner product space. It is defined to equal the cosine of the angle
between them. Unlike l2, the cosine distance is not affected by the
magnitude of the vectors. Cosine distance has a range of [0, 2].
Note: the cosine distance is undefined when one (or both) of the vectors
are all zeros (there is no direction). These vectors are invalid and may
never be returned from a vector search.
"dot" - Dot product. Dot distance is the dot product of two vectors. Dot
distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
l2 norm is 1), then dot distance is equivalent to the cosine distance.
"hamming" - Hamming distance. Hamming distance is a distance metric
calculated from the number of bits that are different between two vectors.
Hamming distance has a range of [0, dimension]. Note that the hamming distance
is only valid for binary vectors.
***
### maxIterations?
```ts
optional maxIterations: number;
```
Max iterations to train IVF kmeans.
When training an IVF FLAT index we use kmeans to calculate the partitions. This parameter
controls how many iterations of kmeans to run.
Increasing this might improve the quality of the index but in most cases these extra
iterations have diminishing returns.
The default value is 50.
***
### numPartitions?
```ts
optional numPartitions: number;
```
The number of IVF partitions to create.
This value should generally scale with the number of rows in the dataset.
By default the number of partitions is the square root of the number of
rows.
If this value is too large then the first part of the search (picking the
right partition) will be slow. If this value is too small then the second
part of the search (searching within a partition) will be slow.
***
### sampleRate?
```ts
optional sampleRate: number;
```
The number of vectors, per partition, to sample when training IVF kmeans.
When an IVF FLAT index is trained, we need to calculate partitions. These are groups
of vectors that are similar to each other. To do this we use an algorithm called kmeans.
Running kmeans on a large dataset can be slow. To speed this up we run kmeans on a
random sample of the data. This parameter controls the size of the sample. The total
number of vectors used to train the index is `sample_rate * num_partitions`.
Increasing this value might improve the quality of the index but in most cases the
default should be sufficient.
The default value is 256.

View File

@@ -31,13 +31,13 @@ The following distance types are available:
"l2" - Euclidean distance. This is a very common distance metric that
accounts for both magnitude and direction when determining the distance
between vectors. L2 distance has a range of [0, ∞).
between vectors. l2 distance has a range of [0, ∞).
"cosine" - Cosine distance. Cosine distance is a distance metric
calculated from the cosine similarity between two vectors. Cosine
similarity is a measure of similarity between two non-zero vectors of an
inner product space. It is defined to equal the cosine of the angle
between them. Unlike L2, the cosine distance is not affected by the
between them. Unlike l2, the cosine distance is not affected by the
magnitude of the vectors. Cosine distance has a range of [0, 2].
Note: the cosine distance is undefined when one (or both) of the vectors
@@ -46,7 +46,7 @@ never be returned from a vector search.
"dot" - Dot product. Dot distance is the dot product of two vectors. Dot
distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
L2 norm is 1), then dot distance is equivalent to the cosine distance.
l2 norm is 1), then dot distance is equivalent to the cosine distance.
***

View File

@@ -8,6 +8,23 @@
An embedding function that automatically creates vector representations for a given column.
It's important that subclasses pass the **original** options to the super constructor
and then pass those options to `resolveVariables` to resolve any variables before
using them.
## Example
```ts
class MyEmbeddingFunction extends EmbeddingFunction {
  constructor(optionsRaw: {model: string, timeout: number}) {
    super(optionsRaw);
    const options = this.resolveVariables(optionsRaw);
    this.model = options.model;
    this.timeout = options.timeout;
  }
}
```
## Extended by
- [`TextEmbeddingFunction`](TextEmbeddingFunction.md)
@@ -82,12 +99,33 @@ The datatype of the embeddings
***
### getSensitiveKeys()
```ts
protected getSensitiveKeys(): string[]
```
Provide a list of keys in the function options that should be treated as
sensitive. If users pass raw values for these keys, they will be rejected.
#### Returns
`string`[]
***
### init()?
```ts
optional init(): Promise<void>
```
Optionally load any resources needed for the embedding function.
This method is called after the embedding function has been initialized
but before any embeddings are computed. It is useful for loading local models
or other resources that are needed for the embedding function to work.
#### Returns
`Promise`&lt;`void`&gt;
@@ -108,6 +146,24 @@ The number of dimensions of the embeddings
***
### resolveVariables()
```ts
protected resolveVariables(config): Partial<M>
```
Apply variables to the config.
#### Parameters
* **config**: `Partial`&lt;`M`&gt;
#### Returns
`Partial`&lt;`M`&gt;
***
### sourceField()
```ts
@@ -134,37 +190,15 @@ sourceField is used in combination with `LanceSchema` to provide a declarative d
### toJSON()
```ts
abstract toJSON(): Partial<M>
toJSON(): Record<string, any>
```
Convert the embedding function to a JSON object
It is used to serialize the embedding function to the schema
It's important that any object returned by this method contains all the necessary
information to recreate the embedding function
It should return the same object that was passed to the constructor
If it does not, the embedding function will not be able to be recreated, or could be recreated incorrectly
Get the original arguments to the constructor, to serialize them so they
can be used to recreate the embedding function later.
#### Returns
`Partial`&lt;`M`&gt;
#### Example
```ts
class MyEmbeddingFunction extends EmbeddingFunction {
  constructor(options: {model: string, timeout: number}) {
    super();
    this.model = options.model;
    this.timeout = options.timeout;
  }
  toJSON() {
    return {
      model: this.model,
      timeout: this.timeout,
    };
  }
}
```
`Record`&lt;`string`, `any`&gt;
***

View File

@@ -80,6 +80,28 @@ getTableMetadata(functions): Map<string, string>
***
### getVar()
```ts
getVar(name): undefined | string
```
Get a variable.
#### Parameters
* **name**: `string`
#### Returns
`undefined` \| `string`
#### See
[setVar](EmbeddingFunctionRegistry.md#setvar)
***
### length()
```ts
@@ -145,3 +167,31 @@ reset the registry to the initial state
#### Returns
`void`
***
### setVar()
```ts
setVar(name, value): void
```
Set a variable. These can be accessed in the embedding function
configuration using the syntax `$var:variable_name`. If they are not
set, an error will be thrown letting you know which key is unset. If you
want to supply a default value, you can add an additional part in the
configuration like so: `$var:variable_name:default_value`. Default values
can be used for runtime configurations that are not sensitive, such as
whether to use a GPU for inference.
The name must not contain colons. The default value can contain colons.
#### Parameters
* **name**: `string`
* **value**: `string`
#### Returns
`void`

View File

@@ -114,12 +114,37 @@ abstract generateEmbeddings(texts, ...args): Promise<number[][] | Float32Array[]
***
### getSensitiveKeys()
```ts
protected getSensitiveKeys(): string[]
```
Provide a list of keys in the function options that should be treated as
sensitive. If users pass raw values for these keys, they will be rejected.
#### Returns
`string`[]
#### Inherited from
[`EmbeddingFunction`](EmbeddingFunction.md).[`getSensitiveKeys`](EmbeddingFunction.md#getsensitivekeys)
***
### init()?
```ts
optional init(): Promise<void>
```
Optionally load any resources needed for the embedding function.
This method is called after the embedding function has been initialized
but before any embeddings are computed. It is useful for loading local models
or other resources that are needed for the embedding function to work.
#### Returns
`Promise`&lt;`void`&gt;
@@ -148,6 +173,28 @@ The number of dimensions of the embeddings
***
### resolveVariables()
```ts
protected resolveVariables(config): Partial<M>
```
Apply variables to the config.
#### Parameters
* **config**: `Partial`&lt;`M`&gt;
#### Returns
`Partial`&lt;`M`&gt;
#### Inherited from
[`EmbeddingFunction`](EmbeddingFunction.md).[`resolveVariables`](EmbeddingFunction.md#resolvevariables)
***
### sourceField()
```ts
@@ -173,37 +220,15 @@ sourceField is used in combination with `LanceSchema` to provide a declarative d
### toJSON()
```ts
abstract toJSON(): Partial<M>
toJSON(): Record<string, any>
```
Convert the embedding function to a JSON object
It is used to serialize the embedding function to the schema
It's important that any object returned by this method contains all the necessary
information to recreate the embedding function
It should return the same object that was passed to the constructor
If it does not, the embedding function will not be able to be recreated, or could be recreated incorrectly
Get the original arguments to the constructor, to serialize them so they
can be used to recreate the embedding function later.
#### Returns
`Partial`&lt;`M`&gt;
#### Example
```ts
class MyEmbeddingFunction extends EmbeddingFunction {
  constructor(options: {model: string, timeout: number}) {
    super();
    this.model = options.model;
    this.timeout = options.timeout;
  }
  toJSON() {
    return {
      model: this.model,
      timeout: this.timeout,
    };
  }
}
```
`Record`&lt;`string`, `any`&gt;
#### Inherited from

File diff suppressed because one or more lines are too long

View File

@@ -9,23 +9,50 @@ LanceDB supports [Polars](https://github.com/pola-rs/polars), a blazingly fast DataFrame library
First, we connect to a LanceDB database.
=== "Sync API"
```py
--8<-- "python/python/tests/docs/test_python.py:import-lancedb"
--8<-- "python/python/tests/docs/test_python.py:connect_to_lancedb"
```
=== "Async API"
```py
--8<-- "python/python/tests/docs/test_python.py:import-lancedb"
--8<-- "python/python/tests/docs/test_python.py:connect_to_lancedb_async"
```
```py
--8<-- "python/python/tests/docs/test_python.py:import-lancedb"
--8<-- "python/python/tests/docs/test_python.py:connect_to_lancedb"
```
We can load a Polars `DataFrame` to LanceDB directly.
```py
--8<-- "python/python/tests/docs/test_python.py:import-polars"
--8<-- "python/python/tests/docs/test_python.py:create_table_polars"
```
=== "Sync API"
```py
--8<-- "python/python/tests/docs/test_python.py:import-polars"
--8<-- "python/python/tests/docs/test_python.py:create_table_polars"
```
=== "Async API"
```py
--8<-- "python/python/tests/docs/test_python.py:import-polars"
--8<-- "python/python/tests/docs/test_python.py:create_table_polars_async"
```
We can now perform similarity search via the LanceDB Python API.
```py
--8<-- "python/python/tests/docs/test_python.py:vector_search_polars"
```
=== "Sync API"
```py
--8<-- "python/python/tests/docs/test_python.py:vector_search_polars"
```
=== "Async API"
```py
--8<-- "python/python/tests/docs/test_python.py:vector_search_polars_async"
```
In addition to the selected columns, LanceDB also returns a vector
and the `_distance` column, which is the distance between the query
@@ -112,4 +139,3 @@ The reason it's beneficial to not convert the LanceDB Table
to a DataFrame is that the table can potentially be far larger
than memory; Polars LazyFrames let us work with such
larger-than-memory datasets without loading them into memory all at once.
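For instance, a minimal sketch (assuming the table exposes `to_polars()`, which returns a Polars `LazyFrame`; the column name is a placeholder):
```python
import polars as pl

ldf = tbl.to_polars()  # LazyFrame: nothing is materialized yet
# Work happens only at collect(), and only the needed data is read.
print(ldf.select(pl.col("item")).limit(5).collect())
```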

View File

@@ -2,14 +2,19 @@
[Pydantic](https://docs.pydantic.dev/latest/) is a data validation library in Python.
LanceDB integrates with Pydantic for schema inference, data ingestion, and query result casting.
Using [LanceModel][lancedb.pydantic.LanceModel], users can seamlessly
integrate Pydantic with the rest of the LanceDB APIs.
## Schema
LanceDB supports creating an Apache Arrow Schema from a
[Pydantic BaseModel](https://docs.pydantic.dev/latest/api/main/#pydantic.main.BaseModel)
via the [pydantic_to_schema()](python.md#lancedb.pydantic.pydantic_to_schema) method.
```python
--8<-- "python/python/tests/docs/test_pydantic_integration.py:imports"
--8<-- "python/python/tests/docs/test_pydantic_integration.py:base_model"
--8<-- "python/python/tests/docs/test_pydantic_integration.py:set_url"
--8<-- "python/python/tests/docs/test_pydantic_integration.py:base_example"
```
::: lancedb.pydantic.pydantic_to_schema
## Vector Field
@@ -34,3 +39,9 @@ Current supported type conversions:
| `list` | `pyarrow.List` |
| `BaseModel` | `pyarrow.Struct` |
| `Vector(n)` | `pyarrow.FixedSizeList(float32, n)` |
LanceDB supports creating an Apache Arrow Schema from a
[Pydantic BaseModel][pydantic.BaseModel]
via the [pydantic_to_schema()](python.md#lancedb.pydantic.pydantic_to_schema) method.
::: lancedb.pydantic.pydantic_to_schema

View File

@@ -59,8 +59,6 @@ is also an [asynchronous API client](#connections-asynchronous).
::: lancedb.embeddings.open_clip.OpenClipEmbeddings
::: lancedb.embeddings.utils.with_embeddings
## Context
::: lancedb.context.contextualize

View File

@@ -15,7 +15,7 @@ Currently, LanceDB supports the following metrics:
| Metric | Description |
| --------- | --------------------------------------------------------------------------- |
| `l2` | [Euclidean / L2 distance](https://en.wikipedia.org/wiki/Euclidean_distance) |
| `l2` | [Euclidean / l2 distance](https://en.wikipedia.org/wiki/Euclidean_distance) |
| `cosine` | [Cosine Similarity](https://en.wikipedia.org/wiki/Cosine_similarity) |
| `dot` | [Dot Production](https://en.wikipedia.org/wiki/Dot_product) |
| `hamming` | [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) |
@@ -138,6 +138,19 @@ LanceDB supports binary vectors as a data type, and has the ability to search bi
--8<-- "python/python/tests/docs/test_binary_vector.py:async_binary_vector"
```
=== "TypeScript"
```ts
--8<-- "nodejs/examples/search.test.ts:import"
--8<-- "nodejs/examples/search.test.ts:import_bin_util"
--8<-- "nodejs/examples/search.test.ts:ingest_binary_data"
--8<-- "nodejs/examples/search.test.ts:search_binary_data"
```
## Multivector type
LanceDB supports a multivector type, which is useful when you have multiple vectors for a single item (e.g. with ColBERT and ColPali).

View File

@@ -20,6 +20,7 @@ async function setup() {
}
async () => {
console.log("search_legacy.ts: start");
await setup();
// --8<-- [start:search1]
@@ -37,5 +38,5 @@ async () => {
.execute();
// --8<-- [end:search2]
console.log("search: done");
console.log("search_legacy.ts: done");
};

View File

@@ -7,7 +7,7 @@ performed on the top-k results returned by the vector search. However, pre-filtering is an
option that performs the filter prior to vector search. This can be useful to narrow down
the search space of a very large dataset to reduce query latency.
Note that both pre-filtering and post-filtering can yield false positives. For pre-filtering, if the filter is too selective, it might eliminate relevant items that the vector search would have otherwise identified as a good match. In this case, increasing `nprobes` parameter will help reduce such false positives. It is recommended to set `use_index=false` if you know that the filter is highly selective.
Note that both pre-filtering and post-filtering can yield false positives. For pre-filtering, if the filter is too selective, it might eliminate relevant items that the vector search would have otherwise identified as a good match. In this case, increasing `nprobes` parameter will help reduce such false positives. It is recommended to call `bypass_vector_index()` if you know that the filter is highly selective.
Similarly, a highly selective post-filter can lead to false positives. Increasing both `nprobes` and `refine_factor` can mitigate this issue. When deciding between pre-filtering and post-filtering, pre-filtering is generally the safer choice if you're uncertain.
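As a sketch with the Python sync query builder (the vector and filter string are placeholders; `prefilter=True` and `bypass_vector_index()` are the relevant knobs):
```python
results = (
    tbl.search([0.1, 0.2, 0.3])
    .where("category = 'rare_label'", prefilter=True)  # filter before the ANN search
    .bypass_vector_index()  # exact scan; safest when the filter is highly selective
    .limit(10)
    .to_pandas()
)
```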

View File

@@ -1,6 +1,7 @@
import * as vectordb from "vectordb";
(async () => {
console.log("sql_legacy.ts: start");
const db = await vectordb.connect("data/sample-lancedb");
let data = [];
@@ -34,5 +35,5 @@ import * as vectordb from "vectordb";
await tbl.filter("id = 10").limit(10).execute();
// --8<-- [end:sql_search]
console.log("SQL search: done");
console.log("sql_legacy.ts: done");
})();

View File

@@ -8,6 +8,10 @@ For troubleshooting, the best place to ask is in our Discord, under the relevant
language channel. By asking in the language-specific channel, it makes it more
likely that someone who knows the answer will see your question.
## Common issues
* Multiprocessing with `fork` is not supported. You should use `spawn` instead (see the sketch below).
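A minimal sketch of forcing `spawn` with only the standard library (the `__main__` guard matters because `spawn` re-imports the main module):
```python
import multiprocessing as mp

def worker(i):
    # import lancedb here, inside the spawned worker, not in a forked child
    return i * i

if __name__ == "__main__":
    mp.set_start_method("spawn")
    with mp.Pool(2) as pool:
        print(pool.map(worker, range(4)))
```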
## Enabling logging
To provide more information, especially for LanceDB Cloud related issues, enable

View File

@@ -15,6 +15,7 @@ excluded_globs = [
"../src/python/duckdb.md",
"../src/python/pandas_and_pyarrow.md",
"../src/python/polars_arrow.md",
"../src/python/pydantic.md",
"../src/embeddings/*.md",
"../src/concepts/*.md",
"../src/ann_indexes.md",

3
java/.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
*.iml
.java-version

View File

@@ -8,13 +8,16 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.15.1-beta.3</version>
<version>0.18.2-beta.0</version>
<relativePath>../pom.xml</relativePath>
</parent>
<artifactId>lancedb-core</artifactId>
<name>LanceDB Core</name>
<packaging>jar</packaging>
<properties>
<rust.release.build>false</rust.release.build>
</properties>
<dependencies>
<dependency>
@@ -68,7 +71,7 @@
</goals>
<configuration>
<path>lancedb-jni</path>
<release>true</release>
<release>${rust.release.build}</release>
<!-- Copy native libraries to target/classes for runtime access -->
<copyTo>${project.build.directory}/classes/nativelib</copyTo>
<copyWithPlatformDir>true</copyWithPlatformDir>

View File

@@ -1,16 +1,25 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.lancedb.lancedb;
import io.questdb.jar.jni.JarJniLoader;
import java.io.Closeable;
import java.util.List;
import java.util.Optional;
/**
* Represents LanceDB database.
*/
/** Represents LanceDB database. */
public class Connection implements Closeable {
static {
JarJniLoader.loadLib(Connection.class, "/nativelib", "lancedb_jni");
@@ -18,14 +27,11 @@ public class Connection implements Closeable {
private long nativeConnectionHandle;
/**
* Connect to a LanceDB instance.
*/
/** Connect to a LanceDB instance. */
public static native Connection connect(String uri);
/**
* Get the names of all tables in the database. The names are sorted in
* ascending order.
* Get the names of all tables in the database. The names are sorted in ascending order.
*
* @return the table names
*/
@@ -34,8 +40,7 @@ public class Connection implements Closeable {
}
/**
* Get the names of filtered tables in the database. The names are sorted in
* ascending order.
* Get the names of filtered tables in the database. The names are sorted in ascending order.
*
* @param limit The number of results to return.
* @return the table names
@@ -45,12 +50,11 @@ public class Connection implements Closeable {
}
/**
* Get the names of filtered tables in the database. The names are sorted in
* ascending order.
* Get the names of filtered tables in the database. The names are sorted in ascending order.
*
* @param startAfter If present, only return names that come lexicographically after the supplied
* value. This can be combined with limit to implement pagination
* by setting this to the last table name from the previous page.
* value. This can be combined with limit to implement pagination by setting this to the last
* table name from the previous page.
* @return the table names
*/
public List<String> tableNames(String startAfter) {
@@ -58,12 +62,11 @@ public class Connection implements Closeable {
}
/**
* Get the names of filtered tables in the database. The names are sorted in
* ascending order.
* Get the names of filtered tables in the database. The names are sorted in ascending order.
*
* @param startAfter If present, only return names that come lexicographically after the supplied
* value. This can be combined with limit to implement pagination
* by setting this to the last table name from the previous page.
* value. This can be combined with limit to implement pagination by setting this to the last
* table name from the previous page.
* @param limit The number of results to return.
* @return the table names
*/
@@ -72,22 +75,19 @@ public class Connection implements Closeable {
}
/**
* Get the names of filtered tables in the database. The names are sorted in
* ascending order.
* Get the names of filtered tables in the database. The names are sorted in ascending order.
*
* @param startAfter If present, only return names that come lexicographically after the supplied
* value. This can be combined with limit to implement pagination
* by setting this to the last table name from the previous page.
* value. This can be combined with limit to implement pagination by setting this to the last
* table name from the previous page.
* @param limit The number of results to return.
* @return the table names
*/
public native List<String> tableNames(
Optional<String> startAfter, Optional<Integer> limit);
public native List<String> tableNames(Optional<String> startAfter, Optional<Integer> limit);
/**
* Closes this connection and releases any system resources associated with it. If
* the connection is
* already closed, then invoking this method has no effect.
* Closes this connection and releases any system resources associated with it. If the connection
* is already closed, then invoking this method has no effect.
*/
@Override
public void close() {
@@ -98,8 +98,7 @@ public class Connection implements Closeable {
}
/**
* Native method to release the Lance connection resources associated with the
* given handle.
* Native method to release the Lance connection resources associated with the given handle.
*
* @param handle The native handle to the connection resource.
*/

View File

@@ -1,27 +1,35 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.lancedb.lancedb;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.nio.file.Path;
import java.util.List;
import java.net.URL;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import java.net.URL;
import java.nio.file.Path;
import java.util.List;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
public class ConnectionTest {
private static final String[] TABLE_NAMES = {
"dataset_version",
"new_empty_dataset",
"test",
"write_stream"
"dataset_version", "new_empty_dataset", "test", "write_stream"
};
@TempDir
static Path tempDir; // Temporary directory for the tests
@TempDir static Path tempDir; // Temporary directory for the tests
private static URL lanceDbURL;
@BeforeAll
@@ -53,18 +61,21 @@ public class ConnectionTest {
@Test
void tableNamesStartAfter() {
try (Connection conn = Connection.connect(lanceDbURL.toString())) {
assertTableNamesStartAfter(conn, TABLE_NAMES[0], 3, TABLE_NAMES[1], TABLE_NAMES[2], TABLE_NAMES[3]);
assertTableNamesStartAfter(
conn, TABLE_NAMES[0], 3, TABLE_NAMES[1], TABLE_NAMES[2], TABLE_NAMES[3]);
assertTableNamesStartAfter(conn, TABLE_NAMES[1], 2, TABLE_NAMES[2], TABLE_NAMES[3]);
assertTableNamesStartAfter(conn, TABLE_NAMES[2], 1, TABLE_NAMES[3]);
assertTableNamesStartAfter(conn, TABLE_NAMES[3], 0);
assertTableNamesStartAfter(conn, "a_dataset", 4, TABLE_NAMES[0], TABLE_NAMES[1], TABLE_NAMES[2], TABLE_NAMES[3]);
assertTableNamesStartAfter(
conn, "a_dataset", 4, TABLE_NAMES[0], TABLE_NAMES[1], TABLE_NAMES[2], TABLE_NAMES[3]);
assertTableNamesStartAfter(conn, "o_dataset", 2, TABLE_NAMES[2], TABLE_NAMES[3]);
assertTableNamesStartAfter(conn, "v_dataset", 1, TABLE_NAMES[3]);
assertTableNamesStartAfter(conn, "z_dataset", 0);
}
}
private void assertTableNamesStartAfter(Connection conn, String startAfter, int expectedSize, String... expectedNames) {
private void assertTableNamesStartAfter(
Connection conn, String startAfter, int expectedSize, String... expectedNames) {
List<String> tableNames = conn.tableNames(startAfter);
assertEquals(expectedSize, tableNames.size());
for (int i = 0; i < expectedNames.length; i++) {
@@ -74,7 +85,7 @@ public class ConnectionTest {
@Test
void tableNamesLimit() {
try (Connection conn = Connection.connect(lanceDbURL.toString())) {
try (Connection conn = Connection.connect(lanceDbURL.toString())) {
for (int i = 0; i <= TABLE_NAMES.length; i++) {
List<String> tableNames = conn.tableNames(i);
assertEquals(i, tableNames.size());

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.15.1-beta.3</version>
<version>0.18.2-beta.0</version>
<packaging>pom</packaging>
<name>LanceDB Parent</name>
@@ -29,6 +29,25 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<arrow.version>15.0.0</arrow.version>
<spotless.skip>false</spotless.skip>
<spotless.version>2.30.0</spotless.version>
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
<spotless.delimiter>package</spotless.delimiter>
<spotless.license.header>
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
</spotless.license.header>
</properties>
<modules>
@@ -127,7 +146,8 @@
<configuration>
<configLocation>google_checks.xml</configLocation>
<consoleOutput>true</consoleOutput>
<failsOnError>true</failsOnError>
<failsOnError>false</failsOnError>
<failOnViolation>false</failOnViolation>
<violationSeverity>warning</violationSeverity>
<linkXRef>false</linkXRef>
</configuration>
@@ -141,6 +161,10 @@
</execution>
</executions>
</plugin>
<plugin>
<groupId>com.diffplug.spotless</groupId>
<artifactId>spotless-maven-plugin</artifactId>
</plugin>
</plugins>
<pluginManagement>
<plugins>
@@ -166,7 +190,6 @@
<artifactId>maven-surefire-plugin</artifactId>
<version>3.2.5</version>
<configuration>
<argLine>--add-opens=java.base/java.nio=ALL-UNNAMED</argLine>
<forkNode
implementation="org.apache.maven.plugin.surefire.extensions.SurefireForkNodeFactory" />
<useSystemClassLoader>false</useSystemClassLoader>
@@ -180,6 +203,54 @@
<artifactId>maven-install-plugin</artifactId>
<version>2.5.2</version>
</plugin>
<plugin>
<groupId>com.diffplug.spotless</groupId>
<artifactId>spotless-maven-plugin</artifactId>
<version>${spotless.version}</version>
<configuration>
<skip>${spotless.skip}</skip>
<upToDateChecking>
<enabled>true</enabled>
</upToDateChecking>
<java>
<includes>
<include>src/main/java/**/*.java</include>
<include>src/test/java/**/*.java</include>
</includes>
<googleJavaFormat>
<version>${spotless.java.googlejavaformat.version}</version>
<style>GOOGLE</style>
</googleJavaFormat>
<importOrder>
<order>com.lancedb.lance,,javax,java,\#</order>
</importOrder>
<removeUnusedImports />
</java>
<scala>
<includes>
<include>src/main/scala/**/*.scala</include>
<include>src/main/scala-*/**/*.scala</include>
<include>src/test/scala/**/*.scala</include>
<include>src/test/scala-*/**/*.scala</include>
</includes>
</scala>
<licenseHeader>
<content>${spotless.license.header}</content>
<delimiter>${spotless.delimiter}</delimiter>
</licenseHeader>
</configuration>
<executions>
<execution>
<id>spotless-check</id>
<phase>validate</phase>
<goals>
<goal>apply</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</pluginManagement>
</build>

node/package-lock.json (generated)

@@ -1,12 +1,12 @@
{
"name": "vectordb",
"version": "0.15.1-beta.3",
"version": "0.18.2-beta.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "vectordb",
"version": "0.15.1-beta.3",
"version": "0.18.2-beta.0",
"cpu": [
"x64",
"arm64"
@@ -52,14 +52,11 @@
"uuid": "^9.0.0"
},
"optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.15.1-beta.3",
"@lancedb/vectordb-darwin-x64": "0.15.1-beta.3",
"@lancedb/vectordb-linux-arm64-gnu": "0.15.1-beta.3",
"@lancedb/vectordb-linux-arm64-musl": "0.15.1-beta.3",
"@lancedb/vectordb-linux-x64-gnu": "0.15.1-beta.3",
"@lancedb/vectordb-linux-x64-musl": "0.15.1-beta.3",
"@lancedb/vectordb-win32-arm64-msvc": "0.15.1-beta.3",
"@lancedb/vectordb-win32-x64-msvc": "0.15.1-beta.3"
"@lancedb/vectordb-darwin-arm64": "0.18.2-beta.0",
"@lancedb/vectordb-darwin-x64": "0.18.2-beta.0",
"@lancedb/vectordb-linux-arm64-gnu": "0.18.2-beta.0",
"@lancedb/vectordb-linux-x64-gnu": "0.18.2-beta.0",
"@lancedb/vectordb-win32-x64-msvc": "0.18.2-beta.0"
},
"peerDependencies": {
"@apache-arrow/ts": "^14.0.2",
@@ -330,9 +327,9 @@
}
},
"node_modules/@lancedb/vectordb-darwin-arm64": {
"version": "0.15.1-beta.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.15.1-beta.3.tgz",
"integrity": "sha512-2GinbODdSsUc+zJQ4BFZPsdraPWHJpDpGf7CsZIqfokwxIRnzVzFfQy+SZhmNhKzFkmtW21yWw6wrJ4FgS7Qtw==",
"version": "0.18.2-beta.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.18.2-beta.0.tgz",
"integrity": "sha512-FzIcElkS6R5I5kU1S5m7yLVTB1Duv1XcmZQtVmYl/JjNlfxS1WTtMzdzMqSBFohDcgU2Tkc5+1FpK1B94dUUbg==",
"cpu": [
"arm64"
],
@@ -343,9 +340,9 @@
]
},
"node_modules/@lancedb/vectordb-darwin-x64": {
"version": "0.15.1-beta.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.15.1-beta.3.tgz",
"integrity": "sha512-nRp5eN6yvx5kvfDEQuh3EHCmwjVNCIm7dXoV6BasepFkOoaHHmjKSIUFW7HjtJOfdFbb+r8UjBJx4cN6Jh2iFg==",
"version": "0.18.2-beta.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.18.2-beta.0.tgz",
"integrity": "sha512-jv+XludfLNBDm1DjdqyghwDMtd4E+ygwycQpkpK72wyZSh6Qytrgq+4dNi/zCZ3UChFLbKbIxrVxv9yENQn2Pg==",
"cpu": [
"x64"
],
@@ -356,22 +353,9 @@
]
},
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
"version": "0.15.1-beta.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.15.1-beta.3.tgz",
"integrity": "sha512-JOyD7Nt3RSfHGWNQjHbZMHsIw1cVWPySxbtDmDqk5QH5IfgDNZLiz/sNbROuQkNvc5SsC6wUmhBUwWBETzW7/g==",
"cpu": [
"arm64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"linux"
]
},
"node_modules/@lancedb/vectordb-linux-arm64-musl": {
"version": "0.15.1-beta.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-musl/-/vectordb-linux-arm64-musl-0.15.1-beta.3.tgz",
"integrity": "sha512-4jTHl1i/4e7wP2U7RMjHr87/gsGJ9tfRJ4ljQIfV+LkA7ROMd/TA5XSnvPesQCDjPNRI4wAyb/BmK18V96VqBg==",
"version": "0.18.2-beta.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.18.2-beta.0.tgz",
"integrity": "sha512-8/fBpbNYhhpetf/pZv0DyPnQkeAbsiICMyCoRiNu5auvQK4AsGF1XvLWrDi68u9F0GysBKvuatYuGqa/yh+Anw==",
"cpu": [
"arm64"
],
@@ -382,9 +366,9 @@
]
},
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
"version": "0.15.1-beta.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.15.1-beta.3.tgz",
"integrity": "sha512-odrNqB/bGL+sweZi6ed9sKft/H5/bca/tDVG/Y39xCJ6swPWxXQK2Zpn7EjqbccI2p2zkrhKcOUBO/bEkOqQng==",
"version": "0.18.2-beta.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.18.2-beta.0.tgz",
"integrity": "sha512-7a1Kc/2V2ff4HlLzXyXVdK0Z0VIFUt50v2SBRdlcycJ0NLW9ZqV+9UjB/NAOwMXVgYd7d3rKjACGkQzkpvcyeg==",
"cpu": [
"x64"
],
@@ -394,36 +378,10 @@
"linux"
]
},
"node_modules/@lancedb/vectordb-linux-x64-musl": {
"version": "0.15.1-beta.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-musl/-/vectordb-linux-x64-musl-0.15.1-beta.3.tgz",
"integrity": "sha512-Zml4KgQWzkkMBHZiD30Gs3N56BT5xO01efwO/Q2qB7JKw5Vy9pa6SgFf9woBvKFQRY73fiKqafy+BmGHTgozNg==",
"cpu": [
"x64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"linux"
]
},
"node_modules/@lancedb/vectordb-win32-arm64-msvc": {
"version": "0.15.1-beta.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-arm64-msvc/-/vectordb-win32-arm64-msvc-0.15.1-beta.3.tgz",
"integrity": "sha512-3BWkK+8JP+js/KoTad7bm26NTR5pq2tvXJkrFB0eaFfsIuUXebS+LIBF22f39He2WMpq3YojT0bMnYxp8qvRkQ==",
"cpu": [
"arm64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"win32"
]
},
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
"version": "0.15.1-beta.3",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.15.1-beta.3.tgz",
"integrity": "sha512-jr8SEisYAX7pQHIbxIDJPkANmxWh5Yohm8ELbMgu76IvLI7bsS7sB9ID+kcj1SiS5m4V6OG2BO1FrEYbPLZ6Dg==",
"version": "0.18.2-beta.0",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.18.2-beta.0.tgz",
"integrity": "sha512-EeCiSf2RtJMESnkIca28GI6rAStYj2q9sVIyNCXpmIZSkJVpfQ3iswHGAbHrEfaPl0J1Re9cnRHLLuqkumwiIQ==",
"cpu": [
"x64"
],


@@ -1,6 +1,6 @@
{
"name": "vectordb",
"version": "0.15.1-beta.3",
"version": "0.18.2-beta.0",
"description": " Serverless, low-latency vector database for AI applications",
"private": false,
"main": "dist/index.js",
@@ -85,20 +85,14 @@
"aarch64-apple-darwin": "@lancedb/vectordb-darwin-arm64",
"x86_64-unknown-linux-gnu": "@lancedb/vectordb-linux-x64-gnu",
"aarch64-unknown-linux-gnu": "@lancedb/vectordb-linux-arm64-gnu",
"x86_64-unknown-linux-musl": "@lancedb/vectordb-linux-x64-musl",
"aarch64-unknown-linux-musl": "@lancedb/vectordb-linux-arm64-musl",
"x86_64-pc-windows-msvc": "@lancedb/vectordb-win32-x64-msvc",
"aarch64-pc-windows-msvc": "@lancedb/vectordb-win32-arm64-msvc"
"x86_64-pc-windows-msvc": "@lancedb/vectordb-win32-x64-msvc"
}
},
"optionalDependencies": {
"@lancedb/vectordb-darwin-x64": "0.15.1-beta.3",
"@lancedb/vectordb-darwin-arm64": "0.15.1-beta.3",
"@lancedb/vectordb-linux-x64-gnu": "0.15.1-beta.3",
"@lancedb/vectordb-linux-arm64-gnu": "0.15.1-beta.3",
"@lancedb/vectordb-linux-x64-musl": "0.15.1-beta.3",
"@lancedb/vectordb-linux-arm64-musl": "0.15.1-beta.3",
"@lancedb/vectordb-win32-x64-msvc": "0.15.1-beta.3",
"@lancedb/vectordb-win32-arm64-msvc": "0.15.1-beta.3"
"@lancedb/vectordb-darwin-x64": "0.18.2-beta.0",
"@lancedb/vectordb-darwin-arm64": "0.18.2-beta.0",
"@lancedb/vectordb-linux-x64-gnu": "0.18.2-beta.0",
"@lancedb/vectordb-linux-arm64-gnu": "0.18.2-beta.0",
"@lancedb/vectordb-win32-x64-msvc": "0.18.2-beta.0"
}
}


@@ -1299,7 +1299,7 @@ export interface IvfPQIndexConfig {
index_name?: string
/**
* Metric type, L2 or Cosine
* Metric type, l2 or Cosine
*/
metric_type?: MetricType


@@ -22,3 +22,4 @@ build.rs
jest.config.js
tsconfig.json
typedoc.json
typedoc_post_process.js


@@ -1,7 +1,7 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
version = "0.15.1-beta.3"
version = "0.18.2-beta.0"
license.workspace = true
description.workspace = true
repository.workspace = true
@@ -18,7 +18,7 @@ arrow-array.workspace = true
arrow-schema.workspace = true
env_logger.workspace = true
futures.workspace = true
lancedb = { path = "../rust/lancedb", features = ["remote"] }
lancedb = { path = "../rust/lancedb" }
napi = { version = "2.16.8", default-features = false, features = [
"napi9",
"async"
@@ -30,3 +30,8 @@ log.workspace = true
[build-dependencies]
napi-build = "2.1"
[features]
default = ["remote"]
fp16kernels = ["lancedb/fp16kernels"]
remote = ["lancedb/remote"]


@@ -11,11 +11,9 @@ npm install @lancedb/lancedb
This will download the appropriate native library for your platform. We currently
support:
- Linux (x86_64 and aarch64)
- Linux (x86_64 and aarch64 on glibc and musl)
- MacOS (Intel and ARM/M1/M2)
- Windows (x86_64 only)
We do not yet support musl-based Linux (such as Alpine Linux) or aarch64 Windows.
- Windows (x86_64 and aarch64)
## Usage


@@ -17,6 +17,8 @@ import {
import { EmbeddingFunction, LanceSchema } from "../lancedb/embedding";
import { getRegistry, register } from "../lancedb/embedding/registry";
const testOpenAIInteg = process.env.OPENAI_API_KEY == null ? test.skip : test;
describe("embedding functions", () => {
let tmpDir: tmp.DirResult;
beforeEach(() => {
@@ -29,9 +31,6 @@ describe("embedding functions", () => {
it("should be able to create a table with an embedding function", async () => {
class MockEmbeddingFunction extends EmbeddingFunction<string> {
toJSON(): object {
return {};
}
ndims() {
return 3;
}
@@ -75,9 +74,6 @@ describe("embedding functions", () => {
it("should be able to append and upsert using embedding function", async () => {
@register()
class MockEmbeddingFunction extends EmbeddingFunction<string> {
toJSON(): object {
return {};
}
ndims() {
return 3;
}
@@ -143,9 +139,6 @@ describe("embedding functions", () => {
it("should be able to create an empty table with an embedding function", async () => {
@register()
class MockEmbeddingFunction extends EmbeddingFunction<string> {
toJSON(): object {
return {};
}
ndims() {
return 3;
}
@@ -194,9 +187,6 @@ describe("embedding functions", () => {
it("should error when appending to a table with an unregistered embedding function", async () => {
@register("mock")
class MockEmbeddingFunction extends EmbeddingFunction<string> {
toJSON(): object {
return {};
}
ndims() {
return 3;
}
@@ -241,13 +231,35 @@ describe("embedding functions", () => {
`Function "mock" not found in registry`,
);
});
testOpenAIInteg("propagates variables through all methods", async () => {
delete process.env.OPENAI_API_KEY;
const registry = getRegistry();
registry.setVar("openai_api_key", "sk-...");
const func = registry.get("openai")?.create({
model: "text-embedding-ada-002",
apiKey: "$var:openai_api_key",
}) as EmbeddingFunction;
const db = await connect("memory://");
const wordsSchema = LanceSchema({
text: func.sourceField(new Utf8()),
vector: func.vectorField(),
});
const tbl = await db.createEmptyTable("words", wordsSchema, {
mode: "overwrite",
});
await tbl.add([{ text: "hello world" }, { text: "goodbye world" }]);
const query = "greetings";
const actual = (await tbl.search(query).limit(1).toArray())[0];
expect(actual).toHaveProperty("text");
});
test.each([new Float16(), new Float32(), new Float64()])(
"should be able to provide manual embeddings with multiple float datatype",
async (floatType) => {
class MockEmbeddingFunction extends EmbeddingFunction<string> {
toJSON(): object {
return {};
}
ndims() {
return 3;
}
@@ -292,10 +304,6 @@ describe("embedding functions", () => {
async (floatType) => {
@register("test1")
class MockEmbeddingFunctionWithoutNDims extends EmbeddingFunction<string> {
toJSON(): object {
return {};
}
embeddingDataType(): Float {
return floatType;
}
@@ -310,9 +318,6 @@ describe("embedding functions", () => {
}
@register("test")
class MockEmbeddingFunction extends EmbeddingFunction<string> {
toJSON(): object {
return {};
}
ndims() {
return 3;
}


@@ -11,7 +11,11 @@ import * as arrow18 from "apache-arrow-18";
import * as tmp from "tmp";
import { connect } from "../lancedb";
import { EmbeddingFunction, LanceSchema } from "../lancedb/embedding";
import {
EmbeddingFunction,
FunctionOptions,
LanceSchema,
} from "../lancedb/embedding";
import { getRegistry, register } from "../lancedb/embedding/registry";
describe.each([arrow15, arrow16, arrow17, arrow18])("LanceSchema", (arrow) => {
@@ -39,11 +43,6 @@ describe.each([arrow15, arrow16, arrow17, arrow18])("Registry", (arrow) => {
it("should register a new item to the registry", async () => {
@register("mock-embedding")
class MockEmbeddingFunction extends EmbeddingFunction<string> {
toJSON(): object {
return {
someText: "hello",
};
}
constructor() {
super();
}
@@ -89,11 +88,6 @@ describe.each([arrow15, arrow16, arrow17, arrow18])("Registry", (arrow) => {
});
test("should error if registering with the same name", async () => {
class MockEmbeddingFunction extends EmbeddingFunction<string> {
toJSON(): object {
return {
someText: "hello",
};
}
constructor() {
super();
}
@@ -114,13 +108,9 @@ describe.each([arrow15, arrow16, arrow17, arrow18])("Registry", (arrow) => {
});
test("schema should contain correct metadata", async () => {
class MockEmbeddingFunction extends EmbeddingFunction<string> {
toJSON(): object {
return {
someText: "hello",
};
}
constructor() {
constructor(args: FunctionOptions = {}) {
super();
this.resolveVariables(args);
}
ndims() {
return 3;
@@ -132,7 +122,7 @@ describe.each([arrow15, arrow16, arrow17, arrow18])("Registry", (arrow) => {
return data.map(() => [1, 2, 3]);
}
}
const func = new MockEmbeddingFunction();
const func = new MockEmbeddingFunction({ someText: "hello" });
const schema = LanceSchema({
id: new arrow.Int32(),
@@ -155,3 +145,79 @@ describe.each([arrow15, arrow16, arrow17, arrow18])("Registry", (arrow) => {
expect(schema.metadata).toEqual(expectedMetadata);
});
});
describe("Registry.setVar", () => {
const registry = getRegistry();
beforeEach(() => {
@register("mock-embedding")
// biome-ignore lint/correctness/noUnusedVariables :
class MockEmbeddingFunction extends EmbeddingFunction<string> {
constructor(optionsRaw: FunctionOptions = {}) {
super();
const options = this.resolveVariables(optionsRaw);
expect(optionsRaw["someKey"].startsWith("$var:someName")).toBe(true);
expect(options["someKey"]).toBe("someValue");
if (options["secretKey"]) {
expect(optionsRaw["secretKey"]).toBe("$var:secretKey");
expect(options["secretKey"]).toBe("mySecret");
}
}
async computeSourceEmbeddings(data: string[]) {
return data.map(() => [1, 2, 3]);
}
embeddingDataType() {
return new arrow18.Float32() as apiArrow.Float;
}
protected getSensitiveKeys() {
return ["secretKey"];
}
}
});
afterEach(() => {
registry.reset();
});
it("Should error if the variable is not set", () => {
console.log(registry.get("mock-embedding"));
expect(() =>
registry.get("mock-embedding")!.create({ someKey: "$var:someName" }),
).toThrow('Variable "someName" not found');
});
it("should use default values if not set", () => {
registry
.get("mock-embedding")!
.create({ someKey: "$var:someName:someValue" });
});
it("should set a variable that the embedding function understand", () => {
registry.setVar("someName", "someValue");
registry.get("mock-embedding")!.create({ someKey: "$var:someName" });
});
it("should reject secrets that aren't passed as variables", () => {
registry.setVar("someName", "someValue");
expect(() =>
registry
.get("mock-embedding")!
.create({ secretKey: "someValue", someKey: "$var:someName" }),
).toThrow(
'The key "secretKey" is sensitive and cannot be set directly. Please use the $var: syntax to set it.',
);
});
it("should not serialize secrets", () => {
registry.setVar("someName", "someValue");
registry.setVar("secretKey", "mySecret");
const func = registry
.get("mock-embedding")!
.create({ secretKey: "$var:secretKey", someKey: "$var:someName" });
expect(func.toJSON()).toEqual({
secretKey: "$var:secretKey",
someKey: "$var:someName",
});
});
});


@@ -175,6 +175,8 @@ maybeDescribe("storage_options", () => {
tableNames = await db.tableNames();
expect(tableNames).toEqual([]);
await db.dropAllTables();
});
it("can configure encryption at connection and table level", async () => {
@@ -210,6 +212,8 @@ maybeDescribe("storage_options", () => {
await table.add([{ a: 2, b: 3 }]);
await bucket.assertAllEncrypted("test/table2.lance", kmsKey.keyId);
await db.dropAllTables();
});
});
@@ -298,5 +302,32 @@ maybeDescribe("DynamoDB Lock", () => {
const rowCount = await table.countRows();
expect(rowCount).toBe(6);
await db.dropAllTables();
});
it("clears dynamodb state after dropping all tables", async () => {
const uri = `s3+ddb://${bucket.name}/test?ddbTableName=${commitTable.name}`;
const db = await connect(uri, {
storageOptions: CONFIG,
readConsistencyInterval: 0,
});
await db.createTable("foo", [{ a: 1, b: 2 }]);
await db.createTable("bar", [{ a: 1, b: 2 }]);
let tableNames = await db.tableNames();
expect(tableNames).toEqual(["bar", "foo"]);
await db.dropAllTables();
tableNames = await db.tableNames();
expect(tableNames).toEqual([]);
// We can create a new table with the same name as the one we dropped.
await db.createTable("foo", [{ a: 1, b: 2 }]);
tableNames = await db.tableNames();
expect(tableNames).toEqual(["foo"]);
await db.dropAllTables();
});
});


@@ -21,9 +21,11 @@ import {
Int64,
List,
Schema,
Uint8,
Utf8,
makeArrowTable,
} from "../lancedb/arrow";
import * as arrow from "../lancedb/arrow";
import {
EmbeddingFunction,
LanceSchema,
@@ -278,6 +280,15 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
expect(res.getChild("y")?.toJSON()).toEqual([2, null, null, null]);
expect(res.getChild("z")?.toJSON()).toEqual([null, null, 3n, 5n]);
});
it("should handle null vectors at end of data", async () => {
// https://github.com/lancedb/lancedb/issues/2240
const data = [{ vector: [1, 2, 3] }, { vector: null }];
const db = await connect("memory://");
const table = await db.createTable("my_table", data);
expect(await table.countRows()).toEqual(2);
});
},
);
@@ -460,6 +471,8 @@ describe("When creating an index", () => {
indexType: "IvfPq",
columns: ["vec"],
});
const stats = await tbl.indexStats("vec_idx");
expect(stats?.loss).toBeDefined();
// Search without specifying the column
let rst = await tbl
@@ -666,11 +679,11 @@ describe("When creating an index", () => {
expect(fs.readdirSync(indexDir)).toHaveLength(1);
for await (const r of tbl.query().where("id > 1").select(["id"])) {
expect(r.numRows).toBe(10);
expect(r.numRows).toBe(298);
}
// should also work with 'filter' alias
for await (const r of tbl.query().filter("id > 1").select(["id"])) {
expect(r.numRows).toBe(10);
expect(r.numRows).toBe(298);
}
});
@@ -720,6 +733,7 @@ describe("When creating an index", () => {
expect(stats?.distanceType).toBeUndefined();
expect(stats?.indexType).toEqual("BTREE");
expect(stats?.numIndices).toEqual(1);
expect(stats?.loss).toBeUndefined();
});
test("when getting stats on non-existent index", async () => {
@@ -727,6 +741,38 @@ describe("When creating an index", () => {
expect(stats).toBeUndefined();
});
test("create ivf_flat with binary vectors", async () => {
const db = await connect(tmpDir.name);
const binarySchema = new Schema([
new Field("id", new Int32(), true),
new Field("vec", new FixedSizeList(32, new Field("item", new Uint8()))),
]);
const tbl = await db.createTable(
"binary",
makeArrowTable(
Array(300)
.fill(1)
.map((_, i) => ({
id: i,
vec: Array(32)
.fill(1)
.map(() => Math.floor(Math.random() * 255)),
})),
{ schema: binarySchema },
),
);
await tbl.createIndex("vec", {
config: Index.ivfFlat({ numPartitions: 10, distanceType: "hamming" }),
});
// query with binary vectors
const queryVec = Array(32)
.fill(1)
.map(() => Math.floor(Math.random() * 255));
const rst = await tbl.query().limit(5).nearestTo(queryVec).toArrow();
expect(rst.numRows).toBe(5);
});
// TODO: Move this test to the query API test (making sure we can reject queries
// when the dimension is incorrect)
test("two columns with different dimensions", async () => {
@@ -920,6 +966,93 @@ describe("schema evolution", function () {
new Field("price", new Float64(), true),
]);
expect(await table.schema()).toEqual(expectedSchema2);
await table.alterColumns([
{
path: "vector",
dataType: new FixedSizeList(2, new Field("item", new Float64(), true)),
},
]);
const expectedSchema3 = new Schema([
new Field("new_id", new Int32(), true),
new Field(
"vector",
new FixedSizeList(2, new Field("item", new Float64(), true)),
true,
),
new Field("price", new Float64(), true),
]);
expect(await table.schema()).toEqual(expectedSchema3);
});
it("can cast to various types", async function () {
const con = await connect(tmpDir.name);
// integers
const intTypes = [
new arrow.Int8(),
new arrow.Int16(),
new arrow.Int32(),
new arrow.Int64(),
new arrow.Uint8(),
new arrow.Uint16(),
new arrow.Uint32(),
new arrow.Uint64(),
];
const tableInts = await con.createTable("ints", [{ id: 1n }], {
schema: new Schema([new Field("id", new Int64(), true)]),
});
for (const intType of intTypes) {
await tableInts.alterColumns([{ path: "id", dataType: intType }]);
const schema = new Schema([new Field("id", intType, true)]);
expect(await tableInts.schema()).toEqual(schema);
}
// floats
const floatTypes = [
new arrow.Float16(),
new arrow.Float32(),
new arrow.Float64(),
];
const tableFloats = await con.createTable("floats", [{ val: 2.1 }], {
schema: new Schema([new Field("val", new Float32(), true)]),
});
for (const floatType of floatTypes) {
await tableFloats.alterColumns([{ path: "val", dataType: floatType }]);
const schema = new Schema([new Field("val", floatType, true)]);
expect(await tableFloats.schema()).toEqual(schema);
}
// Lists of floats
const listTypes = [
new arrow.List(new arrow.Field("item", new arrow.Float32(), true)),
new arrow.FixedSizeList(
2,
new arrow.Field("item", new arrow.Float64(), true),
),
new arrow.FixedSizeList(
2,
new arrow.Field("item", new arrow.Float16(), true),
),
new arrow.FixedSizeList(
2,
new arrow.Field("item", new arrow.Float32(), true),
),
];
const tableLists = await con.createTable("lists", [{ val: [2.1, 3.2] }], {
schema: new Schema([
new Field(
"val",
new FixedSizeList(2, new arrow.Field("item", new Float32())),
true,
),
]),
});
for (const listType of listTypes) {
await tableLists.alterColumns([{ path: "val", dataType: listType }]);
const schema = new Schema([new Field("val", listType, true)]);
expect(await tableLists.schema()).toEqual(schema);
}
});
it("can drop a column from the schema", async function () {
@@ -1038,9 +1171,6 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
test("can search using a string", async () => {
@register()
class MockEmbeddingFunction extends EmbeddingFunction<string> {
toJSON(): object {
return {};
}
ndims() {
return 1;
}


@@ -132,6 +132,17 @@ test("basic table examples", async () => {
},
]);
// --8<-- [end:alter_columns]
// --8<-- [start:alter_columns_vector]
await tbl.alterColumns([
{
path: "vector",
dataType: new arrow.FixedSizeList(
2,
new arrow.Field("item", new arrow.Float16(), false),
),
},
]);
// --8<-- [end:alter_columns_vector]
// --8<-- [start:drop_columns]
await tbl.dropColumns(["dbl_price"]);
// --8<-- [end:drop_columns]


@@ -43,12 +43,17 @@ test("custom embedding function", async () => {
@register("my_embedding")
class MyEmbeddingFunction extends EmbeddingFunction<string> {
toJSON(): object {
return {};
constructor(optionsRaw = {}) {
super();
const options = this.resolveVariables(optionsRaw);
// Initialize using options
}
ndims() {
return 3;
}
protected getSensitiveKeys(): string[] {
return [];
}
embeddingDataType(): Float {
return new Float32();
}
@@ -94,3 +99,14 @@ test("custom embedding function", async () => {
expect(await table2.countRows()).toBe(2);
});
});
test("embedding function api_key", async () => {
// --8<-- [start:register_secret]
const registry = getRegistry();
registry.setVar("api_key", "sk-...");
const func = registry.get("openai")!.create({
apiKey: "$var:api_key",
});
// --8<-- [end:register_secret]
});


@@ -4,9 +4,12 @@ import { expect, test } from "@jest/globals";
// --8<-- [start:import]
import * as lancedb from "@lancedb/lancedb";
// --8<-- [end:import]
// --8<-- [start:import_bin_util]
import { Field, FixedSizeList, Int32, Schema, Uint8 } from "apache-arrow";
// --8<-- [end:import_bin_util]
import { withTempDirectory } from "./util.ts";
test("full text search", async () => {
test("vector search", async () => {
await withTempDirectory(async (databaseDir) => {
{
const db = await lancedb.connect(databaseDir);
@@ -14,8 +17,6 @@ test("full text search", async () => {
const data = Array.from({ length: 10_000 }, (_, i) => ({
vector: Array(128).fill(i),
id: `${i}`,
content: "",
longId: `${i}`,
}));
await db.createTable("my_vectors", data);
@@ -52,5 +53,41 @@ test("full text search", async () => {
expect(r.distance).toBeGreaterThanOrEqual(0.1);
expect(r.distance).toBeLessThan(0.2);
}
{
// --8<-- [start:ingest_binary_data]
const schema = new Schema([
new Field("id", new Int32(), true),
new Field("vec", new FixedSizeList(32, new Field("item", new Uint8()))),
]);
const data = lancedb.makeArrowTable(
Array(1_000)
.fill(0)
.map((_, i) => ({
// the 256 bits are stored in 32 bytes; if your data is already
// in this format, you can skip the packBits step
id: i,
vec: lancedb.packBits(Array(256).fill(i % 2)),
})),
{ schema: schema },
);
const tbl = await db.createTable("binary_table", data);
await tbl.createIndex("vec", {
config: lancedb.Index.ivfFlat({
numPartitions: 10,
distanceType: "hamming",
}),
});
// --8<-- [end:ingest_binary_data]
// --8<-- [start:search_binary_data]
const query = Array(32)
.fill(1)
.map(() => Math.floor(Math.random() * 255));
const results = await tbl.query().nearestTo(query).limit(10).toArrow();
// --8<-- [end:search_binary_data
expect(results.numRows).toBe(10);
}
});
});


@@ -8,7 +8,11 @@ import {
Bool,
BufferType,
DataType,
DateUnit,
Date_,
Decimal,
Dictionary,
Duration,
Field,
FixedSizeBinary,
FixedSizeList,
@@ -21,19 +25,22 @@ import {
LargeBinary,
List,
Null,
Precision,
RecordBatch,
RecordBatchFileReader,
RecordBatchFileWriter,
RecordBatchStreamWriter,
Schema,
Struct,
Timestamp,
Type,
Utf8,
Vector,
makeVector as arrowMakeVector,
vectorFromArray as badVectorFromArray,
makeBuilder,
makeData,
makeTable,
vectorFromArray,
} from "apache-arrow";
import { Buffers } from "apache-arrow/data";
import { type EmbeddingFunction } from "./embedding/embedding_function";
@@ -179,6 +186,21 @@ export class VectorColumnOptions {
}
}
// biome-ignore lint/suspicious/noExplicitAny: skip
function vectorFromArray(data: any, type?: DataType) {
// Workaround for: https://github.com/apache/arrow/issues/45862
// If FSL type with float
if (DataType.isFixedSizeList(type) && DataType.isFloat(type.valueType)) {
const extendedData = [...data, new Array(type.listSize).fill(0.0)];
const array = badVectorFromArray(extendedData, type);
return array.slice(0, data.length);
} else if (type === undefined) {
return badVectorFromArray(data);
} else {
return badVectorFromArray(data, type);
}
}
/** Options to control the makeArrowTable call. */
export class MakeArrowTableOptions {
/*
@@ -1170,3 +1192,137 @@ function validateSchemaEmbeddings(
return new Schema(fields, schema.metadata);
}
interface JsonDataType {
type: string;
fields?: JsonField[];
length?: number;
}
interface JsonField {
name: string;
type: JsonDataType;
nullable: boolean;
metadata: Map<string, string>;
}
// Matches format of https://github.com/lancedb/lance/blob/main/rust/lance/src/arrow/json.rs
export function dataTypeToJson(dataType: DataType): JsonDataType {
switch (dataType.typeId) {
// For primitives, matches https://github.com/lancedb/lance/blob/e12bb9eff2a52f753668d4b62c52e4d72b10d294/rust/lance-core/src/datatypes.rs#L185
case Type.Null:
return { type: "null" };
case Type.Bool:
return { type: "bool" };
case Type.Int8:
return { type: "int8" };
case Type.Int16:
return { type: "int16" };
case Type.Int32:
return { type: "int32" };
case Type.Int64:
return { type: "int64" };
case Type.Uint8:
return { type: "uint8" };
case Type.Uint16:
return { type: "uint16" };
case Type.Uint32:
return { type: "uint32" };
case Type.Uint64:
return { type: "uint64" };
case Type.Int: {
const bitWidth = (dataType as Int).bitWidth;
const signed = (dataType as Int).isSigned;
const prefix = signed ? "" : "u";
return { type: `${prefix}int${bitWidth}` };
}
case Type.Float: {
switch ((dataType as Float).precision) {
case Precision.HALF:
return { type: "halffloat" };
case Precision.SINGLE:
return { type: "float" };
case Precision.DOUBLE:
return { type: "double" };
}
throw Error("Unsupported float precision");
}
case Type.Float16:
return { type: "halffloat" };
case Type.Float32:
return { type: "float" };
case Type.Float64:
return { type: "double" };
case Type.Utf8:
return { type: "string" };
case Type.Binary:
return { type: "binary" };
case Type.LargeUtf8:
return { type: "large_string" };
case Type.LargeBinary:
return { type: "large_binary" };
case Type.List:
return {
type: "list",
fields: [fieldToJson((dataType as List).children[0])],
};
case Type.FixedSizeList: {
const fixedSizeList = dataType as FixedSizeList;
return {
type: "fixed_size_list",
fields: [fieldToJson(fixedSizeList.children[0])],
length: fixedSizeList.listSize,
};
}
case Type.Struct:
return {
type: "struct",
fields: (dataType as Struct).children.map(fieldToJson),
};
case Type.Date: {
const unit = (dataType as Date_).unit;
return {
type: unit === DateUnit.DAY ? "date32:day" : "date64:ms",
};
}
case Type.Timestamp: {
const timestamp = dataType as Timestamp;
const timezone = timestamp.timezone || "-";
return {
type: `timestamp:${timestamp.unit}:${timezone}`,
};
}
case Type.Decimal: {
const decimal = dataType as Decimal;
return {
type: `decimal:${decimal.bitWidth}:${decimal.precision}:${decimal.scale}`,
};
}
case Type.Duration: {
const duration = dataType as Duration;
return { type: `duration:${duration.unit}` };
}
case Type.FixedSizeBinary: {
const byteWidth = (dataType as FixedSizeBinary).byteWidth;
return { type: `fixed_size_binary:${byteWidth}` };
}
case Type.Dictionary: {
const dict = dataType as Dictionary;
const indexType = dataTypeToJson(dict.indices);
const valueType = dataTypeToJson(dict.valueType);
return {
type: `dict:${valueType.type}:${indexType.type}:false`,
};
}
}
throw new Error("Unsupported data type");
}
function fieldToJson(field: Field): JsonField {
return {
name: field.name,
type: dataTypeToJson(field.type),
nullable: field.nullable,
metadata: field.metadata,
};
}
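For illustration, here is roughly what this serialization produces for a typical 2-wide float32 vector column (derived from the switch above; the metadata contents depend on the field):

// dataTypeToJson(new FixedSizeList(2, new Field("item", new Float32(), true)))
// => {
//      type: "fixed_size_list",
//      fields: [{ name: "item", type: { type: "float" }, nullable: true, metadata: ... }],
//      length: 2,
//    }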


@@ -15,6 +15,7 @@ import {
newVectorType,
} from "../arrow";
import { sanitizeType } from "../sanitize";
import { getRegistry } from "./registry";
/**
* Options for a given embedding function
@@ -32,6 +33,22 @@ export interface EmbeddingFunctionConstructor<
/**
* An embedding function that automatically creates vector representation for a given column.
*
* It's important that subclasses pass the **original** options to
* `resolveVariables`, which records them for serialization and resolves any
* `$var:` references before they are used.
*
* @example
* ```ts
* class MyEmbeddingFunction extends EmbeddingFunction {
* constructor(optionsRaw: {model: string, timeout: number}) {
* super();
* const options = this.resolveVariables(optionsRaw);
* this.model = options.model;
* this.timeout = options.timeout;
* }
* }
* ```
*/
export abstract class EmbeddingFunction<
// biome-ignore lint/suspicious/noExplicitAny: we don't know what the implementor will do
@@ -44,33 +61,74 @@ export abstract class EmbeddingFunction<
*/
// biome-ignore lint/style/useNamingConvention: we want to keep the name as it is
readonly TOptions!: M;
/**
* Convert the embedding function to a JSON object
* It is used to serialize the embedding function to the schema
* It's important that any object returned by this method contains all the necessary
* information to recreate the embedding function
*
* It should return the same object that was passed to the constructor
* If it does not, the embedding function will not be able to be recreated, or could be recreated incorrectly
*
* @example
* ```ts
* class MyEmbeddingFunction extends EmbeddingFunction {
* constructor(options: {model: string, timeout: number}) {
* super();
* this.model = options.model;
* this.timeout = options.timeout;
* }
* toJSON() {
* return {
* model: this.model,
* timeout: this.timeout,
* };
* }
* ```
*/
abstract toJSON(): Partial<M>;
#config: Partial<M>;
/**
* Get the original arguments to the constructor, to serialize them so they
* can be used to recreate the embedding function later.
*/
// biome-ignore lint/suspicious/noExplicitAny :
toJSON(): Record<string, any> {
return JSON.parse(JSON.stringify(this.#config));
}
constructor() {
this.#config = {};
}
/**
* Provide a list of keys in the function options that should be treated as
* sensitive. If users pass raw values for these keys, they will be rejected.
*/
protected getSensitiveKeys(): string[] {
return [];
}
/**
* Apply variables to the config.
*/
protected resolveVariables(config: Partial<M>): Partial<M> {
this.#config = config;
const registry = getRegistry();
const newConfig = { ...config };
for (const [key_, value] of Object.entries(newConfig)) {
if (
this.getSensitiveKeys().includes(key_) &&
!value.startsWith("$var:")
) {
throw new Error(
`The key "${key_}" is sensitive and cannot be set directly. Please use the $var: syntax to set it.`,
);
}
// Makes TS happy (https://stackoverflow.com/a/78391854)
const key = key_ as keyof M;
if (typeof value === "string" && value.startsWith("$var:")) {
const [name, defaultValue] = value.slice(5).split(":", 2);
const variableValue = registry.getVar(name);
if (!variableValue) {
if (defaultValue) {
// biome-ignore lint/suspicious/noExplicitAny:
newConfig[key] = defaultValue as any;
} else {
throw new Error(`Variable "${name}" not found`);
}
} else {
// biome-ignore lint/suspicious/noExplicitAny:
newConfig[key] = variableValue as any;
}
}
}
return newConfig;
}
/**
* Optionally load any resources needed for the embedding function.
*
* This method is called after the embedding function has been initialized
* but before any embeddings are computed. It is useful for loading local models
* or other resources that are needed for the embedding function to work.
*/
async init?(): Promise<void>;
/**


@@ -21,11 +21,13 @@ export class OpenAIEmbeddingFunction extends EmbeddingFunction<
#modelName: OpenAIOptions["model"];
constructor(
options: Partial<OpenAIOptions> = {
optionsRaw: Partial<OpenAIOptions> = {
model: "text-embedding-ada-002",
},
) {
super();
const options = this.resolveVariables(optionsRaw);
const openAIKey = options?.apiKey ?? process.env.OPENAI_API_KEY;
if (!openAIKey) {
throw new Error("OpenAI API key is required");
@@ -52,10 +54,8 @@ export class OpenAIEmbeddingFunction extends EmbeddingFunction<
this.#modelName = modelName;
}
toJSON() {
return {
model: this.#modelName,
};
protected getSensitiveKeys(): string[] {
return ["apiKey"];
}
ndims(): number {


@@ -23,6 +23,7 @@ export interface EmbeddingFunctionCreate<T extends EmbeddingFunction> {
*/
export class EmbeddingFunctionRegistry {
#functions = new Map<string, EmbeddingFunctionConstructor>();
#variables = new Map<string, string>();
/**
* Get the number of registered functions
@@ -82,10 +83,7 @@ export class EmbeddingFunctionRegistry {
};
} else {
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
create = function (options?: any) {
const instance = new factory(options);
return instance;
};
create = (options?: any) => new factory(options);
}
return {
@@ -164,6 +162,37 @@ export class EmbeddingFunctionRegistry {
return metadata;
}
/**
* Set a variable. These can be accessed in the embedding function
* configuration using the syntax `$var:variable_name`. If they are not
* set, an error will be thrown letting you know which key is unset. If you
* want to supply a default value, you can add an additional part in the
* configuration like so: `$var:variable_name:default_value`. Default values
* can be used for runtime configurations that are not sensitive, such as
* whether to use a GPU for inference.
*
* The name must not contain colons. The default value can contain colons.
*
* @param name
* @param value
*/
setVar(name: string, value: string): void {
if (name.includes(":")) {
throw new Error("Variable names cannot contain colons");
}
this.#variables.set(name, value);
}
/**
* Get a variable.
* @param name
* @returns
* @see {@link setVar}
*/
getVar(name: string): string | undefined {
return this.#variables.get(name);
}
}
const _REGISTRY = new EmbeddingFunctionRegistry();
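As a usage sketch (the `openai` function name and `apiKey` option mirror the OpenAI embedding function changes elsewhere in this diff; `device` is a hypothetical, non-sensitive option):

const registry = getRegistry();
registry.setVar("openai_api_key", "sk-...");

// Sensitive keys must be supplied via $var:, never as raw values.
const func = registry.get("openai")?.create({
  model: "text-embedding-ada-002",
  apiKey: "$var:openai_api_key",
});

// A non-sensitive option can carry an inline default after a second colon,
// e.g. { device: "$var:device:cpu" } resolves to "cpu" when the variable is unset.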


@@ -44,11 +44,12 @@ export class TransformersEmbeddingFunction extends EmbeddingFunction<
#ndims?: number;
constructor(
options: Partial<XenovaTransformerOptions> = {
optionsRaw: Partial<XenovaTransformerOptions> = {
model: "Xenova/all-MiniLM-L6-v2",
},
) {
super();
const options = this.resolveVariables(optionsRaw);
const modelName = options?.model ?? "Xenova/all-MiniLM-L6-v2";
this.#tokenizerOptions = {
@@ -59,22 +60,6 @@ export class TransformersEmbeddingFunction extends EmbeddingFunction<
this.#ndims = options.ndims;
this.#modelName = modelName;
}
toJSON() {
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
const obj: Record<string, any> = {
model: this.#modelName,
};
if (this.#ndims) {
obj["ndims"] = this.#ndims;
}
if (this.#tokenizerOptions) {
obj["tokenizerOptions"] = this.#tokenizerOptions;
}
if (this.#tokenizer) {
obj["tokenizer"] = this.#tokenizer.name;
}
return obj;
}
async init() {
let transformers;


@@ -14,7 +14,6 @@ import {
export {
AddColumnsSql,
ColumnAlteration,
ConnectionOptions,
IndexStatistics,
IndexConfig,
@@ -54,6 +53,7 @@ export {
Index,
IndexOptions,
IvfPqOptions,
IvfFlatOptions,
HnswPqOptions,
HnswSqOptions,
FtsOptions,
@@ -65,6 +65,7 @@ export {
UpdateOptions,
OptimizeOptions,
Version,
ColumnAlteration,
} from "./table";
export { MergeInsertBuilder } from "./merge";
@@ -79,7 +80,7 @@ export {
DataLike,
IntoVector,
} from "./arrow";
export { IntoSql } from "./util";
export { IntoSql, packBits } from "./util";
/**
* Connect to a LanceDB instance at the given URI.


@@ -62,13 +62,13 @@ export interface IvfPqOptions {
*
* "l2" - Euclidean distance. This is a very common distance metric that
* accounts for both magnitude and direction when determining the distance
* between vectors. L2 distance has a range of [0, ∞).
* between vectors. l2 distance has a range of [0, ∞).
*
* "cosine" - Cosine distance. Cosine distance is a distance metric
* calculated from the cosine similarity between two vectors. Cosine
* similarity is a measure of similarity between two non-zero vectors of an
* inner product space. It is defined to equal the cosine of the angle
* between them. Unlike L2, the cosine distance is not affected by the
* between them. Unlike l2, the cosine distance is not affected by the
* magnitude of the vectors. Cosine distance has a range of [0, 2].
*
* Note: the cosine distance is undefined when one (or both) of the vectors
@@ -77,7 +77,7 @@ export interface IvfPqOptions {
*
* "dot" - Dot product. Dot distance is the dot product of two vectors. Dot
* distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
* L2 norm is 1), then dot distance is equivalent to the cosine distance.
* l2 norm is 1), then dot distance is equivalent to the cosine distance.
*/
distanceType?: "l2" | "cosine" | "dot";
@@ -125,18 +125,18 @@ export interface HnswPqOptions {
*
* "l2" - Euclidean distance. This is a very common distance metric that
* accounts for both magnitude and direction when determining the distance
* between vectors. L2 distance has a range of [0, ∞).
* between vectors. l2 distance has a range of [0, ∞).
*
* "cosine" - Cosine distance. Cosine distance is a distance metric
* calculated from the cosine similarity between two vectors. Cosine
* similarity is a measure of similarity between two non-zero vectors of an
* inner product space. It is defined to equal the cosine of the angle
* between them. Unlike L2, the cosine distance is not affected by the
* between them. Unlike l2, the cosine distance is not affected by the
* magnitude of the vectors. Cosine distance has a range of [0, 2].
*
* "dot" - Dot product. Dot distance is the dot product of two vectors. Dot
* distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
* L2 norm is 1), then dot distance is equivalent to the cosine distance.
* l2 norm is 1), then dot distance is equivalent to the cosine distance.
*/
distanceType?: "l2" | "cosine" | "dot";
@@ -241,18 +241,18 @@ export interface HnswSqOptions {
*
* "l2" - Euclidean distance. This is a very common distance metric that
* accounts for both magnitude and direction when determining the distance
* between vectors. L2 distance has a range of [0, ∞).
* between vectors. l2 distance has a range of [0, ∞).
*
* "cosine" - Cosine distance. Cosine distance is a distance metric
* calculated from the cosine similarity between two vectors. Cosine
* similarity is a measure of similarity between two non-zero vectors of an
* inner product space. It is defined to equal the cosine of the angle
* between them. Unlike L2, the cosine distance is not affected by the
* between them. Unlike l2, the cosine distance is not affected by the
* magnitude of the vectors. Cosine distance has a range of [0, 2].
*
* "dot" - Dot product. Dot distance is the dot product of two vectors. Dot
* distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
* L2 norm is 1), then dot distance is equivalent to the cosine distance.
* l2 norm is 1), then dot distance is equivalent to the cosine distance.
*/
distanceType?: "l2" | "cosine" | "dot";
@@ -327,6 +327,94 @@ export interface HnswSqOptions {
efConstruction?: number;
}
/**
* Options to create an `IVF_FLAT` index
*/
export interface IvfFlatOptions {
/**
* The number of IVF partitions to create.
*
* This value should generally scale with the number of rows in the dataset.
* By default the number of partitions is the square root of the number of
* rows.
*
* If this value is too large then the first part of the search (picking the
* right partition) will be slow. If this value is too small then the second
* part of the search (searching within a partition) will be slow.
*/
numPartitions?: number;
/**
* Distance type to use to build the index.
*
* Default value is "l2".
*
* This is used when training the index to calculate the IVF partitions
* (vectors are grouped in partitions with similar vectors according to this
* distance type).
*
* The distance type used to train an index MUST match the distance type used
* to search the index. Failure to do so will yield inaccurate results.
*
* The following distance types are available:
*
* "l2" - Euclidean distance. This is a very common distance metric that
* accounts for both magnitude and direction when determining the distance
* between vectors. l2 distance has a range of [0, ∞).
*
* "cosine" - Cosine distance. Cosine distance is a distance metric
* calculated from the cosine similarity between two vectors. Cosine
* similarity is a measure of similarity between two non-zero vectors of an
* inner product space. It is defined to equal the cosine of the angle
* between them. Unlike l2, the cosine distance is not affected by the
* magnitude of the vectors. Cosine distance has a range of [0, 2].
*
* Note: the cosine distance is undefined when one (or both) of the vectors
* are all zeros (there is no direction). These vectors are invalid and may
* never be returned from a vector search.
*
* "dot" - Dot product. Dot distance is the dot product of two vectors. Dot
* distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
* l2 norm is 1), then dot distance is equivalent to the cosine distance.
*
* "hamming" - Hamming distance. Hamming distance is a distance metric
* calculated from the number of bits that are different between two vectors.
* Hamming distance has a range of [0, dimension]. Note that the hamming distance
* is only valid for binary vectors.
*/
distanceType?: "l2" | "cosine" | "dot" | "hamming";
/**
* Max iteration to train IVF kmeans.
*
* When training an IVF FLAT index we use kmeans to calculate the partitions. This parameter
* controls how many iterations of kmeans to run.
*
* Increasing this might improve the quality of the index but in most cases these extra
* iterations have diminishing returns.
*
* The default value is 50.
*/
maxIterations?: number;
/**
* The number of vectors, per partition, to sample when training IVF kmeans.
*
* When an IVF FLAT index is trained, we need to calculate partitions. These are groups
* of vectors that are similar to each other. To do this we use an algorithm called kmeans.
*
* Running kmeans on a large dataset can be slow. To speed this up we run kmeans on a
* random sample of the data. This parameter controls the size of the sample. The total
* number of vectors used to train the index is `sample_rate * num_partitions`.
*
* Increasing this value might improve the quality of the index but in most cases the
* default should be sufficient.
*
* The default value is 256.
*/
sampleRate?: number;
}
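// For instance (illustrative numbers): numPartitions = 100 with the default
// sampleRate of 256 trains kmeans on roughly 256 * 100 = 25,600 sampled
// vectors rather than the full dataset.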
/**
* Options to create a full text search index
*/
@@ -426,6 +514,33 @@ export class Index {
);
}
/**
* Create an IvfFlat index
*
* This index groups vectors into partitions of similar vectors. Each partition keeps track of
* a centroid which is the average value of all vectors in the group.
*
* During a query the centroids are compared with the query vector to find the closest
* partitions. The vectors in these partitions are then searched to find
* the closest vectors.
*
* The partitioning process is called IVF and the `num_partitions` parameter controls how
* many groups to create.
*
* Note that training an IVF FLAT index on a large dataset is a slow operation and
* currently is also a memory intensive operation.
*/
static ivfFlat(options?: Partial<IvfFlatOptions>) {
return new Index(
LanceDbIndex.ivfFlat(
options?.distanceType,
options?.numPartitions,
options?.maxIterations,
options?.sampleRate,
),
);
}
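// Usage sketch, mirroring the IVF_FLAT test added elsewhere in this diff:
// build the index over a binary vector column using hamming distance.
//
//   await tbl.createIndex("vec", {
//     config: Index.ivfFlat({ numPartitions: 10, distanceType: "hamming" }),
//   });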
/**
* Create a btree index
*


@@ -4,8 +4,10 @@
import {
Table as ArrowTable,
Data,
DataType,
IntoVector,
Schema,
dataTypeToJson,
fromDataToBuffer,
tableFromIPC,
} from "./arrow";
@@ -15,13 +17,13 @@ import { IndexOptions } from "./indices";
import { MergeInsertBuilder } from "./merge";
import {
AddColumnsSql,
ColumnAlteration,
IndexConfig,
IndexStatistics,
OptimizeStats,
Table as _NativeTable,
} from "./native";
import { Query, VectorQuery } from "./query";
import { sanitizeType } from "./sanitize";
import { IntoSql, toSQL } from "./util";
export { IndexConfig } from "./native";
@@ -618,7 +620,27 @@ export class LocalTable extends Table {
}
async alterColumns(columnAlterations: ColumnAlteration[]): Promise<void> {
await this.inner.alterColumns(columnAlterations);
const processedAlterations = columnAlterations.map((alteration) => {
if (typeof alteration.dataType === "string") {
return {
...alteration,
dataType: JSON.stringify({ type: alteration.dataType }),
};
} else if (alteration.dataType === undefined) {
return {
...alteration,
dataType: undefined,
};
} else {
const dataType = sanitizeType(alteration.dataType);
return {
...alteration,
dataType: JSON.stringify(dataTypeToJson(dataType)),
};
}
});
await this.inner.alterColumns(processedAlterations);
}
async dropColumns(columnNames: string[]): Promise<void> {
@@ -711,3 +733,38 @@ export class LocalTable extends Table {
await this.inner.migrateManifestPathsV2();
}
}
/**
* A definition of a column alteration. The alteration changes the column at
* `path` to have the new name `name`, to be nullable if `nullable` is true,
* and to have the data type `data_type`. At least one of `rename`, `dataType`,
* or `nullable` must be provided.
*/
export interface ColumnAlteration {
/**
* The path to the column to alter. This is a dot-separated path to the column.
* If it is a top-level column then it is just the name of the column. If it is
* a nested column then it is the path to the column, e.g. "a.b.c" for a column
* `c` nested inside a column `b` nested inside a column `a`.
*/
path: string;
/**
* The new name of the column. If not provided then the name will not be changed.
* This must be distinct from the names of all other columns in the table.
*/
rename?: string;
/**
* A new data type for the column. If not provided then the data type will not be changed.
* Changing data types is limited to casting to the same general type. For example, these
* changes are valid:
* * `int32` -> `int64` (integers)
* * `double` -> `float` (floats)
* * `string` -> `large_string` (strings)
* But these changes are not:
* * `int32` -> `double` (mix integers and floats)
* * `string` -> `int32` (mix strings and integers)
*/
dataType?: string | DataType;
/** Set the new nullability. Note that a nullable column cannot be made non-nullable. */
nullable?: boolean;
}
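A sketch of the two accepted `dataType` forms, assuming a table `tbl` with a 2-wide `vector` column and an `id` integer column (the vector cast mirrors the docs example earlier in this diff; the string form follows the cast rules above):

await tbl.alterColumns([
  // DataType form: cast the vector column to float16 storage
  {
    path: "vector",
    dataType: new FixedSizeList(2, new Field("item", new Float16(), false)),
  },
  // string form: widen an integer column, e.g. int32 -> int64
  { path: "id", dataType: "int64" },
]);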


@@ -35,6 +35,16 @@ export function toSQL(value: IntoSql): string {
}
}
// Pack an array of 0/1 bit values into bytes, least-significant bit first,
// for use with binary vector columns (e.g. a FixedSizeList of Uint8).
export function packBits(data: Array<number>): Array<number> {
  const packed = Array(data.length >> 3).fill(0);
  for (let i = 0; i < data.length; i++) {
    const byte = i >> 3; // index of the output byte
    const bit = i & 7; // bit position within that byte
    packed[byte] |= data[i] << bit;
  }
  return packed;
}
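// Usage sketch: 256 alternating bits pack into 32 bytes (LSB-first), ready
// for a FixedSizeList(32, Uint8) binary vector column:
//
//   const bits = Array(256).fill(0).map((_, i) => i % 2);
//   const bytes = lancedb.packBits(bits); // 32 numbers, one per byte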
export class TTLCache {
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
private readonly cache: Map<string, { value: any; expires: number }>;


@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.15.1-beta.3",
"version": "0.18.2-beta.0",
"os": ["darwin"],
"cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node",


@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-x64",
"version": "0.15.1-beta.3",
"version": "0.18.2-beta.0",
"os": ["darwin"],
"cpu": ["x64"],
"main": "lancedb.darwin-x64.node",


@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.15.1-beta.3",
"version": "0.18.2-beta.0",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node",


@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.15.1-beta.3",
"version": "0.18.2-beta.0",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node",


@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.15.1-beta.3",
"version": "0.18.2-beta.0",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node",


@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.15.1-beta.3",
"version": "0.18.2-beta.0",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node",


@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.15.1-beta.3",
"version": "0.18.2-beta.0",
"os": [
"win32"
],


@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.15.1-beta.3",
"version": "0.18.2-beta.0",
"os": ["win32"],
"cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node",


@@ -1,12 +1,12 @@
{
"name": "@lancedb/lancedb",
"version": "0.15.1-beta.3",
"version": "0.18.2-beta.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@lancedb/lancedb",
"version": "0.15.1-beta.3",
"version": "0.18.2-beta.0",
"cpu": [
"x64",
"arm64"


@@ -11,7 +11,7 @@
"ann"
],
"private": false,
"version": "0.15.1-beta.3",
"version": "0.18.2-beta.0",
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",
@@ -29,7 +29,6 @@
"aarch64-apple-darwin",
"x86_64-unknown-linux-gnu",
"aarch64-unknown-linux-gnu",
"x86_64-unknown-linux-musl",
"aarch64-unknown-linux-musl",
"x86_64-pc-windows-msvc",
"aarch64-pc-windows-msvc"
@@ -74,8 +73,10 @@
"artifacts": "napi artifacts",
"build:debug": "napi build --platform --no-const-enum --dts ../lancedb/native.d.ts --js ../lancedb/native.js lancedb",
"build:release": "napi build --platform --no-const-enum --release --dts ../lancedb/native.d.ts --js ../lancedb/native.js dist/",
"build": "npm run build:debug && tsc -b && shx cp lancedb/native.d.ts dist/native.d.ts && shx cp lancedb/*.node dist/",
"build-release": "npm run build:release && tsc -b && shx cp lancedb/native.d.ts dist/native.d.ts",
"build": "npm run build:debug && npm run tsc && shx cp lancedb/*.node dist/",
"build-release": "npm run build:release && npm run tsc",
"tsc": "tsc -b",
"posttsc": "shx cp lancedb/native.d.ts dist/native.d.ts",
"lint-ci": "biome ci .",
"docs": "typedoc --plugin typedoc-plugin-markdown --treatWarningsAsErrors --out ../docs/src/js lancedb/index.ts",
"postdocs": "node typedoc_post_process.js",


@@ -4,7 +4,9 @@
use std::sync::Mutex;
use lancedb::index::scalar::{BTreeIndexBuilder, FtsIndexBuilder};
use lancedb::index::vector::{IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder};
use lancedb::index::vector::{
IvfFlatIndexBuilder, IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder,
};
use lancedb::index::Index as LanceDbIndex;
use napi_derive::napi;
@@ -63,6 +65,32 @@ impl Index {
})
}
#[napi(factory)]
pub fn ivf_flat(
distance_type: Option<String>,
num_partitions: Option<u32>,
max_iterations: Option<u32>,
sample_rate: Option<u32>,
) -> napi::Result<Self> {
let mut ivf_flat_builder = IvfFlatIndexBuilder::default();
if let Some(distance_type) = distance_type {
let distance_type = parse_distance_type(distance_type)?;
ivf_flat_builder = ivf_flat_builder.distance_type(distance_type);
}
if let Some(num_partitions) = num_partitions {
ivf_flat_builder = ivf_flat_builder.num_partitions(num_partitions);
}
if let Some(max_iterations) = max_iterations {
ivf_flat_builder = ivf_flat_builder.max_iterations(max_iterations);
}
if let Some(sample_rate) = sample_rate {
ivf_flat_builder = ivf_flat_builder.sample_rate(sample_rate);
}
Ok(Self {
inner: Mutex::new(Some(LanceDbIndex::IvfFlat(ivf_flat_builder))),
})
}
#[napi(factory)]
pub fn btree() -> Self {
Self {


@@ -498,6 +498,9 @@ pub struct IndexStatistics {
pub distance_type: Option<String>,
/// The number of parts this index is split into.
pub num_indices: Option<u32>,
/// The KMeans loss value of the index,
/// it is only present for vector indices.
pub loss: Option<f64>,
}
impl From<lancedb::index::IndexStatistics> for IndexStatistics {
fn from(value: lancedb::index::IndexStatistics) -> Self {
@@ -507,6 +510,7 @@ impl From<lancedb::index::IndexStatistics> for IndexStatistics {
index_type: value.index_type.to_string(),
distance_type: value.distance_type.map(|d| d.to_string()),
num_indices: value.num_indices,
loss: value.loss,
}
}
}

Some files were not shown because too many files have changed in this diff.