Compare commits


31 Commits

Author SHA1 Message Date
David Myriel
3e9f0ac784 Update mkdocs.yml 2025-08-06 17:17:45 -07:00
Will Jones
8ffe992a6f fix: always uses slashes in table uris (#2575)
Closes #2574
2025-08-05 12:12:57 -07:00
Will Jones
9d683e4f0b feat: infer vector columns when name contains 'vector' or 'embedding' (#2547)
## Summary

- Enhanced vector column detection to use substring matching instead of
exact matching
- Now detects columns with names containing "vector" or "embedding"
(case-insensitive)
- Added integer vector support to Node.js implementation (matching
Python)
- Comprehensive test coverage for both float and integer vector types

## Changes

### Python (`python/python/lancedb/table.py`)
- Updated `_infer_target_schema()` to use substring matching with helper
function `_is_vector_column()`
- Preserved original field names instead of forcing "vector"
- Consolidated duplicate logic for better maintainability

### Node.js (`nodejs/lancedb/arrow.ts`)
- Enhanced type inference with `nameSuggestsVectorColumn()` helper
function
- Added `isAllIntegers()` function with performance optimization (checks
first 10 elements)
- Implemented integer vector support using `Uint8` type (matching
Python)
- Improved type safety by removing `any` usage

### Tests
- **Python**: Added
`test_infer_target_schema_with_vector_embedding_names()` in
`test_util.py`
- **Node.js**: Added comprehensive test case in `arrow.test.ts`
- Both test suites cover various naming patterns and integer/float
vector types

## Examples of newly supported column names:
- `user_vector`, `text_embedding`, `doc_embeddings`
- `my_vector_field`, `embedding_model`
- `VECTOR_COL`, `Vector_Mixed` (case-insensitive)
- Both float and integer arrays are properly converted to fixed-size
lists
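
A minimal sketch of the substring check described above (illustrative only; the actual `_is_vector_column()` helper in `table.py` may differ in details):

```python
def is_vector_column(name: str) -> bool:
    # A column qualifies if its name contains "vector" or "embedding",
    # case-insensitively, rather than matching the exact name "vector".
    lowered = name.lower()
    return "vector" in lowered or "embedding" in lowered

assert is_vector_column("user_vector")
assert is_vector_column("text_embedding")
assert is_vector_column("VECTOR_COL")
assert not is_vector_column("description")
```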

## Test plan
- [x] All existing tests pass (backward compatibility maintained)
- [x] New tests pass for both Python and Node.js implementations
- [x] Integer vector detection works correctly in Node.js
- [x] Code passes linting and formatting checks
- [x] Performance optimized for large vector arrays

Fixes #2546

🤖 Generated with [Claude Code](https://claude.ai/code)

---------

Co-authored-by: Claude <noreply@anthropic.com>
2025-08-04 15:36:49 -07:00
Will Jones
0a1ea1858d chore: remove vectordb package (#2564)
```shell
git rm -r rust/ffi
git rm -r node
git rm ci/build_windows_artifacts.ps1
git rm ci/build_windows_artifacts_nodejs.ps1
git rm ci/build_linux_artifacts.sh
git rm ci/build_macos_artifacts.sh
git rm -r ci/manylinux_node
git rm .github/workflows/node.yml
```
2025-08-04 14:14:33 -07:00
Poornachandra.A.N
7d0127b376 feat(embeddings): add siglip embedding support to lancedb (#2499)
### Summary

This PR adds **SigLIP** (Sigmoid Loss Image Pretraining) as a new
embedding model in the LanceDB embedding registry. SigLIP improves
image-text alignment performance using sigmoid-based contrastive loss
and offers robust zero-shot generalization.

Fixes #2498 

### What’s Implemented

#### 1. `SigLIP` Embedding Class

* Added `SigLIP` support under `python/lancedb/embeddings/siglip.py`
* Implements:

  * `compute_source_embeddings`
  * `_batch_generate_embeddings`
  * Normalization logic
  * Batch-wise progress logging for image embedding

#### 2. Registry Integration

* Registered `SigLIP` in `embeddings/__init__.py`
* `SigLIP` now usable via `connect(..., embedding="siglip")`
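
For instance, following the registry pattern LanceDB uses for other embedding models (a hedged sketch; the exact alias and options are assumptions):

```python
from lancedb.embeddings import get_registry
from lancedb.pydantic import LanceModel, Vector

# "siglip" is the alias this PR registers.
siglip = get_registry().get("siglip").create()

class Item(LanceModel):
    image_uri: str = siglip.SourceField()
    vector: Vector(siglip.ndims()) = siglip.VectorField()
```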

#### 3. Evaluation Benchmark Support

* Added SigLIP to `test_embeddings_slow.py` for side-by-side
benchmarking with OpenCLIP and ImageBind


### New Test Methods

#### `test_siglip`

* End-to-end test to verify embeddings table creation and vector shape
for SigLIP
![WhatsApp Image 2025-07-10 at 18 00 27_a3368163](https://github.com/user-attachments/assets/e5582ee1-80a3-43d7-a7a1-26ceecce9f4d)


#### `test_siglip_vs_openclip_vs_imagebind_benchmark_full`

* Benchmarks:

  * **Recall@1 / 5 / 10**
  * **mAP (Mean Average Precision)**
  * **Embedding & Search Latency**
  * Dimensionality reporting
![WhatsApp Image 2025-07-10 at 18 12 13_22c67a84](https://github.com/user-attachments/assets/455bf30f-62b7-4684-a3f3-ad52e2a1ffe5)


### Notes

* SigLIP outputs 768D embeddings (vs 512D for OpenCLIP)
* Benchmark shows competitive performance despite higher dimensionality
* I'm still new to contributing to open-source and learning as I go.
Please feel free to suggest any improvements — I'm happy to make
changes!
2025-08-04 11:42:39 -07:00
Will Jones
02595dc475 feat: add overall timeout parameter to remote client (#2550)
## Summary
- Adds an overall `timeout` parameter to `TimeoutConfig` that limits the
total time for the entire request
- Can be set via config or `LANCE_CLIENT_TIMEOUT` environment variable
- Exposed in Python and Node.js bindings
- Includes comprehensive tests
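
A hedged usage sketch (class and parameter names follow the summary above; treat exact signatures as assumptions):

```python
import lancedb
from lancedb.remote import ClientConfig, TimeoutConfig

# Cap the total time for any single request at 30 seconds.
# Equivalently, set the LANCE_CLIENT_TIMEOUT environment variable.
config = ClientConfig(timeout_config=TimeoutConfig(timeout=30))
db = lancedb.connect(
    "db://my-project",
    api_key="...",
    client_config=config,
)
```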

## Test plan
- [x] Unit tests for Rust TimeoutConfig
- [x] Integration tests for Python bindings  
- [x] Integration tests for Node.js bindings
- [x] All existing tests pass

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-authored-by: Claude <noreply@anthropic.com>
2025-08-04 10:06:55 -07:00
Reed Loden
f23327af79 fix: use SPDX-compliant license name for nodejs packages (#2558)
Update license field from `Apache 2.0` to be `Apache-2.0` for all
Node.js packages.

This was causing GitHub's Dependency Review license check to fail with:
> The validity of the licenses of the dependencies below could not be
determined. Ensure that they are valid SPDX licenses
2025-08-04 09:54:53 -07:00
Wyatt Alt
c7afa724dd chore: update npm lockfile (#2563) 2025-07-30 18:28:06 -07:00
BubbleCal
c359cec504 chore: upgrade lance to 0.32.1-beta.2 (#2562)
Signed-off-by: BubbleCal <bubble-cal@outlook.com>
2025-07-30 14:31:04 -07:00
Mark McCaskey
fe76496a59 fix: .nprobes method in python bindings, improve error messages (#2556)
`nprobes` with a value greater than 20 fails with this error from the `minimum_nprobes` setter:

```
self = <lancedb.query.AsyncVectorQuery object at 0x10b749720>, minimum_nprobes = 30

    def minimum_nprobes(self, minimum_nprobes: int) -> Self:
        """Set the minimum number of probes to use.

        See `nprobes` for more details.

        These partitions will be searched on every indexed vector query and will
        increase recall at the expense of latency.
        """
>       self._inner.minimum_nprobes(minimum_nprobes)
E       ValueError: Invalid input, minimum_nprobes must be less than or equal to maximum_nprobes

python/lancedb/query.py:2744: ValueError
```

Setting the max before the min seems like the obvious fix, but it causes
this perfectly reasonable case to fail:
```
def test_nprobes_min_max_works_sync(table):
    LanceVectorQueryBuilder(table, [0, 0], "vector").minimum_nprobes(2).maximum_nprobes(4).to_list()
```

with

```
self = <lancedb.query.AsyncVectorQuery object at 0x1203f1c90>, maximum_nprobes = 4

    def maximum_nprobes(self, maximum_nprobes: int) -> Self:
        """Set the maximum number of probes to use.

        See `nprobes` for more details.

        If this value is greater than `minimum_nprobes` then the excess partitions
        will be searched only if we have not found enough results.

        This can be useful when there is a narrow filter to allow these queries to
        spend more time searching and avoid potential false negatives.

        If this value is 0 then no limit will be applied and all partitions could be
        searched if needed to satisfy the limit.
        """
>       self._inner.maximum_nprobes(maximum_nprobes)
E       ValueError: Invalid input, maximum_nprobes must be greater than or equal to minimum_nprobes

python/lancedb/query.py:2761: ValueError
```

The case I care about is where min == max, but this solution handles it
even if they're not equal. If both min and max are provided, we set both
to the minimum and then set the max. This isn't 100% equivalent: the
minimum setter checks for 0 on the min, and `.nprobes` does no sanity
checking at all. But I figured this was the most reasonable and general
solution without touching more of this code.
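
In effect, the binding now applies the values in an order that keeps min <= max at every step, roughly like this sketch (hypothetical helper, not the actual binding code):

```python
def apply_nprobes(query, minimum: int, maximum: int):
    # `.nprobes` sets both bounds at once without cross-validation, so
    # the invariant min <= max holds while the maximum is raised after.
    query.nprobes(minimum)          # both bounds -> minimum
    query.maximum_nprobes(maximum)  # then raise the maximum
    return query
```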

As part of this, I noticed the error messages were a bit ambiguous, so I
made them symmetric and clarified them while I was here.
2025-07-30 09:23:25 -07:00
Weston Pace
67ec1fe75c feat: don't repartition for the sake of the metadata eraser (#2559)
The `MetadataEraserExec` is super lightweight and doesn't really justify
partitioning. I recently saw a plan that was repartitioning just for this
node, and that seems wasteful.
2025-07-29 19:26:30 -07:00
Lance Release
70d9b04ba5 Bump version: 0.21.2-beta.2 → 0.21.2 2025-07-25 20:32:41 +00:00
Lance Release
b0d4a79c35 Bump version: 0.21.2-beta.1 → 0.21.2-beta.2 2025-07-25 20:31:50 +00:00
Lance Release
f79295c697 Bump version: 0.24.2-beta.2 → 0.24.2 2025-07-25 20:31:15 +00:00
Lance Release
381fad9b65 Bump version: 0.24.2-beta.1 → 0.24.2-beta.2 2025-07-25 20:31:15 +00:00
Tristan Zajonc
055bf91d3e fix: handle empty list with schema in table creation (#2548)
## Summary
Fixes IndexError when creating tables with empty list data and a
provided schema. Previously, `_into_pyarrow_reader()` would attempt to
access `data[0]` on empty lists, causing an IndexError. It now properly
handles empty lists by using the provided schema.

Also adds regression tests for GitHub issues #1968 and #303 to prevent
future regressions with empty table scenarios.

## Changes
- Fix IndexError in `_into_pyarrow_reader()` for empty list + schema
case
- Add Optional[pa.Schema] parameter to handle empty data gracefully  
- Add `test_create_table_empty_list_with_schema` for the IndexError fix
- Add `test_create_empty_then_add_data` for issue #1968
- Add `test_search_empty_table` for issue #303
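
A minimal sketch of the guard (illustrative; assumes PyArrow, not the actual LanceDB code):

```python
from typing import Optional

import pyarrow as pa

def into_reader(data: list, schema: Optional[pa.Schema] = None) -> pa.RecordBatchReader:
    if len(data) == 0:
        if schema is None:
            raise ValueError("an explicit schema is required for empty data")
        # Build a zero-row table from the provided schema instead of
        # peeking at data[0], which raises IndexError on empty input.
        return pa.Table.from_pylist([], schema=schema).to_reader()
    return pa.Table.from_pylist(data, schema=schema).to_reader()
```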

## Test plan
- [x] All new regression tests pass
- [x] Existing tests continue to pass
- [x] Code formatted with `make format`
2025-07-25 10:23:43 +08:00
Will Jones
050f0086b8 feat: upgrade Lance to v0.32.0 (#2543)
Changelog: https://github.com/lancedb/lance/releases/tag/v0.32.0

Fixes #2521
2025-07-24 19:22:53 -07:00
Tristan Zajonc
10fa23e0d6 fix(python): expose register function in embeddings module (#2544)
## Summary
Fixes #2541

**Problem**: The `register` function was not accessible via `from
lancedb.embeddings import register` as documented, causing ImportError
for users trying to create custom embedding functions.

**Solution**: Added `register` to the exports in
`python/lancedb/embeddings/__init__.py` to match the documented API and
follow the same pattern as other registry functions (`get_registry`,
`EmbeddingFunctionRegistry`).

**Root Cause**: The function existed in `lancedb.embeddings.registry`
but wasn't exposed through the main embeddings module interface.

## Changes
- Add `register` to imports in
`/python/python/lancedb/embeddings/__init__.py`
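
With the export in place, the documented pattern works as written (sketch; the model body is illustrative):

```python
from lancedb.embeddings import TextEmbeddingFunction, get_registry, register

@register("my-embedder")  # importable as documented
class MyEmbedder(TextEmbeddingFunction):
    def ndims(self):
        return 2

    def generate_embeddings(self, texts):
        return [[0.0, 0.0] for _ in texts]

func = get_registry().get("my-embedder").create()
```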

## Test Plan
- [x] Verified `from lancedb.embeddings import register` works as
documented
- [x] Confirmed existing embedding tests pass
- [x] Checked that the fix follows existing patterns (same as
`get_registry`)
- [x] Validated linting and formatting passes

## References
Fixes #2541
2025-07-24 15:30:06 -07:00
yihong
43d9fc28b0 fix: can not build on python3.9 for dev (#2477)
This patch fixes the dev build failing on Python 3.9.

The reason is that ibm-watsonx-ai requires Python 3.10 as its minimum
version; see `pyoven` for details: https://pyoven.org/package/ibm-watsonx-ai/

It also fixes a tiny Markdown lint issue.

---------

Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2025-07-24 12:39:04 -07:00
aniaan
f45f0d0431 fix(python): correct type annotations in EmbeddingFunctionRegistry (#2478)
- Fix register() method's alias parameter type from 'str = None' to
'Optional[str] = None'
- Add return type annotation 'Type[EmbeddingFunction]' to get() method
- Import Type from typing module for proper type hints
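
The corrected signatures look roughly like this (sketch):

```python
from typing import Optional, Type

class EmbeddingFunctionRegistry:
    def register(self, alias: Optional[str] = None):  # was: alias: str = None
        ...

    def get(self, name: str) -> Type["EmbeddingFunction"]:  # return type added
        ...
```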
2025-07-24 12:31:49 -07:00
Tristan Zajonc
b9e3c36d82 fix: replace broken documentation URLs in error messages (#2533)
Replaces broken 404 URL and unhelpful documentation links in type error
messages with working URL and inline list of supported data types.

**Before**: Points to
https://lancedb.github.io/lance/read_and_write.html (404 error)
**After**: Lists supported types inline and points to
https://lancedb.github.io/lancedb/guides/tables/
2025-07-24 12:30:27 -07:00
Chen Chongchen
3cd7dd3375 fix: to_pydantic typing (#2517)
currently, to_pydantic will always return LanceModel. If type checking
is enabled in my project. I have to use `cast(data,
List[RealModelType])` to solve type error. This PR uses generic to solve
this problem.
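
The generic approach, sketched (names are illustrative, not the exact patch):

```python
from typing import List, Type, TypeVar

from lancedb.pydantic import LanceModel

T = TypeVar("T", bound=LanceModel)

class QueryBuilder:  # stand-in for the real query builder
    def to_pydantic(self, model: Type[T]) -> List[T]:
        # Returning List[T] instead of List[LanceModel] lets type
        # checkers infer the concrete model type, so cast() is no
        # longer needed at call sites.
        raise NotImplementedError
```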
2025-07-24 12:30:15 -07:00
Tristan Zajonc
12d4ce4cfe fix: resolve flaky Node.js integration test for mirrored store (#2539)
## Summary
- Fixed flaky Node.js integration test for mirrored store functionality
- Converted callback-based `fs.readdir()` to `fs.promises.readdir()`
with proper async/await
- Used unique temporary directories to prevent test isolation issues
- Updated test expectations to match current IVF-PQ index file structure

## Problem
The mirrored store integration test was experiencing random failures in
CI with errors like:
- `expected 2 to equal 1` at various assertion points
- `done() called multiple times`

## Root Causes Identified
1. **Race conditions**: Mixing callback-based filesystem operations with
async functions created timing issues where assertions ran before
filesystem operations completed
2. **Test isolation**: Multiple tests shared the same temp directory
(`tmpdir()`), causing one test to see files from another
3. **Outdated expectations**: IVF-PQ indexes now create 2 files
(`auxiliary.idx` + `index.idx`) instead of 1, but the test expected only
1

## Solution
- Replace all `fs.readdir()` callbacks with `fs.promises.readdir()` and
`await`
- Use `fs.promises.mkdtemp()` to create unique temporary directories for
each test run
- Update index file count expectations from 1 to 2 files to match
current Lance behavior
- Add descriptive assertion labels for easier debugging

## Analysis
The mirroring implementation in `MirroringObjectStore::put_opts` is
synchronous - it awaits writes to both secondary (local) and primary
(S3) stores before returning. The test failures were due to
callback/async pattern mismatch and test isolation issues, not actual
async mirroring behavior.

## Test plan
- [x] Local tests are running without timing-based failures
- [x] Integration tests with AWS credentials pass in CI

This resolves the flaky failures including 'expected 2 to equal 1'
assertions and 'done() called multiple times' errors seen in CI runs.
2025-07-24 12:07:05 -07:00
Will Jones
3d1f102087 feat: allow Python and Typescript users to create Sessions (#2530)
## Summary
- Exposes `Session` in Python and Typescript so users can set the
`index_cache_size_bytes` and `metadata_cache_size_bytes`
* The `Session` is attached to the `Connection`, and thus shared across
all tables in that connection.
- Adds deprecation warnings for table-level cache configuration
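
A hedged usage sketch (the import path and constructor signature are assumptions based on the summary above):

```python
import lancedb

# One Session, shared by every table opened through this connection.
session = lancedb.Session(
    index_cache_size_bytes=512 * 1024 * 1024,
    metadata_cache_size_bytes=128 * 1024 * 1024,
)
db = lancedb.connect("data/sample-lancedb", session=session)
```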


🤖 Generated with [Claude Code](https://claude.ai/code)

---------

Co-authored-by: Claude <noreply@anthropic.com>
2025-07-24 12:06:29 -07:00
Tristan Zajonc
81afd8a42f fix: use local random state in FTS test fixtures to prevent flaky failures (#2532)
## Summary
Fixes intermittent CI failures in `test_search_fts[False]` where boolean
FTS queries were returning fewer results than expected due to
non-deterministic test data generation.

## Problem
The test was using global `random` and `np.random` without seeding,
causing the boolean query `MatchQuery("puppy", "text") &
MatchQuery("runs", "text")` to sometimes return only 3 results instead
of the expected 5, leading to `AssertionError: assert 3 == 5`.

## Solution
- Replace global random calls with local `random.Random(42)` and
`np.random.RandomState(42)` objects in test fixtures
- Ensures deterministic test data while maintaining test isolation
- No impact on other tests since random state is scoped to fixtures only
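
The fixture change amounts to this (sketch):

```python
import random

import numpy as np

def make_fts_rows(n: int = 100):
    # Local, seeded generators: data is deterministic and no global
    # random state leaks into other tests.
    rng = random.Random(42)
    np_rng = np.random.RandomState(42)
    words = ["puppy", "runs", "sleeps", "fetch"]
    return [
        {"text": " ".join(rng.choices(words, k=5)),
         "vector": np_rng.rand(4).tolist()}
        for _ in range(n)
    ]
```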

## Test Results
- `test_search_fts[False]` now passes consistently
- All other FTS tests continue to pass
- No regression in other test suites (verified with `test_basic`)
- Maintains existing test behavior and coverage
2025-07-24 11:30:02 -07:00
Tristan Zajonc
c2aa03615a fix: correct grammar in LanceDB cloud connection error message (#2537)
## Summary

Fixed a minor grammar error in the error message for missing API key
when connecting to LanceDB cloud.

## Changes

- Changed 'api_key is required to connected LanceDB cloud' to 'api_key
is required to connect to LanceDB cloud'
- Location: `python/python/lancedb/__init__.py:95`

## Test plan

- Error message formatting is correct and grammatical
- No functional changes to existing behavior
2025-07-24 09:56:06 -07:00
Tristan Zajonc
d2c6759e7f fix: use import stubs to prevent MLX doctest collection failures (#2536)
## Summary
- Add `create_import_stub()` helper to `embeddings/utils.py` for
handling optional dependencies
- Fix MLX doctest collection failures by using import stubs in
`gte_mlx_model.py`
- Module now imports successfully for doctest collection even when MLX
is not installed

## Changes
- **New utility function**: `create_import_stub()` creates placeholder
objects that allow class inheritance but raise helpful errors when used
- **Updated MLX model**: Uses import stubs instead of direct imports
that fail immediately
- **Graceful degradation**: Clear error messages when MLX functionality
is accessed without MLX installed
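
The stub idea, sketched (a hypothetical simplification of `create_import_stub()`):

```python
def create_import_stub(module_name: str):
    """Return a placeholder class: it can be subclassed, so module-level
    class definitions still parse, but instantiating it raises a helpful
    error pointing at the missing dependency."""

    class _Stub:
        def __init__(self, *args, **kwargs):
            raise ImportError(
                f"{module_name} is not installed; install it to use this feature"
            )

    return _Stub

Base = create_import_stub("mlx")

class GteModel(Base):  # class definition succeeds without mlx installed
    pass

# GteModel() raises: ImportError: mlx is not installed; ...
```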

## Test Results
- `pytest --doctest-modules python/lancedb` now passes (with and without MLX installed)
- All existing tests continue to pass
- MLX functionality works normally when MLX is installed
- Helpful error messages when MLX functionality is used without MLX installed

Fixes #2538

---------

Co-authored-by: Will Jones <willjones127@gmail.com>
2025-07-23 16:25:33 -07:00
Weston Pace
94fb9f364a feat: update lance version to 0.32.0-b2 (#2525) 2025-07-23 12:23:10 -07:00
Will Jones
fbff244ed8 chore: add claude md files (#2531)
Gives basic context to Claude about how to do common tasks in the repo.
2025-07-23 12:20:36 -07:00
Xuanwo
7e7466d224 ci: enable trust publishing for rust crates (#2529) 2025-07-23 14:53:52 +08:00
Lance Release
cceaf27d79 Bump version: 0.21.2-beta.0 → 0.21.2-beta.1 2025-07-22 15:41:13 +00:00
131 changed files with 1599 additions and 14343 deletions


@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.21.2-beta.0"
current_version = "0.21.2"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.
@@ -50,11 +50,6 @@ pre_commit_hooks = [
optional_value = "final"
values = ["beta", "final"]
[[tool.bumpversion.files]]
filename = "node/package.json"
replace = "\"version\": \"{new_version}\","
search = "\"version\": \"{current_version}\","
[[tool.bumpversion.files]]
filename = "nodejs/package.json"
replace = "\"version\": \"{new_version}\","
@@ -66,39 +61,8 @@ glob = "nodejs/npm/*/package.json"
replace = "\"version\": \"{new_version}\","
search = "\"version\": \"{current_version}\","
# vectodb node binary packages
[[tool.bumpversion.files]]
glob = "node/package.json"
replace = "\"@lancedb/vectordb-darwin-arm64\": \"{new_version}\""
search = "\"@lancedb/vectordb-darwin-arm64\": \"{current_version}\""
[[tool.bumpversion.files]]
glob = "node/package.json"
replace = "\"@lancedb/vectordb-darwin-x64\": \"{new_version}\""
search = "\"@lancedb/vectordb-darwin-x64\": \"{current_version}\""
[[tool.bumpversion.files]]
glob = "node/package.json"
replace = "\"@lancedb/vectordb-linux-arm64-gnu\": \"{new_version}\""
search = "\"@lancedb/vectordb-linux-arm64-gnu\": \"{current_version}\""
[[tool.bumpversion.files]]
glob = "node/package.json"
replace = "\"@lancedb/vectordb-linux-x64-gnu\": \"{new_version}\""
search = "\"@lancedb/vectordb-linux-x64-gnu\": \"{current_version}\""
[[tool.bumpversion.files]]
glob = "node/package.json"
replace = "\"@lancedb/vectordb-win32-x64-msvc\": \"{new_version}\""
search = "\"@lancedb/vectordb-win32-x64-msvc\": \"{current_version}\""
# Cargo files
# ------------
[[tool.bumpversion.files]]
filename = "rust/ffi/node/Cargo.toml"
replace = "\nversion = \"{new_version}\""
search = "\nversion = \"{current_version}\""
[[tool.bumpversion.files]]
filename = "rust/lancedb/Cargo.toml"
replace = "\nversion = \"{new_version}\""


@@ -5,8 +5,8 @@ on:
tags-ignore:
# We don't publish pre-releases for Rust. Crates.io is just a source
# distribution, so we don't need to publish pre-releases.
- 'v*-beta*'
- '*-v*' # for example, python-vX.Y.Z
- "v*-beta*"
- "*-v*" # for example, python-vX.Y.Z
env:
# This env var is used by Swatinem/rust-cache@v2 for the cache
@@ -19,6 +19,8 @@ env:
jobs:
build:
runs-on: ubuntu-22.04
permissions:
id-token: write
timeout-minutes: 30
# Only runs on tags that matches the make-release action
if: startsWith(github.ref, 'refs/tags/v')
@@ -31,6 +33,8 @@ jobs:
run: |
sudo apt update
sudo apt install -y protobuf-compiler libssl-dev
- uses: rust-lang/crates-io-auth-action@v1
id: auth
- name: Publish the package
run: |
cargo publish -p lancedb --all-features --token ${{ secrets.CARGO_REGISTRY_TOKEN }}
cargo publish -p lancedb --all-features --token ${{ steps.auth.outputs.token }}


@@ -1,147 +0,0 @@
name: Node
on:
push:
branches:
- main
pull_request:
paths:
- node/**
- rust/ffi/node/**
- .github/workflows/node.yml
- docker-compose.yml
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
env:
# Disable full debug symbol generation to speed up CI build and keep memory down
# "1" means line tables only, which is useful for panic tracebacks.
#
# Use native CPU to accelerate tests if possible, especially for f16
# target-cpu=haswell fixes failing ci build
RUSTFLAGS: "-C debuginfo=1 -C target-cpu=haswell -C target-feature=+f16c,+avx2,+fma"
RUST_BACKTRACE: "1"
jobs:
linux:
name: Linux (Node ${{ matrix.node-version }})
timeout-minutes: 30
strategy:
matrix:
node-version: [ "18", "20" ]
runs-on: "ubuntu-22.04"
defaults:
run:
shell: bash
working-directory: node
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
lfs: true
- uses: actions/setup-node@v3
with:
node-version: ${{ matrix.node-version }}
cache: 'npm'
cache-dependency-path: node/package-lock.json
- uses: Swatinem/rust-cache@v2
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y protobuf-compiler libssl-dev
- name: Build
run: |
npm ci
npm run build
npm run pack-build
npm install --no-save ./dist/lancedb-vectordb-*.tgz
# Remove index.node to test with dependency installed
rm index.node
- name: Test
run: npm run test
macos:
timeout-minutes: 30
runs-on: "macos-13"
defaults:
run:
shell: bash
working-directory: node
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
lfs: true
- uses: actions/setup-node@v3
with:
node-version: 20
cache: 'npm'
cache-dependency-path: node/package-lock.json
- uses: Swatinem/rust-cache@v2
- name: Install dependencies
run: brew install protobuf
- name: Build
run: |
npm ci
npm run build
npm run pack-build
npm install --no-save ./dist/lancedb-vectordb-*.tgz
# Remove index.node to test with dependency installed
rm index.node
- name: Test
run: |
npm run test
aws-integtest:
timeout-minutes: 45
runs-on: "ubuntu-22.04"
defaults:
run:
shell: bash
working-directory: node
env:
AWS_ACCESS_KEY_ID: ACCESSKEY
AWS_SECRET_ACCESS_KEY: SECRETKEY
AWS_DEFAULT_REGION: us-west-2
# this one is for s3
AWS_ENDPOINT: http://localhost:4566
# this one is for dynamodb
DYNAMODB_ENDPOINT: http://localhost:4566
ALLOW_HTTP: true
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
lfs: true
- uses: actions/setup-node@v3
with:
node-version: 20
cache: 'npm'
cache-dependency-path: node/package-lock.json
- name: start local stack
run: docker compose -f ../docker-compose.yml up -d --wait
- name: create s3
run: aws s3 mb s3://lancedb-integtest --endpoint $AWS_ENDPOINT
- name: create ddb
run: |
aws dynamodb create-table \
--table-name lancedb-integtest \
--attribute-definitions '[{"AttributeName": "base_uri", "AttributeType": "S"}, {"AttributeName": "version", "AttributeType": "N"}]' \
--key-schema '[{"AttributeName": "base_uri", "KeyType": "HASH"}, {"AttributeName": "version", "KeyType": "RANGE"}]' \
--provisioned-throughput '{"ReadCapacityUnits": 10, "WriteCapacityUnits": 10}' \
--endpoint-url $DYNAMODB_ENDPOINT
- uses: Swatinem/rust-cache@v2
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y protobuf-compiler libssl-dev
- name: Build
run: |
npm ci
npm run build
npm run pack-build
npm install --no-save ./dist/lancedb-vectordb-*.tgz
# Remove index.node to test with dependency installed
rm index.node
- name: Test
run: npm run integration-test


@@ -365,200 +365,3 @@ jobs:
ARGS="$ARGS --tag preview"
fi
npm publish $ARGS
# ----------------------------------------------------------------------------
# vectordb release (legacy)
# ----------------------------------------------------------------------------
# TODO: delete this when we drop vectordb
node:
name: vectordb Typescript
runs-on: ubuntu-latest
defaults:
run:
shell: bash
working-directory: node
steps:
- name: Checkout
uses: actions/checkout@v4
- uses: actions/setup-node@v3
with:
node-version: 20
cache: "npm"
cache-dependency-path: node/package-lock.json
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y protobuf-compiler libssl-dev
- name: Build
run: |
npm ci
npm run tsc
npm pack
- name: Upload Linux Artifacts
uses: actions/upload-artifact@v4
with:
name: node-package
path: |
node/vectordb-*.tgz
node-macos:
name: vectordb ${{ matrix.config.arch }}
strategy:
matrix:
config:
- arch: x86_64-apple-darwin
runner: macos-13
- arch: aarch64-apple-darwin
# xlarge is implicitly arm64.
runner: macos-14
runs-on: ${{ matrix.config.runner }}
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Install system dependencies
run: brew install protobuf
- name: Install npm dependencies
run: |
cd node
npm ci
- name: Build MacOS native node modules
run: bash ci/build_macos_artifacts.sh ${{ matrix.config.arch }}
- name: Upload Darwin Artifacts
uses: actions/upload-artifact@v4
with:
name: node-native-darwin-${{ matrix.config.arch }}
path: |
node/dist/lancedb-vectordb-darwin*.tgz
node-linux-gnu:
name: vectordb (${{ matrix.config.arch}}-unknown-linux-gnu)
runs-on: ${{ matrix.config.runner }}
strategy:
fail-fast: false
matrix:
config:
- arch: x86_64
runner: ubuntu-latest
- arch: aarch64
# For successful fat LTO builds, we need a large runner to avoid OOM errors.
runner: warp-ubuntu-latest-arm64-4x
steps:
- name: Checkout
uses: actions/checkout@v4
# To avoid OOM errors on ARM, we create a swap file.
- name: Configure aarch64 build
if: ${{ matrix.config.arch == 'aarch64' }}
run: |
free -h
sudo fallocate -l 16G /swapfile
sudo chmod 600 /swapfile
sudo mkswap /swapfile
sudo swapon /swapfile
echo "/swapfile swap swap defaults 0 0" >> sudo /etc/fstab
# print info
swapon --show
free -h
- name: Build Linux Artifacts
run: |
bash ci/build_linux_artifacts.sh ${{ matrix.config.arch }} ${{ matrix.config.arch }}-unknown-linux-gnu
- name: Upload Linux Artifacts
uses: actions/upload-artifact@v4
with:
name: node-native-linux-${{ matrix.config.arch }}-gnu
path: |
node/dist/lancedb-vectordb-linux*.tgz
node-windows:
name: vectordb ${{ matrix.target }}
runs-on: windows-2022
strategy:
fail-fast: false
matrix:
target: [x86_64-pc-windows-msvc]
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Install Protoc v21.12
working-directory: C:\
run: |
New-Item -Path 'C:\protoc' -ItemType Directory
Set-Location C:\protoc
Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
7z x protoc.zip
Add-Content $env:GITHUB_PATH "C:\protoc\bin"
shell: powershell
- name: Install npm dependencies
run: |
cd node
npm ci
- name: Build Windows native node modules
run: .\ci\build_windows_artifacts.ps1 ${{ matrix.target }}
- name: Upload Windows Artifacts
uses: actions/upload-artifact@v4
with:
name: node-native-windows
path: |
node/dist/lancedb-vectordb-win32*.tgz
release:
name: vectordb NPM Publish
needs: [node, node-macos, node-linux-gnu, node-windows]
runs-on: ubuntu-latest
permissions:
contents: write
# Only runs on tags that matches the make-release action
if: startsWith(github.ref, 'refs/tags/v')
steps:
- uses: actions/download-artifact@v4
with:
pattern: node-*
- name: Display structure of downloaded files
run: ls -R
- uses: actions/setup-node@v3
with:
node-version: 20
registry-url: "https://registry.npmjs.org"
- name: Publish to NPM
env:
NODE_AUTH_TOKEN: ${{ secrets.LANCEDB_NPM_REGISTRY_TOKEN }}
run: |
# Tag beta as "preview" instead of default "latest". See lancedb
# npm publish step for more info.
if [[ $GITHUB_REF =~ refs/tags/v(.*)-beta.* ]]; then
PUBLISH_ARGS="--tag preview"
fi
mv */*.tgz .
for filename in *.tgz; do
npm publish $PUBLISH_ARGS $filename
done
- name: Deprecate
env:
NODE_AUTH_TOKEN: ${{ secrets.LANCEDB_NPM_REGISTRY_TOKEN }}
# We need to deprecate the old package to avoid confusion.
# Each time we publish a new version, it gets undeprecated.
run: npm deprecate vectordb "Use @lancedb/lancedb instead."
- name: Checkout
uses: actions/checkout@v4
with:
ref: main
- name: Update package-lock.json
run: |
git config user.name 'Lance Release'
git config user.email 'lance-dev@lancedb.com'
bash ci/update_lockfiles.sh
- name: Push new commit
uses: ad-m/github-push-action@master
with:
github_token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}
branch: main
- name: Notify Slack Action
uses: ravsamhq/notify-slack-action@2.3.0
if: ${{ always() }}
with:
status: ${{ job.status }}
notify_when: "failure"
notification_title: "{workflow} is failing"
env:
SLACK_WEBHOOK_URL: ${{ secrets.ACTION_MONITORING_SLACK }}

CLAUDE.md (new file, 24 lines)

@@ -0,0 +1,24 @@
LanceDB is a database designed for retrieval, including vector, full-text, and hybrid search.
It is a wrapper around Lance. There are two backends: local (in-process like SQLite) and
remote (against LanceDB Cloud).
The core of LanceDB is written in Rust. There are bindings in Python, Typescript, and Java.
Project layout:
* `rust/lancedb`: The LanceDB core Rust implementation.
* `python`: The Python bindings, using PyO3.
* `nodejs`: The Typescript bindings, using napi-rs
* `java`: The Java bindings
(`rust/ffi` and `node/` are for a deprecated package. You can ignore them.)
Common commands:
* Check for compiler errors: `cargo check --features remote --tests --examples`
* Run tests: `cargo test --features remote --tests`
* Run specific test: `cargo test --features remote -p <package_name> --test <test_name>`
* Lint: `cargo clippy --features remote --tests --examples`
* Format: `cargo fmt --all`
Before committing changes, run formatting.

Cargo.lock (generated, 301 lines changed)

@@ -1039,6 +1039,17 @@ dependencies = [
"tokio",
]
[[package]]
name = "backon"
version = "1.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "302eaff5357a264a2c42f127ecb8bac761cf99749fc3dc95677e2743991f99e7"
dependencies = [
"fastrand",
"gloo-timers",
"tokio",
]
[[package]]
name = "backtrace"
version = "0.3.75"
@@ -1469,7 +1480,7 @@ checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
dependencies = [
"glob",
"libc",
"libloading 0.8.8",
"libloading",
]
[[package]]
@@ -1562,15 +1573,6 @@ version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6"
[[package]]
name = "conv"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ff10625fd0ac447827aa30ea8b861fead473bb60aeb73af6c1c58caf0d1299"
dependencies = [
"custom_derive",
]
[[package]]
name = "convert_case"
version = "0.6.0"
@@ -1786,12 +1788,6 @@ dependencies = [
"syn 2.0.103",
]
[[package]]
name = "custom_derive"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef8ae57c4978a2acd8b869ce6b9ca1dfe817bff704c220209fdef2c0b75a01b9"
[[package]]
name = "darling"
version = "0.20.11"
@@ -2477,6 +2473,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
dependencies = [
"block-buffer",
"const-oid",
"crypto-common",
"subtle",
]
@@ -2840,9 +2837,10 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "fsst"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
version = "0.32.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
dependencies = [
"arrow-array",
"rand 0.8.5",
]
@@ -3256,6 +3254,18 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2"
[[package]]
name = "gloo-timers"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbb143cf96099802033e0d4f4963b19fd2e0b728bcf076cd9cf7f6634f092994"
dependencies = [
"futures-channel",
"futures-core",
"js-sys",
"wasm-bindgen",
]
[[package]]
name = "group"
version = "0.12.1"
@@ -3792,6 +3802,17 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "io-uring"
version = "0.7.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4"
dependencies = [
"bitflags 2.9.1",
"cfg-if",
"libc",
]
[[package]]
name = "ipnet"
version = "2.11.0"
@@ -3930,8 +3951,8 @@ dependencies = [
[[package]]
name = "lance"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
version = "0.32.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
dependencies = [
"arrow",
"arrow-arith",
@@ -3993,8 +4014,8 @@ dependencies = [
[[package]]
name = "lance-arrow"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
version = "0.32.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4011,8 +4032,8 @@ dependencies = [
[[package]]
name = "lance-core"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
version = "0.32.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4047,8 +4068,8 @@ dependencies = [
[[package]]
name = "lance-datafusion"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
version = "0.32.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
dependencies = [
"arrow",
"arrow-array",
@@ -4076,8 +4097,8 @@ dependencies = [
[[package]]
name = "lance-datagen"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
version = "0.32.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
dependencies = [
"arrow",
"arrow-array",
@@ -4093,8 +4114,8 @@ dependencies = [
[[package]]
name = "lance-encoding"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
version = "0.32.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
dependencies = [
"arrayref",
"arrow",
@@ -4133,8 +4154,8 @@ dependencies = [
[[package]]
name = "lance-file"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
version = "0.32.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4168,8 +4189,8 @@ dependencies = [
[[package]]
name = "lance-index"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
version = "0.32.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
dependencies = [
"arrow",
"arrow-array",
@@ -4203,7 +4224,6 @@ dependencies = [
"lance-linalg",
"lance-table",
"log",
"moka",
"num-traits",
"object_store",
"prost",
@@ -4223,8 +4243,8 @@ dependencies = [
[[package]]
name = "lance-io"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
version = "0.32.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
dependencies = [
"arrow",
"arrow-arith",
@@ -4248,6 +4268,8 @@ dependencies = [
"lance-core",
"log",
"object_store",
"object_store_opendal",
"opendal",
"path_abs",
"pin-project",
"prost",
@@ -4262,8 +4284,8 @@ dependencies = [
[[package]]
name = "lance-linalg"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
version = "0.32.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4286,8 +4308,8 @@ dependencies = [
[[package]]
name = "lance-table"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
version = "0.32.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
dependencies = [
"arrow",
"arrow-array",
@@ -4325,8 +4347,8 @@ dependencies = [
[[package]]
name = "lance-testing"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
version = "0.32.1"
source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
dependencies = [
"arrow-array",
"arrow-schema",
@@ -4337,7 +4359,7 @@ dependencies = [
[[package]]
name = "lancedb"
version = "0.21.2-beta.0"
version = "0.21.2"
dependencies = [
"arrow",
"arrow-array",
@@ -4393,7 +4415,7 @@ dependencies = [
"regex",
"reqwest",
"rstest",
"semver 1.0.26",
"semver",
"serde",
"serde_json",
"serde_with",
@@ -4422,34 +4444,9 @@ dependencies = [
"tokio",
]
[[package]]
name = "lancedb-node"
version = "0.21.2-beta.0"
dependencies = [
"arrow-array",
"arrow-ipc",
"arrow-schema",
"async-trait",
"chrono",
"conv",
"env_logger",
"futures",
"half",
"lance",
"lance-index",
"lance-linalg",
"lancedb",
"lzma-sys",
"neon",
"object_store",
"once_cell",
"snafu",
"tokio",
]
[[package]]
name = "lancedb-nodejs"
version = "0.21.2-beta.0"
version = "0.21.2"
dependencies = [
"arrow-array",
"arrow-ipc",
@@ -4469,7 +4466,7 @@ dependencies = [
[[package]]
name = "lancedb-python"
version = "0.24.2-beta.0"
version = "0.24.2"
dependencies = [
"arrow",
"env_logger",
@@ -4570,16 +4567,6 @@ version = "0.2.174"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776"
[[package]]
name = "libloading"
version = "0.6.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "351a32417a12d5f7e82c368a66781e307834dae04c6ce0cd4456d52989229883"
dependencies = [
"cfg-if",
"winapi",
]
[[package]]
name = "libloading"
version = "0.8.8"
@@ -4958,7 +4945,7 @@ dependencies = [
"proc-macro2",
"quote",
"regex",
"semver 1.0.26",
"semver",
"syn 2.0.103",
]
@@ -4968,48 +4955,7 @@ version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "427802e8ec3a734331fec1035594a210ce1ff4dc5bc1950530920ab717964ea3"
dependencies = [
"libloading 0.8.8",
]
[[package]]
name = "neon"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28e15415261d880aed48122e917a45e87bb82cf0260bb6db48bbab44b7464373"
dependencies = [
"neon-build",
"neon-macros",
"neon-runtime",
"semver 0.9.0",
"smallvec",
]
[[package]]
name = "neon-build"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8bac98a702e71804af3dacfde41edde4a16076a7bbe889ae61e56e18c5b1c811"
[[package]]
name = "neon-macros"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7288eac8b54af7913c60e0eb0e2a7683020dffa342ab3fd15e28f035ba897cf"
dependencies = [
"quote",
"syn 1.0.109",
"syn-mid",
]
[[package]]
name = "neon-runtime"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4676720fa8bb32c64c3d9f49c47a47289239ec46b4bdb66d0913cc512cb0daca"
dependencies = [
"cfg-if",
"libloading 0.6.7",
"smallvec",
"libloading",
]
[[package]]
@@ -5215,6 +5161,21 @@ dependencies = [
"web-time",
]
[[package]]
name = "object_store_opendal"
version = "0.54.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ce697ee723fdc3eaf6c457abf4059034be15167022b18b619993802cd1443d5"
dependencies = [
"async-trait",
"bytes",
"futures",
"object_store",
"opendal",
"pin-project",
"tokio",
]
[[package]]
name = "once_cell"
version = "1.21.3"
@@ -5255,6 +5216,33 @@ dependencies = [
"pkg-config",
]
[[package]]
name = "opendal"
version = "0.54.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffb9838d0575c6dbaf3fcec7255af8d5771996d4af900bbb6fa9a314dec00a1a"
dependencies = [
"anyhow",
"backon",
"base64 0.22.1",
"bytes",
"chrono",
"futures",
"getrandom 0.2.16",
"http 1.3.1",
"http-body 1.0.1",
"log",
"md-5",
"percent-encoding",
"quick-xml",
"reqsign",
"reqwest",
"serde",
"serde_json",
"tokio",
"uuid",
]
[[package]]
name = "openssl-probe"
version = "0.1.6"
@@ -6460,6 +6448,33 @@ version = "1.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2"
[[package]]
name = "reqsign"
version = "0.16.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43451dbf3590a7590684c25fb8d12ecdcc90ed3ac123433e500447c7d77ed701"
dependencies = [
"anyhow",
"async-trait",
"base64 0.22.1",
"chrono",
"form_urlencoded",
"getrandom 0.2.16",
"hex",
"hmac",
"home",
"http 1.3.1",
"log",
"once_cell",
"percent-encoding",
"rand 0.8.5",
"reqwest",
"serde",
"serde_json",
"sha1",
"sha2",
]
[[package]]
name = "reqwest"
version = "0.12.20"
@@ -6622,7 +6637,7 @@ version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92"
dependencies = [
"semver 1.0.26",
"semver",
]
[[package]]
@@ -6887,27 +6902,12 @@ dependencies = [
"libc",
]
[[package]]
name = "semver"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403"
dependencies = [
"semver-parser",
]
[[package]]
name = "semver"
version = "1.0.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0"
[[package]]
name = "semver-parser"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
[[package]]
name = "seq-macro"
version = "0.3.6"
@@ -7307,17 +7307,6 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "syn-mid"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fea305d57546cc8cd04feb14b62ec84bf17f50e3f7b12560d7bfa9265f39d9ed"
dependencies = [
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "sync_wrapper"
version = "1.0.2"
@@ -7732,16 +7721,18 @@ dependencies = [
[[package]]
name = "tokio"
version = "1.45.1"
version = "1.46.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75ef51a33ef1da925cea3e4eb122833cb377c61439ca401b770f54902b806779"
checksum = "0cc3a2344dafbe23a245241fe8b09735b521110d30fcefbbd5feb1797ca35d17"
dependencies = [
"backtrace",
"bytes",
"io-uring",
"libc",
"mio",
"pin-project-lite",
"signal-hook-registry",
"slab",
"socket2",
"tokio-macros",
"windows-sys 0.52.0",
@@ -7951,7 +7942,7 @@ checksum = "90b70b37e9074642bc5f60bb23247fd072a84314ca9e71cdf8527593406a0dd3"
dependencies = [
"gemm 0.18.2",
"half",
"libloading 0.8.8",
"libloading",
"memmap2 0.9.5",
"num",
"num-traits",


@@ -1,6 +1,5 @@
[workspace]
members = [
"rust/ffi/node",
"rust/lancedb",
"nodejs",
"python",
@@ -21,16 +20,16 @@ categories = ["database-implementations"]
rust-version = "1.78.0"
[workspace.dependencies]
lance = { "version" = "=0.31.2", "features" = [
lance = { "version" = "=0.32.1", "features" = [
"dynamodb",
], "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-io = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-index = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-linalg = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-table = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-testing = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-datafusion = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-encoding = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
], "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
lance-io = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
lance-index = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
lance-linalg = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
lance-table = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
lance-testing = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
lance-datafusion = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
lance-encoding = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
# Note that this one does not include pyarrow
arrow = { version = "55.1", optional = false }
arrow-array = "55.1"


@@ -1,22 +0,0 @@
#!/bin/bash
set -e
ARCH=${1:-x86_64}
TARGET_TRIPLE=${2:-x86_64-unknown-linux-gnu}
# We pass down the current user so that when we later mount the local files
# into the container, the files are accessible by the current user.
pushd ci/manylinux_node
docker build \
-t lancedb-node-manylinux \
--build-arg="ARCH=$ARCH" \
--build-arg="DOCKER_USER=$(id -u)" \
--progress=plain \
.
popd
# We turn on memory swap to avoid OOM killer
docker run \
-v $(pwd):/io -w /io \
--memory-swap=-1 \
lancedb-node-manylinux \
bash ci/manylinux_node/build_vectordb.sh $ARCH $TARGET_TRIPLE


@@ -1,34 +0,0 @@
# Builds the macOS artifacts (node binaries).
# Usage: ./ci/build_macos_artifacts.sh [target]
# Targets supported: x86_64-apple-darwin aarch64-apple-darwin
set -e
prebuild_rust() {
# Building here for the sake of easier debugging.
pushd rust/ffi/node
echo "Building rust library for $1"
export RUST_BACKTRACE=1
cargo build --release --target $1
popd
}
build_node_binaries() {
pushd node
echo "Building node library for $1"
npm run build-release -- --target $1
npm run pack-build -- --target $1
popd
}
if [ -n "$1" ]; then
targets=$1
else
targets="x86_64-apple-darwin aarch64-apple-darwin"
fi
echo "Building artifacts for targets: $targets"
for target in $targets
do
prebuild_rust $target
build_node_binaries $target
done


@@ -1,42 +0,0 @@
# Builds the Windows artifacts (node binaries).
# Usage: .\ci\build_windows_artifacts.ps1 [target]
# Targets supported:
# - x86_64-pc-windows-msvc
# - i686-pc-windows-msvc
# - aarch64-pc-windows-msvc
function Prebuild-Rust {
param (
[string]$target
)
# Building here for the sake of easier debugging.
Push-Location -Path "rust/ffi/node"
Write-Host "Building rust library for $target"
$env:RUST_BACKTRACE=1
cargo build --release --target $target
Pop-Location
}
function Build-NodeBinaries {
param (
[string]$target
)
Push-Location -Path "node"
Write-Host "Building node library for $target"
npm run build-release -- --target $target
npm run pack-build -- --target $target
Pop-Location
}
$targets = $args[0]
if (-not $targets) {
$targets = "x86_64-pc-windows-msvc", "aarch64-pc-windows-msvc"
}
Write-Host "Building artifacts for targets: $targets"
foreach ($target in $targets) {
Prebuild-Rust $target
Build-NodeBinaries $target
}


@@ -1,42 +0,0 @@
# Builds the Windows artifacts (nodejs binaries).
# Usage: .\ci\build_windows_artifacts_nodejs.ps1 [target]
# Targets supported:
# - x86_64-pc-windows-msvc
# - i686-pc-windows-msvc
# - aarch64-pc-windows-msvc
function Prebuild-Rust {
param (
[string]$target
)
# Building here for the sake of easier debugging.
Push-Location -Path "rust/lancedb"
Write-Host "Building rust library for $target"
$env:RUST_BACKTRACE=1
cargo build --release --target $target
Pop-Location
}
function Build-NodeBinaries {
param (
[string]$target
)
Push-Location -Path "nodejs"
Write-Host "Building nodejs library for $target"
$env:RUST_TARGET=$target
npm run build-release
Pop-Location
}
$targets = $args[0]
if (-not $targets) {
$targets = "x86_64-pc-windows-msvc", "aarch64-pc-windows-msvc"
}
Write-Host "Building artifacts for targets: $targets"
foreach ($target in $targets) {
Prebuild-Rust $target
Build-NodeBinaries $target
}


@@ -1,27 +0,0 @@
# Many linux dockerfile with Rust, Node, and Lance dependencies installed.
# This container allows building the node modules native libraries in an
# environment with a very old glibc, so that we are compatible with a wide
# range of linux distributions.
ARG ARCH=x86_64
FROM quay.io/pypa/manylinux_2_28_${ARCH}
ARG ARCH=x86_64
ARG DOCKER_USER=default_user
# Protobuf is also installed as root.
COPY install_protobuf.sh install_protobuf.sh
RUN ./install_protobuf.sh ${ARCH}
ENV DOCKER_USER=${DOCKER_USER}
# Create a group and user, but only if it doesn't exist
RUN echo ${ARCH} && id -u ${DOCKER_USER} >/dev/null 2>&1 || adduser --user-group --create-home --uid ${DOCKER_USER} build_user
# We switch to the user to install Rust and Node, since those like to be
# installed at the user level.
USER ${DOCKER_USER}
COPY prepare_manylinux_node.sh prepare_manylinux_node.sh
RUN cp /prepare_manylinux_node.sh $HOME/ && \
cd $HOME && \
./prepare_manylinux_node.sh ${ARCH}


@@ -1,13 +0,0 @@
#!/bin/bash
# Builds the node module for manylinux. Invoked by ci/build_linux_artifacts.sh.
set -e
ARCH=${1:-x86_64}
TARGET_TRIPLE=${2:-x86_64-unknown-linux-gnu}
#Alpine doesn't have .bashrc
FILE=$HOME/.bashrc && test -f $FILE && source $FILE
cd node
npm ci
npm run build-release
npm run pack-build -- -t $TARGET_TRIPLE


@@ -1,15 +0,0 @@
#!/bin/bash
# Installs protobuf compiler. Should be run as root.
set -e
if [[ $1 == x86_64* ]]; then
ARCH=x86_64
else
# gnu target
ARCH=aarch_64
fi
PB_REL=https://github.com/protocolbuffers/protobuf/releases
PB_VERSION=23.1
curl -LO $PB_REL/download/v$PB_VERSION/protoc-$PB_VERSION-linux-$ARCH.zip
unzip protoc-$PB_VERSION-linux-$ARCH.zip -d /usr/local


@@ -1,21 +0,0 @@
#!/bin/bash
set -e
install_node() {
echo "Installing node..."
curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.34.0/install.sh | bash
source "$HOME"/.bashrc
nvm install --no-progress 18
}
install_rust() {
echo "Installing rust..."
curl https://sh.rustup.rs -sSf | bash -s -- -y
export PATH="$PATH:/root/.cargo/bin"
}
install_node
install_rust


@@ -103,264 +103,6 @@ markdown_extensions:
permalink: ""
nav:
- Home:
- LanceDB: index.md
- 🏃🏼‍♂️ Quick start: basic.md
- 📚 Concepts:
- Vector search: concepts/vector_search.md
- Indexing:
- IVFPQ: concepts/index_ivfpq.md
- HNSW: concepts/index_hnsw.md
- Storage: concepts/storage.md
- Data management: concepts/data_management.md
- 🔨 Guides:
- Working with tables: guides/tables.md
- Building a vector index: ann_indexes.md
- Vector Search: search.md
- Full-text search (native): fts.md
- Full-text search (tantivy-based): fts_tantivy.md
- Building a scalar index: guides/scalar_index.md
- Hybrid search:
- Overview: hybrid_search/hybrid_search.md
- Comparing Rerankers: hybrid_search/eval.md
- Airbnb financial data example: notebooks/hybrid_search.ipynb
- Late interaction with MultiVector search:
- Overview: guides/multi-vector.md
- Example: notebooks/Multivector_on_LanceDB.ipynb
- RAG:
- Vanilla RAG: rag/vanilla_rag.md
- Multi-head RAG: rag/multi_head_rag.md
- Corrective RAG: rag/corrective_rag.md
- Agentic RAG: rag/agentic_rag.md
- Graph RAG: rag/graph_rag.md
- Self RAG: rag/self_rag.md
- Adaptive RAG: rag/adaptive_rag.md
- SFR RAG: rag/sfr_rag.md
- Advanced Techniques:
- HyDE: rag/advanced_techniques/hyde.md
- FLARE: rag/advanced_techniques/flare.md
- Reranking:
- Quickstart: reranking/index.md
- Cohere Reranker: reranking/cohere.md
- Linear Combination Reranker: reranking/linear_combination.md
- Reciprocal Rank Fusion Reranker: reranking/rrf.md
- Cross Encoder Reranker: reranking/cross_encoder.md
- ColBERT Reranker: reranking/colbert.md
- Jina Reranker: reranking/jina.md
- OpenAI Reranker: reranking/openai.md
- AnswerDotAi Rerankers: reranking/answerdotai.md
- Voyage AI Rerankers: reranking/voyageai.md
- Building Custom Rerankers: reranking/custom_reranker.md
- Example: notebooks/lancedb_reranking.ipynb
- Filtering: sql.md
- Versioning & Reproducibility:
- sync API: notebooks/reproducibility.ipynb
- async API: notebooks/reproducibility_async.ipynb
- Configuring Storage: guides/storage.md
- Migration Guide: migration.md
- Tuning retrieval performance:
- Choosing right query type: guides/tuning_retrievers/1_query_types.md
- Reranking: guides/tuning_retrievers/2_reranking.md
- Embedding fine-tuning: guides/tuning_retrievers/3_embed_tuning.md
- 🧬 Managing embeddings:
- Understand Embeddings: embeddings/understanding_embeddings.md
- Get Started: embeddings/index.md
- Embedding functions: embeddings/embedding_functions.md
- Available models:
- Overview: embeddings/default_embedding_functions.md
- Text Embedding Functions:
- Sentence Transformers: embeddings/available_embedding_models/text_embedding_functions/sentence_transformers.md
- Huggingface Embedding Models: embeddings/available_embedding_models/text_embedding_functions/huggingface_embedding.md
- Ollama Embeddings: embeddings/available_embedding_models/text_embedding_functions/ollama_embedding.md
- OpenAI Embeddings: embeddings/available_embedding_models/text_embedding_functions/openai_embedding.md
- Instructor Embeddings: embeddings/available_embedding_models/text_embedding_functions/instructor_embedding.md
- Gemini Embeddings: embeddings/available_embedding_models/text_embedding_functions/gemini_embedding.md
- Cohere Embeddings: embeddings/available_embedding_models/text_embedding_functions/cohere_embedding.md
- Jina Embeddings: embeddings/available_embedding_models/text_embedding_functions/jina_embedding.md
- AWS Bedrock Text Embedding Functions: embeddings/available_embedding_models/text_embedding_functions/aws_bedrock_embedding.md
- IBM watsonx.ai Embeddings: embeddings/available_embedding_models/text_embedding_functions/ibm_watsonx_ai_embedding.md
- Voyage AI Embeddings: embeddings/available_embedding_models/text_embedding_functions/voyageai_embedding.md
- Multimodal Embedding Functions:
- OpenClip embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/openclip_embedding.md
- Imagebind embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/imagebind_embedding.md
- Jina Embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/jina_multimodal_embedding.md
- User-defined embedding functions: embeddings/custom_embedding_function.md
- Variables and secrets: embeddings/variables_and_secrets.md
- "Example: Multi-lingual semantic search": notebooks/multi_lingual_example.ipynb
- "Example: MultiModal CLIP Embeddings": notebooks/DisappearingEmbeddingFunction.ipynb
- 🔌 Integrations:
- Tools and data formats: integrations/index.md
- Pandas and PyArrow: python/pandas_and_pyarrow.md
- Polars: python/polars_arrow.md
- DuckDB: python/duckdb.md
- Datafusion: python/datafusion.md
- LangChain:
- LangChain 🔗: integrations/langchain.md
- LangChain demo: notebooks/langchain_demo.ipynb
- LangChain JS/TS 🔗: https://js.langchain.com/docs/integrations/vectorstores/lancedb
- LlamaIndex 🦙:
- LlamaIndex docs: integrations/llamaIndex.md
- LlamaIndex demo: notebooks/llamaIndex_demo.ipynb
- Pydantic: python/pydantic.md
- Voxel51: integrations/voxel51.md
- PromptTools: integrations/prompttools.md
- dlt: integrations/dlt.md
- phidata: integrations/phidata.md
- Genkit: integrations/genkit.md
- 🎯 Examples:
- Overview: examples/index.md
- 🐍 Python:
- Overview: examples/examples_python.md
- Build From Scratch: examples/python_examples/build_from_scratch.md
- Multimodal: examples/python_examples/multimodal.md
- Rag: examples/python_examples/rag.md
- Vector Search: examples/python_examples/vector_search.md
- Chatbot: examples/python_examples/chatbot.md
- Evaluation: examples/python_examples/evaluations.md
- AI Agent: examples/python_examples/aiagent.md
- Recommender System: examples/python_examples/recommendersystem.md
- Miscellaneous:
- Serverless QA Bot with S3 and Lambda: examples/serverless_lancedb_with_s3_and_lambda.md
- Serverless QA Bot with Modal: examples/serverless_qa_bot_with_modal_and_langchain.md
- 👾 JavaScript:
- Overview: examples/examples_js.md
- Serverless Website Chatbot: examples/serverless_website_chatbot.md
- YouTube Transcript Search: examples/youtube_transcript_bot_with_nodejs.md
- TransformersJS Embedding Search: examples/transformerjs_embedding_search_nodejs.md
- 🦀 Rust:
- Overview: examples/examples_rust.md
- 📓 Studies:
- ↗Improve retrievers with hybrid search and reranking: https://blog.lancedb.com/hybrid-search-and-reranking-report/
- 💭 FAQs: faq.md
- 🔍 Troubleshooting: troubleshooting.md
- ⚙️ API reference:
- 🐍 Python: python/python.md
- 👾 JavaScript (vectordb): javascript/modules.md
- 👾 JavaScript (lancedb): js/globals.md
- 🦀 Rust: https://docs.rs/lancedb/latest/lancedb/
- Quick start: basic.md
- Concepts:
- Vector search: concepts/vector_search.md
- Indexing:
- IVFPQ: concepts/index_ivfpq.md
- HNSW: concepts/index_hnsw.md
- Storage: concepts/storage.md
- Data management: concepts/data_management.md
- Guides:
- Working with tables: guides/tables.md
- Working with SQL: guides/sql_querying.md
- Building an ANN index: ann_indexes.md
- Vector Search: search.md
- Full-text search (native): fts.md
- Full-text search (tantivy-based): fts_tantivy.md
- Building a scalar index: guides/scalar_index.md
- Hybrid search:
- Overview: hybrid_search/hybrid_search.md
- Comparing Rerankers: hybrid_search/eval.md
- Airbnb financial data example: notebooks/hybrid_search.ipynb
- Late interaction with MultiVector search:
- Overview: guides/multi-vector.md
- Document search Example: notebooks/Multivector_on_LanceDB.ipynb
- RAG:
- Vanilla RAG: rag/vanilla_rag.md
- Multi-head RAG: rag/multi_head_rag.md
- Corrective RAG: rag/corrective_rag.md
- Agentic RAG: rag/agentic_rag.md
- Graph RAG: rag/graph_rag.md
- Self RAG: rag/self_rag.md
- Adaptive RAG: rag/adaptive_rag.md
- SFR RAG: rag/sfr_rag.md
- Advanced Techniques:
- HyDE: rag/advanced_techniques/hyde.md
- FLARE: rag/advanced_techniques/flare.md
- Reranking:
- Quickstart: reranking/index.md
- Cohere Reranker: reranking/cohere.md
- Linear Combination Reranker: reranking/linear_combination.md
- Reciprocal Rank Fusion Reranker: reranking/rrf.md
- Cross Encoder Reranker: reranking/cross_encoder.md
- ColBERT Reranker: reranking/colbert.md
- Jina Reranker: reranking/jina.md
- OpenAI Reranker: reranking/openai.md
- AnswerDotAi Rerankers: reranking/answerdotai.md
- Building Custom Rerankers: reranking/custom_reranker.md
- Example: notebooks/lancedb_reranking.ipynb
- Filtering: sql.md
- Versioning & Reproducibility:
- sync API: notebooks/reproducibility.ipynb
- async API: notebooks/reproducibility_async.ipynb
- Configuring Storage: guides/storage.md
- Migration Guide: migration.md
- Tuning retrieval performance:
- Choosing the right query type: guides/tuning_retrievers/1_query_types.md
- Reranking: guides/tuning_retrievers/2_reranking.md
- Embedding fine-tuning: guides/tuning_retrievers/3_embed_tuning.md
- Managing Embeddings:
- Understand Embeddings: embeddings/understanding_embeddings.md
- Get Started: embeddings/index.md
- Embedding functions: embeddings/embedding_functions.md
- Available models:
- Overview: embeddings/default_embedding_functions.md
- Text Embedding Functions:
- Sentence Transformers: embeddings/available_embedding_models/text_embedding_functions/sentence_transformers.md
- Huggingface Embedding Models: embeddings/available_embedding_models/text_embedding_functions/huggingface_embedding.md
- Ollama Embeddings: embeddings/available_embedding_models/text_embedding_functions/ollama_embedding.md
- OpenAI Embeddings: embeddings/available_embedding_models/text_embedding_functions/openai_embedding.md
- Instructor Embeddings: embeddings/available_embedding_models/text_embedding_functions/instructor_embedding.md
- Gemini Embeddings: embeddings/available_embedding_models/text_embedding_functions/gemini_embedding.md
- Cohere Embeddings: embeddings/available_embedding_models/text_embedding_functions/cohere_embedding.md
- Jina Embeddings: embeddings/available_embedding_models/text_embedding_functions/jina_embedding.md
- AWS Bedrock Text Embedding Functions: embeddings/available_embedding_models/text_embedding_functions/aws_bedrock_embedding.md
- IBM watsonx.ai Embeddings: embeddings/available_embedding_models/text_embedding_functions/ibm_watsonx_ai_embedding.md
- Multimodal Embedding Functions:
- OpenClip embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/openclip_embedding.md
- Imagebind embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/imagebind_embedding.md
- Jina Embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/jina_multimodal_embedding.md
- User-defined embedding functions: embeddings/custom_embedding_function.md
- Variables and secrets: embeddings/variables_and_secrets.md
- "Example: Multi-lingual semantic search": notebooks/multi_lingual_example.ipynb
- "Example: MultiModal CLIP Embeddings": notebooks/DisappearingEmbeddingFunction.ipynb
- Integrations:
- Overview: integrations/index.md
- Pandas and PyArrow: python/pandas_and_pyarrow.md
- Polars: python/polars_arrow.md
- DuckDB: python/duckdb.md
- Datafusion: python/datafusion.md
- LangChain 🦜️🔗↗: integrations/langchain.md
- LangChain.js 🦜️🔗↗: https://js.langchain.com/docs/integrations/vectorstores/lancedb
- LlamaIndex 🦙↗: integrations/llamaIndex.md
- Pydantic: python/pydantic.md
- Voxel51: integrations/voxel51.md
- PromptTools: integrations/prompttools.md
- dlt: integrations/dlt.md
- phidata: integrations/phidata.md
- Genkit: integrations/genkit.md
- Examples:
- examples/index.md
- 🐍 Python:
- Overview: examples/examples_python.md
- Build From Scratch: examples/python_examples/build_from_scratch.md
- Multimodal: examples/python_examples/multimodal.md
- Rag: examples/python_examples/rag.md
- Vector Search: examples/python_examples/vector_search.md
- Chatbot: examples/python_examples/chatbot.md
- Evaluation: examples/python_examples/evaluations.md
- AI Agent: examples/python_examples/aiagent.md
- Recommender System: examples/python_examples/recommendersystem.md
- Miscellaneous:
- Serverless QA Bot with S3 and Lambda: examples/serverless_lancedb_with_s3_and_lambda.md
- Serverless QA Bot with Modal: examples/serverless_qa_bot_with_modal_and_langchain.md
- 👾 JavaScript:
- Overview: examples/examples_js.md
- Serverless Website Chatbot: examples/serverless_website_chatbot.md
- YouTube Transcript Search: examples/youtube_transcript_bot_with_nodejs.md
- TransformersJS Embedding Search: examples/transformerjs_embedding_search_nodejs.md
- 🦀 Rust:
- Overview: examples/examples_rust.md
- Studies:
- studies/overview.md
- ↗Improve retrievers with hybrid search and reranking: https://blog.lancedb.com/hybrid-search-and-reranking-report/
- API reference:
- Overview: api_reference.md
- Python: python/python.md


@@ -0,0 +1,84 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / Session
# Class: Session
A session for managing caches and object stores across LanceDB operations.
Sessions allow you to configure cache sizes for index and metadata caches,
which can significantly impact performance for large datasets.
## Constructors
### new Session()
```ts
new Session(indexCacheSizeBytes?, metadataCacheSizeBytes?): Session
```
Create a new session with custom cache sizes.
#### Parameters
* **indexCacheSizeBytes?**: `null` \| `bigint`
The size of the index cache in bytes. Defaults to 6GB if not specified.
* **metadataCacheSizeBytes?**: `null` \| `bigint`
The size of the metadata cache in bytes. Defaults to 1GB if not specified.
#### Returns
[`Session`](Session.md)
## Methods
### approxNumItems()
```ts
approxNumItems(): number
```
Get the approximate number of items cached in the session.
#### Returns
`number`
***
### sizeBytes()
```ts
sizeBytes(): bigint
```
Get the current size of the session caches in bytes.
#### Returns
`bigint`
***
### default()
```ts
static default(): Session
```
Create a session with default cache sizes.
This is equivalent to creating a session with 6GB index cache
and 1GB metadata cache.
#### Returns
[`Session`](Session.md)
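A minimal usage sketch (assuming the import shown; the cache sizes and database path are illustrative):
```ts
import { connect, Session } from "@lancedb/lancedb";

// Cache sizes are given in bytes as bigint values:
// 8GB for the index cache, 2GB for the metadata cache.
const session = new Session(
  8n * 1024n * 1024n * 1024n,
  2n * 1024n * 1024n * 1024n,
);

// Pass the session when connecting so the connection shares its caches.
const db = await connect({ uri: "/tmp/sample-lancedb", session });

// Inspect the session's cache state.
console.log(session.sizeBytes()); // bigint: current cache size in bytes
console.log(session.approxNumItems()); // number: approximate cached item count
```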


@@ -6,10 +6,13 @@
# Function: connect()
## connect(uri, options)
## connect(uri, options, session)
```ts
function connect(uri, options?): Promise<Connection>
function connect(
uri,
options?,
session?): Promise<Connection>
```
Connect to a LanceDB instance at the given URI.
@@ -29,6 +32,8 @@ Accepted formats:
* **options?**: `Partial`&lt;[`ConnectionOptions`](../interfaces/ConnectionOptions.md)&gt;
The options to use when connecting to the database
* **session?**: [`Session`](../classes/Session.md)
### Returns
`Promise`&lt;[`Connection`](../classes/Connection.md)&gt;
@@ -77,7 +82,7 @@ Accepted formats:
[ConnectionOptions](../interfaces/ConnectionOptions.md) for more details on the URI format.
### Example
### Examples
```ts
const conn = await connect({
@@ -85,3 +90,11 @@ const conn = await connect({
storageOptions: {timeout: "60s"}
});
```
```ts
const session = Session.default();
const conn = await connect({
uri: "/path/to/database",
session: session
});
```
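The positional overload above should accept the session as its third argument; a minimal sketch under that assumption:
```ts
const session = Session.default(); // 6GB index cache, 1GB metadata cache
const conn = await connect("/path/to/database", {}, session);
```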


@@ -29,6 +29,7 @@
- [Query](classes/Query.md)
- [QueryBase](classes/QueryBase.md)
- [RecordBatchIterator](classes/RecordBatchIterator.md)
- [Session](classes/Session.md)
- [Table](classes/Table.md)
- [TagContents](classes/TagContents.md)
- [Tags](classes/Tags.md)


@@ -70,6 +70,17 @@ Defaults to 'us-east-1'.
***
### session?
```ts
optional session: Session;
```
(For LanceDB OSS only): the session to use for this connection. Holds
shared caches and other session-specific state.
***
### storageOptions?
```ts


@@ -8,7 +8,7 @@
## Properties
### indexCacheSize?
### ~~indexCacheSize?~~
```ts
optional indexCacheSize: number;
@@ -16,6 +16,11 @@ optional indexCacheSize: number;
Set the size of the index cache, specified as a number of entries
#### Deprecated
Use session-level cache configuration instead.
Create a Session with custom cache sizes and pass it to the connect() function; a migration sketch follows below.
The exact meaning of an "entry" will depend on the type of index:
- IVF: there is one entry for each IVF partition
- BTREE: there is one entry for the entire index
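A short migration sketch, assuming the deprecated option was previously set alongside the other connection options (the path is illustrative; the byte sizes are the documented session defaults):
```ts
// Before (deprecated, entry-based): e.g. { indexCacheSize: 256 }, where an
// entry is one IVF partition, or the entire index for BTREE.
// After (byte-based, shared through a Session):
const session = new Session(
  6n * 1024n * 1024n * 1024n, // index cache: 6GB default
  1n * 1024n * 1024n * 1024n, // metadata cache: 1GB default
);
const conn = await connect({ uri: "/path/to/database", session });
```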


@@ -19,7 +19,7 @@ lancedb = { path = "../../../rust/lancedb" }
lance = { workspace = true }
arrow = { workspace = true, features = ["ffi"] }
arrow-schema.workspace = true
tokio = "1.23"
tokio = "1.46"
jni = "0.21.1"
snafu.workspace = true
lazy_static.workspace = true


@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.21.2-beta.0</version>
<version>0.21.2-final.0</version>
<relativePath>../pom.xml</relativePath>
</parent>


@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.21.2-beta.0</version>
<version>0.21.2-final.0</version>
<relativePath>../pom.xml</relativePath>
</parent>


@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.21.2-beta.0</version>
<version>0.21.2-final.0</version>
<packaging>pom</packaging>
<name>${project.artifactId}</name>
<description>LanceDB Java SDK Parent POM</description>


@@ -1,22 +0,0 @@
module.exports = {
env: {
browser: true,
es2021: true
},
extends: 'standard-with-typescript',
overrides: [
],
parserOptions: {
project: './tsconfig.json',
ecmaVersion: 'latest',
sourceType: 'module'
},
rules: {
"@typescript-eslint/method-signature-style": "off",
"@typescript-eslint/quotes": "off",
"@typescript-eslint/semi": "off",
"@typescript-eslint/explicit-function-return-type": "off",
"@typescript-eslint/space-before-function-paren": "off",
"@typescript-eslint/indent": "off",
}
}


@@ -1,4 +0,0 @@
gen_test_data.py
index.node
dist/lancedb*.tgz
vectordb*.tgz


@@ -1,64 +0,0 @@
# Changelog
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [0.1.5] - 2023-06-00
### Added
- Support for macOS X86
## [0.1.4] - 2023-06-03
### Added
- Select / Project query API
### Changed
- Deprecated created_index in favor of createIndex
## [0.1.3] - 2023-06-01
### Added
- Support S3 and Google Cloud Storage
- Embedding functions support
- OpenAI embedding function
## [0.1.2] - 2023-05-27
### Added
- Append records API
- Extra query params to the nodejs client
- Create_index API
### Fixed
- bugfix: string columns should be converted to Utf8Array (#94)
## [0.1.1] - 2023-05-16
### Added
- create_table API
- limit parameter for queries
- Typescript / JavaScript examples
- Linux support
## [0.1.0] - 2023-05-16
### Added
- Initial JavaScript / Node.js library for LanceDB
- Read-only api to query LanceDB datasets
- Supports macOS arm only
## [pre-0.1.0]
- Various prototypes / test builds


@@ -1,66 +0,0 @@
# LanceDB
A JavaScript / Node.js library for [LanceDB](https://github.com/lancedb/lancedb).
**DEPRECATED: This library is deprecated. Please use the new client,
[@lancedb/lancedb](https://www.npmjs.com/package/@lancedb/lancedb).**
## Installation
```bash
npm install vectordb
```
This will download the appropriate native library for your platform. We currently
support:
* Linux (x86_64 and aarch64)
* MacOS (Intel and ARM/M1/M2)
* Windows (x86_64 only)
We do not yet support musl-based Linux (such as Alpine Linux) or aarch64 Windows.
## Usage
### Basic Example
```javascript
const lancedb = require('vectordb');
const db = await lancedb.connect('data/sample-lancedb');
const table = await db.createTable("my_table",
[{ id: 1, vector: [0.1, 1.0], item: "foo", price: 10.0 },
{ id: 2, vector: [3.9, 0.5], item: "bar", price: 20.0 }])
const results = await table.search([0.1, 0.3]).limit(20).execute();
console.log(results);
```
The [examples](./examples) folder contains complete examples.
## Development
To build everything fresh:
```bash
npm install
npm run build
```
Then you should be able to run the tests with:
```bash
npm test
```
### Fix lints
To run the linter and have it automatically fix all errors
```bash
npm run lint -- --fix
```
To build documentation
```bash
npx typedoc --plugin typedoc-plugin-markdown --out ../docs/src/javascript src/index.ts
```


@@ -1,41 +0,0 @@
// Copyright 2023 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
'use strict'
async function example () {
const lancedb = require('vectordb')
// You need to provide an OpenAI API key, here we read it from the OPENAI_API_KEY environment variable
const apiKey = process.env.OPENAI_API_KEY
// The embedding function will create embeddings for the source column ('text' in this case)
const embedding = new lancedb.OpenAIEmbeddingFunction('text', apiKey)
const db = await lancedb.connect('data/sample-lancedb')
const data = [
{ id: 1, text: 'Black T-Shirt', price: 10 },
{ id: 2, text: 'Leather Jacket', price: 50 }
]
const table = await db.createTable('vectors', data, embedding)
console.log(await db.tableNames())
const results = await table
.search('keeps me warm')
.limit(1)
.execute()
console.log(results[0].text)
}
example().then(_ => { console.log('All done!') })


@@ -1,15 +0,0 @@
{
"name": "vectordb-example-js-openai",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "Lance Devs",
"license": "Apache-2.0",
"dependencies": {
"vectordb": "file:../..",
"openai": "^3.2.1"
}
}


@@ -1,66 +0,0 @@
// Copyright 2023 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
'use strict'
async function example() {
const lancedb = require('vectordb')
// Import transformers and the all-MiniLM-L6-v2 model (https://huggingface.co/Xenova/all-MiniLM-L6-v2)
const { pipeline } = await import('@xenova/transformers')
const pipe = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
// Create embedding function from pipeline which returns a list of vectors from batch
// sourceColumn is the name of the column in the data to be embedded
//
// Output of pipe is a Tensor { data: Float32Array(384) }, so filter for the vector
const embed_fun = {}
embed_fun.sourceColumn = 'text'
embed_fun.embed = async function (batch) {
let result = []
for (let text of batch) {
const res = await pipe(text, { pooling: 'mean', normalize: true })
result.push(Array.from(res['data']))
}
return (result)
}
// Link a folder and create a table with data
const db = await lancedb.connect('data/sample-lancedb')
const data = [
{ id: 1, text: 'Cherry', type: 'fruit' },
{ id: 2, text: 'Carrot', type: 'vegetable' },
{ id: 3, text: 'Potato', type: 'vegetable' },
{ id: 4, text: 'Apple', type: 'fruit' },
{ id: 5, text: 'Banana', type: 'fruit' }
]
const table = await db.createTable('food_table', data, embed_fun)
// Query the table
const results = await table
.search("a sweet fruit to eat")
.metricType("cosine")
.limit(2)
.execute()
console.log(results.map(r => r.text))
}
example().then(_ => { console.log("Done!") })


@@ -1,16 +0,0 @@
{
"name": "vectordb-example-js-transformers",
"version": "1.0.0",
"description": "Example for using transformers.js with lancedb",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "Lance Devs",
"license": "Apache-2.0",
"dependencies": {
"@xenova/transformers": "^2.4.1",
"vectordb": "file:../.."
}
}


@@ -1,122 +0,0 @@
// Copyright 2023 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
'use strict'
const lancedb = require('vectordb')
const fs = require('fs/promises')
const readline = require('readline/promises')
const { stdin: input, stdout: output } = require('process')
const { Configuration, OpenAIApi } = require('openai')
// Download file from XYZ
const INPUT_FILE_NAME = 'data/youtube-transcriptions_sample.jsonl';
(async () => {
// You need to provide an OpenAI API key, here we read it from the OPENAI_API_KEY environment variable
const apiKey = process.env.OPENAI_API_KEY
// The embedding function will create embeddings for the 'context' column
const embedFunction = new lancedb.OpenAIEmbeddingFunction('context', apiKey)
// Connects to LanceDB
const db = await lancedb.connect('data/youtube-lancedb')
// Open the vectors table or create one if it does not exist
let tbl
if ((await db.tableNames()).includes('vectors')) {
tbl = await db.openTable('vectors', embedFunction)
} else {
tbl = await createEmbeddingsTable(db, embedFunction)
}
// Use OpenAI Completion API to generate and answer based on the context that LanceDB provides
const configuration = new Configuration({ apiKey })
const openai = new OpenAIApi(configuration)
const rl = readline.createInterface({ input, output })
try {
while (true) {
const query = await rl.question('Prompt: ')
const results = await tbl
.search(query)
.select(['title', 'text', 'context'])
.limit(3)
.execute()
// console.table(results)
const response = await openai.createCompletion({
model: 'text-davinci-003',
prompt: createPrompt(query, results),
max_tokens: 400,
temperature: 0,
top_p: 1,
frequency_penalty: 0,
presence_penalty: 0
})
console.log(response.data.choices[0].text)
}
} catch (err) {
console.log('Error: ', err)
} finally {
rl.close()
}
process.exit(1)
})()
async function createEmbeddingsTable (db, embedFunction) {
console.log(`Creating embeddings from ${INPUT_FILE_NAME}`)
// read the input file into a JSON array, skipping empty lines
const lines = (await fs.readFile(INPUT_FILE_NAME, 'utf-8'))
.toString()
.split('\n')
.filter(line => line.length > 0)
.map(line => JSON.parse(line))
const data = contextualize(lines, 20, 'video_id')
return await db.createTable('vectors', data, embedFunction)
}
// Each transcript has a small text column; we include previous transcripts in order
// to have more context when creating embeddings
function contextualize (rows, contextSize, groupColumn) {
const grouped = []
rows.forEach(row => {
if (!grouped[row[groupColumn]]) {
grouped[row[groupColumn]] = []
}
grouped[row[groupColumn]].push(row)
})
const data = []
Object.keys(grouped).forEach(key => {
for (let i = 0; i < grouped[key].length; i++) {
const start = i - contextSize > 0 ? i - contextSize : 0
grouped[key][i].context = grouped[key].slice(start, i + 1).map(r => r.text).join(' ')
}
data.push(...grouped[key])
})
return data
}
// Creates a prompt by aggregating all relevant contexts
function createPrompt (query, context) {
let prompt =
'Answer the question based on the context below.\n\n' +
'Context:\n'
// need to make sure our prompt is not larger than max size
prompt = prompt + context.map(c => c.context).join('\n\n---\n\n').substring(0, 3750)
prompt = prompt + `\n\nQuestion: ${query}\nAnswer:`
return prompt
}


@@ -1,15 +0,0 @@
{
"name": "vectordb-example-js-openai",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "Lance Devs",
"license": "Apache-2.0",
"dependencies": {
"vectordb": "file:../..",
"openai": "^3.2.1"
}
}


@@ -1,36 +0,0 @@
// Copyright 2023 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
'use strict'
async function example () {
const lancedb = require('vectordb')
const db = await lancedb.connect('data/sample-lancedb')
const data = [
{ id: 1, vector: [0.1, 0.2], price: 10 },
{ id: 2, vector: [1.1, 1.2], price: 50 }
]
const table = await db.createTable('vectors', data)
console.log(await db.tableNames())
const results = await table
.search([0.1, 0.3])
.limit(20)
.execute()
console.log(results)
}
example()


@@ -1,14 +0,0 @@
{
"name": "vectordb-example-js",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "Lance Devs",
"license": "Apache-2.0",
"dependencies": {
"vectordb": "file:../.."
}
}


@@ -1,22 +0,0 @@
{
"name": "vectordb-example-ts",
"version": "1.0.0",
"description": "",
"main": "dist/index.js",
"types": "dist/index.d.ts",
"scripts": {
"tsc": "tsc -b",
"build": "tsc"
},
"author": "Lance Devs",
"license": "Apache-2.0",
"devDependencies": {
"@types/node": "^18.16.2",
"ts-node": "^10.9.1",
"ts-node-dev": "^2.0.0",
"typescript": "*"
},
"dependencies": {
"vectordb": "file:../.."
}
}


@@ -1,35 +0,0 @@
// Copyright 2023 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import * as vectordb from 'vectordb';
async function example () {
const db = await vectordb.connect('data/sample-lancedb')
const data = [
{ id: 1, vector: [0.1, 0.2], price: 10 },
{ id: 2, vector: [1.1, 1.2], price: 50 }
]
const table = await db.createTable('vectors', data)
console.log(await db.tableNames())
const results = await table
.search([0.1, 0.3])
.limit(20)
.execute()
console.log(results)
}
example().then(_ => { console.log ("All done!") })


@@ -1,10 +0,0 @@
{
"include": ["src/**/*.ts"],
"compilerOptions": {
"target": "es2016",
"module": "commonjs",
"declaration": true,
"outDir": "./dist",
"strict": true
}
}


@@ -1,36 +0,0 @@
// Copyright 2023 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
const { currentTarget } = require('@neon-rs/load')
let nativeLib
try {
// When developing locally, give preference to the local built library
nativeLib = require('./index.node')
} catch {
try {
nativeLib = require(`@lancedb/vectordb-${currentTarget()}`)
} catch (e) {
throw new Error(`vectordb: failed to load native library.
You may need to run \`npm install @lancedb/vectordb-${currentTarget()}\`.
If that does not work, please file a bug report at https://github.com/lancedb/lancedb/issues
Source error: ${e}`)
}
}
// Dynamic require for runtime.
module.exports = nativeLib

node/package-lock.json (generated, 5239 changed lines): file diff suppressed because it is too large.


@@ -1,98 +0,0 @@
{
"name": "vectordb",
"version": "0.21.2-beta.0",
"description": " Serverless, low-latency vector database for AI applications",
"private": false,
"main": "dist/index.js",
"types": "dist/index.d.ts",
"scripts": {
"tsc": "tsc -b",
"build": "npm run tsc && cargo-cp-artifact --artifact cdylib lancedb_node index.node -- cargo build -p lancedb-node --message-format=json",
"build-release": "npm run build -- --release",
"test": "npm run tsc && mocha -recursive dist/test",
"integration-test": "npm run tsc && mocha -recursive dist/integration_test",
"lint": "eslint native.js src --ext .js,.ts",
"clean": "rm -rf node_modules *.node dist/",
"pack-build": "neon pack-build",
"check-npm": "printenv && which node && which npm && npm --version"
},
"repository": {
"type": "git",
"url": "https://github.com/lancedb/lancedb.git"
},
"homepage": "https://lancedb.github.io/lancedb/",
"bugs": {
"url": "https://github.com/lancedb/lancedb/issues"
},
"keywords": [
"data-format",
"data-science",
"machine-learning",
"data-analytics"
],
"author": "Lance Devs",
"license": "Apache-2.0",
"devDependencies": {
"@neon-rs/cli": "^0.0.160",
"@types/chai": "^4.3.4",
"@types/chai-as-promised": "^7.1.5",
"@types/mocha": "^10.0.1",
"@types/node": "^18.16.2",
"@types/sinon": "^10.0.15",
"@types/temp": "^0.9.1",
"@types/uuid": "^9.0.3",
"@typescript-eslint/eslint-plugin": "^5.59.1",
"apache-arrow-old": "npm:apache-arrow@13.0.0",
"cargo-cp-artifact": "^0.1",
"chai": "^4.3.7",
"chai-as-promised": "^7.1.1",
"eslint": "^8.39.0",
"eslint-config-standard-with-typescript": "^34.0.1",
"eslint-plugin-import": "^2.26.0",
"eslint-plugin-n": "^15.7.0",
"eslint-plugin-promise": "^6.1.1",
"mocha": "^10.2.0",
"openai": "^4.24.1",
"sinon": "^15.1.0",
"temp": "^0.9.4",
"ts-node": "^10.9.1",
"ts-node-dev": "^2.0.0",
"typedoc": "^0.24.7",
"typedoc-plugin-markdown": "^3.15.3",
"typescript": "^5.1.0",
"uuid": "^9.0.0"
},
"dependencies": {
"@neon-rs/load": "^0.0.74",
"axios": "^1.4.0"
},
"peerDependencies": {
"@apache-arrow/ts": "^14.0.2",
"apache-arrow": "^14.0.2"
},
"os": [
"darwin",
"linux",
"win32"
],
"cpu": [
"x64",
"arm64"
],
"neon": {
"targets": {
"x86_64-apple-darwin": "@lancedb/vectordb-darwin-x64",
"aarch64-apple-darwin": "@lancedb/vectordb-darwin-arm64",
"x86_64-unknown-linux-gnu": "@lancedb/vectordb-linux-x64-gnu",
"aarch64-unknown-linux-gnu": "@lancedb/vectordb-linux-arm64-gnu",
"x86_64-pc-windows-msvc": "@lancedb/vectordb-win32-x64-msvc"
}
},
"optionalDependencies": {
"@lancedb/vectordb-darwin-x64": "0.21.2-beta.0",
"@lancedb/vectordb-darwin-arm64": "0.21.2-beta.0",
"@lancedb/vectordb-linux-x64-gnu": "0.21.2-beta.0",
"@lancedb/vectordb-linux-arm64-gnu": "0.21.2-beta.0",
"@lancedb/vectordb-win32-x64-msvc": "0.21.2-beta.0"
}
}


@@ -1,635 +0,0 @@
// Copyright 2023 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import {
Field,
makeBuilder,
RecordBatchFileWriter,
Utf8,
type Vector,
FixedSizeList,
vectorFromArray,
Schema,
Table as ArrowTable,
RecordBatchStreamWriter,
List,
RecordBatch,
makeData,
Struct,
type Float,
DataType,
Binary,
Float32
} from "apache-arrow";
import { type EmbeddingFunction } from "./index";
import { sanitizeSchema } from "./sanitize";
/*
* Options to control how a column should be converted to a vector array
*/
export class VectorColumnOptions {
/** Vector column type. */
type: Float = new Float32();
constructor(values?: Partial<VectorColumnOptions>) {
Object.assign(this, values);
}
}
/** Options to control the makeArrowTable call. */
export class MakeArrowTableOptions {
/*
* Schema of the data.
*
* If this is not provided then the data type will be inferred from the
* JS type. Integer numbers will become int64, floating point numbers
* will become float64 and arrays will become variable sized lists with
* the data type inferred from the first element in the array.
*
* The schema must be specified if there are no records (e.g. to make
* an empty table)
*/
schema?: Schema;
/*
* Mapping from vector column name to expected type
*
* Lance expects vector columns to be fixed size list arrays (i.e. tensors)
* However, `makeArrowTable` will not infer this by default (it creates
* variable size list arrays). This field can be used to indicate that a column
* should be treated as a vector column and converted to a fixed size list.
*
* The keys should be the names of the vector columns. The value specifies the
* expected data type of the vector columns.
*
* If `schema` is provided then this field is ignored.
*
* By default, the column named "vector" will be assumed to be a float32
* vector column.
*/
vectorColumns: Record<string, VectorColumnOptions> = {
vector: new VectorColumnOptions()
};
embeddings?: EmbeddingFunction<any>;
/**
* If true then string columns will be encoded with dictionary encoding
*
* Set this to true if your string columns tend to repeat the same values
* often. For more precise control use the `schema` property to specify the
* data type for individual columns.
*
* If `schema` is provided then this property is ignored.
*/
dictionaryEncodeStrings: boolean = false;
constructor(values?: Partial<MakeArrowTableOptions>) {
Object.assign(this, values);
}
}
/**
* An enhanced version of the {@link makeTable} function from Apache Arrow
* that supports nested fields and embeddings columns.
*
* This function converts an array of Record<String, any> (row-major JS objects)
* to an Arrow Table (a columnar structure)
*
* Note that it currently does not support nulls.
*
* If a schema is provided then it will be used to determine the resulting array
* types. Fields will also be reordered to fit the order defined by the schema.
*
* If a schema is not provided then the types will be inferred and the field order
* will be controlled by the order of properties in the first record.
*
* If the input is empty then a schema must be provided to create an empty table.
*
* When a schema is not specified then data types will be inferred. The inference
* rules are as follows:
*
* - boolean => Bool
* - number => Float64
* - String => Utf8
* - Buffer => Binary
* - Record<String, any> => Struct
* - Array<any> => List
*
* @param data input data
* @param options options to control the makeArrowTable call.
*
* @example
*
* ```ts
*
* import { fromTableToBuffer, makeArrowTable } from "../arrow";
* import { Field, FixedSizeList, Float16, Float32, Int32, Schema } from "apache-arrow";
*
* const schema = new Schema([
* new Field("a", new Int32()),
* new Field("b", new Float32()),
* new Field("c", new FixedSizeList(3, new Field("item", new Float16()))),
* ]);
* const table = makeArrowTable([
* { a: 1, b: 2, c: [1, 2, 3] },
* { a: 4, b: 5, c: [4, 5, 6] },
* { a: 7, b: 8, c: [7, 8, 9] },
* ], { schema });
* ```
*
* By default it assumes that the column named `vector` is a vector column
* and it will be converted into a fixed size list array of type float32.
* The `vectorColumns` option can be used to support other vector column
* names and data types.
*
* ```ts
*
* const schema = new Schema([
new Field("a", new Float64()),
new Field("b", new Float64()),
new Field(
"vector",
new FixedSizeList(3, new Field("item", new Float32()))
),
]);
const table = makeArrowTable([
{ a: 1, b: 2, vector: [1, 2, 3] },
{ a: 4, b: 5, vector: [4, 5, 6] },
{ a: 7, b: 8, vector: [7, 8, 9] },
]);
assert.deepEqual(table.schema, schema);
* ```
*
* You can specify the vector column types and names using the options as well
*
* ```typescript
*
* const schema = new Schema([
* new Field('a', new Float64()),
* new Field('b', new Float64()),
* new Field('vec1', new FixedSizeList(3, new Field('item', new Float16()))),
* new Field('vec2', new FixedSizeList(3, new Field('item', new Float16())))
* ]);
* const table = makeArrowTable([
* { a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] },
* { a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] },
* { a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] }
* ], {
* vectorColumns: {
* vec1: { type: new Float16() },
* vec2: { type: new Float16() }
* }
* })
* assert.deepEqual(table.schema, schema)
* ```
*/
export function makeArrowTable(
data: Array<Record<string, any>>,
options?: Partial<MakeArrowTableOptions>
): ArrowTable {
if (
data.length === 0 &&
(options?.schema === undefined || options?.schema === null)
) {
throw new Error("At least one record or a schema needs to be provided");
}
const opt = new MakeArrowTableOptions(options !== undefined ? options : {});
if (opt.schema !== undefined && opt.schema !== null) {
opt.schema = sanitizeSchema(opt.schema);
opt.schema = validateSchemaEmbeddings(opt.schema, data, opt.embeddings);
}
const columns: Record<string, Vector> = {};
// TODO: sample dataset to find missing columns
// Prefer the field ordering of the schema, if present
const columnNames =
opt.schema != null ? (opt.schema.names as string[]) : Object.keys(data[0]);
for (const colName of columnNames) {
if (
data.length !== 0 &&
!Object.prototype.hasOwnProperty.call(data[0], colName)
) {
// The field is present in the schema, but not in the data, skip it
continue;
}
// Extract a single column from the records (transpose from row-major to col-major)
let values = data.map((datum) => datum[colName]);
// By default (type === undefined) arrow will infer the type from the JS type
let type;
if (opt.schema !== undefined) {
// If there is a schema provided, then use that for the type instead
type = opt.schema?.fields.filter((f) => f.name === colName)[0]?.type;
if (DataType.isInt(type) && type.bitWidth === 64) {
// wrap in BigInt to avoid bug: https://github.com/apache/arrow/issues/40051
values = values.map((v) => {
if (v === null) {
return v;
}
return BigInt(v);
});
}
} else {
// Otherwise, check to see if this column is one of the vector columns
// defined by opt.vectorColumns and, if so, use the fixed size list type
const vectorColumnOptions = opt.vectorColumns[colName];
if (vectorColumnOptions !== undefined) {
type = newVectorType(values[0].length, vectorColumnOptions.type);
}
}
try {
// Convert an Array of JS values to an arrow vector
columns[colName] = makeVector(values, type, opt.dictionaryEncodeStrings);
} catch (error: unknown) {
// eslint-disable-next-line @typescript-eslint/restrict-template-expressions
throw Error(`Could not convert column "${colName}" to Arrow: ${error}`);
}
}
if (opt.schema != null) {
// `new ArrowTable(columns)` infers a schema which may sometimes have
// incorrect nullability (it assumes nullable=true if there are 0 rows)
//
// `new ArrowTable(schema, columns)` will also fail because it will create a
// batch with an inferred schema and then complain that the batch schema
// does not match the provided schema.
//
// To work around this we first create a table with the wrong schema and
// then patch the schema of the batches so we can use
// `new ArrowTable(schema, batches)` which does not do any schema inference
const firstTable = new ArrowTable(columns);
const batchesFixed = firstTable.batches.map(
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
(batch) => new RecordBatch(opt.schema!, batch.data)
);
return new ArrowTable(opt.schema, batchesFixed);
} else {
return new ArrowTable(columns);
}
}
/**
* Create an empty Arrow table with the provided schema
*/
export function makeEmptyTable(schema: Schema): ArrowTable {
return makeArrowTable([], { schema });
}
// Helper function to convert Array<Array<any>> to a variable sized list array
function makeListVector(lists: any[][]): Vector<any> {
if (lists.length === 0 || lists[0].length === 0) {
throw Error("Cannot infer list vector from empty array or empty list");
}
const sampleList = lists[0];
let inferredType;
try {
const sampleVector = makeVector(sampleList);
inferredType = sampleVector.type;
} catch (error: unknown) {
// eslint-disable-next-line @typescript-eslint/restrict-template-expressions
throw Error(`Cannot infer list vector. Cannot infer inner type: ${error}`);
}
const listBuilder = makeBuilder({
type: new List(new Field("item", inferredType, true))
});
for (const list of lists) {
listBuilder.append(list);
}
return listBuilder.finish().toVector();
}
// Helper function to convert an Array of JS values to an Arrow Vector
function makeVector(
values: any[],
type?: DataType,
stringAsDictionary?: boolean
): Vector<any> {
if (type !== undefined) {
// No need for inference, let Arrow create it
return vectorFromArray(values, type);
}
if (values.length === 0) {
throw Error(
"makeVector requires at least one value or the type must be specified"
);
}
const sampleValue = values.find((val) => val !== null && val !== undefined);
if (sampleValue === undefined) {
throw Error(
"makeVector cannot infer the type if all values are null or undefined"
);
}
if (Array.isArray(sampleValue)) {
// Default Arrow inference doesn't handle list types
return makeListVector(values);
} else if (Buffer.isBuffer(sampleValue)) {
// Default Arrow inference doesn't handle Buffer
return vectorFromArray(values, new Binary());
} else if (
!(stringAsDictionary ?? false) &&
(typeof sampleValue === "string" || sampleValue instanceof String)
) {
// If the type is string then don't use Arrow's default inference unless dictionaries are requested
// because it will always use dictionary encoding for strings
return vectorFromArray(values, new Utf8());
} else {
// Convert a JS array of values to an arrow vector
return vectorFromArray(values);
}
}
async function applyEmbeddings<T>(
table: ArrowTable,
embeddings?: EmbeddingFunction<T>,
schema?: Schema
): Promise<ArrowTable> {
if (embeddings == null) {
return table;
}
if (schema !== undefined && schema !== null) {
schema = sanitizeSchema(schema);
}
// Convert from ArrowTable to Record<String, Vector>
const colEntries = [...Array(table.numCols).keys()].map((_, idx) => {
const name = table.schema.fields[idx].name;
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
const vec = table.getChildAt(idx)!;
return [name, vec];
});
const newColumns = Object.fromEntries(colEntries);
const sourceColumn = newColumns[embeddings.sourceColumn];
const destColumn = embeddings.destColumn ?? "vector";
const innerDestType = embeddings.embeddingDataType ?? new Float32();
if (sourceColumn === undefined) {
throw new Error(
`Cannot apply embedding function because the source column '${embeddings.sourceColumn}' was not present in the data`
);
}
if (table.numRows === 0) {
if (Object.prototype.hasOwnProperty.call(newColumns, destColumn)) {
// We have an empty table and it already has the embedding column so no work needs to be done
// Note: we don't return an error like we did below because this is a common occurrence. For example,
// if we call convertToTable with 0 records and a schema that includes the embedding
return table;
}
if (embeddings.embeddingDimension !== undefined) {
const destType = newVectorType(
embeddings.embeddingDimension,
innerDestType
);
newColumns[destColumn] = makeVector([], destType);
} else if (schema != null) {
const destField = schema.fields.find((f) => f.name === destColumn);
if (destField != null) {
newColumns[destColumn] = makeVector([], destField.type);
} else {
throw new Error(
`Attempt to apply embeddings to an empty table failed because schema was missing embedding column '${destColumn}'`
);
}
} else {
throw new Error(
"Attempt to apply embeddings to an empty table when the embeddings function does not specify `embeddingDimension`"
);
}
} else {
if (Object.prototype.hasOwnProperty.call(newColumns, destColumn)) {
throw new Error(
`Attempt to apply embeddings to table failed because column ${destColumn} already existed`
);
}
if (table.batches.length > 1) {
throw new Error(
"Internal error: `makeArrowTable` unexpectedly created a table with more than one batch"
);
}
const values = sourceColumn.toArray();
const vectors = await embeddings.embed(values as T[]);
if (vectors.length !== values.length) {
throw new Error(
"Embedding function did not return an embedding for each input element"
);
}
const destType = newVectorType(vectors[0].length, innerDestType);
newColumns[destColumn] = makeVector(vectors, destType);
}
const newTable = new ArrowTable(newColumns);
if (schema != null) {
if (schema.fields.find((f) => f.name === destColumn) === undefined) {
throw new Error(
`When using embedding functions and specifying a schema the schema should include the embedding column but the column ${destColumn} was missing`
);
}
return alignTable(newTable, schema);
}
return newTable;
}
/*
* Convert an Array of records into an Arrow Table, optionally applying an
* embeddings function to it.
*
* This function calls `makeArrowTable` first to create the Arrow Table.
* Any provided `makeTableOptions` (e.g. a schema) will be passed on to
* that call.
*
* The embedding function will be passed a column of values (based on the
* `sourceColumn` of the embedding function) and expects to receive back
* number[][] which will be converted into a fixed size list column. By
* default this will be a fixed size list of Float32 but that can be
* customized by the `embeddingDataType` property of the embedding function.
*
* If a schema is provided in `makeTableOptions` then it should include the
* embedding columns. If no schema is provided then embedding columns will
* be placed at the end of the table, after all of the input columns.
*/
export async function convertToTable<T>(
data: Array<Record<string, unknown>>,
embeddings?: EmbeddingFunction<T>,
makeTableOptions?: Partial<MakeArrowTableOptions>
): Promise<ArrowTable> {
const table = makeArrowTable(data, makeTableOptions);
return await applyEmbeddings(table, embeddings, makeTableOptions?.schema);
}
// Creates the Arrow Type for a Vector column with dimension `dim`
function newVectorType<T extends Float>(
dim: number,
innerType: T
): FixedSizeList<T> {
// Elsewhere we always default to nullable elements, so we need to set nullable to true here;
// otherwise we often get schema mismatches because the stored data's schema always has nullable elements
const children = new Field<T>("item", innerType, true);
return new FixedSizeList(dim, children);
}
/**
* Serialize an Array of records into a buffer using the Arrow IPC File serialization
*
* This function will call `convertToTable` and pass on `embeddings` and `schema`
*
* `schema` is required if data is empty
*/
export async function fromRecordsToBuffer<T>(
data: Array<Record<string, unknown>>,
embeddings?: EmbeddingFunction<T>,
schema?: Schema
): Promise<Buffer> {
if (schema !== undefined && schema !== null) {
schema = sanitizeSchema(schema);
}
const table = await convertToTable(data, embeddings, { schema, embeddings });
const writer = RecordBatchFileWriter.writeAll(table);
return Buffer.from(await writer.toUint8Array());
}
/**
* Serialize an Array of records into a buffer using the Arrow IPC Stream serialization
*
* This function will call `convertToTable` and pass on `embeddings` and `schema`
*
* `schema` is required if data is empty
*/
export async function fromRecordsToStreamBuffer<T>(
data: Array<Record<string, unknown>>,
embeddings?: EmbeddingFunction<T>,
schema?: Schema
): Promise<Buffer> {
if (schema !== null && schema !== undefined) {
schema = sanitizeSchema(schema);
}
const table = await convertToTable(data, embeddings, { schema });
const writer = RecordBatchStreamWriter.writeAll(table);
return Buffer.from(await writer.toUint8Array());
}
/**
* Serialize an Arrow Table into a buffer using the Arrow IPC File serialization
*
* This function will apply `embeddings` to the table in a manner similar to
* `convertToTable`.
*
* `schema` is required if the table is empty
*/
export async function fromTableToBuffer<T>(
table: ArrowTable,
embeddings?: EmbeddingFunction<T>,
schema?: Schema
): Promise<Buffer> {
if (schema !== null && schema !== undefined) {
schema = sanitizeSchema(schema);
}
const tableWithEmbeddings = await applyEmbeddings(table, embeddings, schema);
const writer = RecordBatchFileWriter.writeAll(tableWithEmbeddings);
return Buffer.from(await writer.toUint8Array());
}
/**
* Serialize an Arrow Table into a buffer using the Arrow IPC Stream serialization
*
* This function will apply `embeddings` to the table in a manner similar to
* `convertToTable`.
*
* `schema` is required if the table is empty
*/
export async function fromTableToStreamBuffer<T>(
table: ArrowTable,
embeddings?: EmbeddingFunction<T>,
schema?: Schema
): Promise<Buffer> {
if (schema !== null && schema !== undefined) {
schema = sanitizeSchema(schema);
}
const tableWithEmbeddings = await applyEmbeddings(table, embeddings, schema);
const writer = RecordBatchStreamWriter.writeAll(tableWithEmbeddings);
return Buffer.from(await writer.toUint8Array());
}
function alignBatch(batch: RecordBatch, schema: Schema): RecordBatch {
const alignedChildren = [];
for (const field of schema.fields) {
const indexInBatch = batch.schema.fields?.findIndex(
(f) => f.name === field.name
);
if (indexInBatch < 0) {
throw new Error(
`The column ${field.name} was not found in the Arrow Table`
);
}
alignedChildren.push(batch.data.children[indexInBatch]);
}
const newData = makeData({
type: new Struct(schema.fields),
length: batch.numRows,
nullCount: batch.nullCount,
children: alignedChildren
});
return new RecordBatch(schema, newData);
}
function alignTable(table: ArrowTable, schema: Schema): ArrowTable {
const alignedBatches = table.batches.map((batch) =>
alignBatch(batch, schema)
);
return new ArrowTable(schema, alignedBatches);
}
// Creates an empty Arrow Table
export function createEmptyTable(schema: Schema): ArrowTable {
return new ArrowTable(sanitizeSchema(schema));
}
function validateSchemaEmbeddings(
schema: Schema<any>,
data: Array<Record<string, unknown>>,
embeddings: EmbeddingFunction<any> | undefined
) {
const fields = [];
const missingEmbeddingFields = [];
// First we check if the field is a `FixedSizeList`
// Then we check if the data contains the field
// if it does not, we add it to the list of missing embedding fields
// Finally, we check if those missing embedding fields are `this._embeddings`
// if they are not, we throw an error
for (const field of schema.fields) {
if (field.type instanceof FixedSizeList) {
if (data.length !== 0 && data?.[0]?.[field.name] === undefined) {
missingEmbeddingFields.push(field);
} else {
fields.push(field);
}
} else {
fields.push(field);
}
}
if (missingEmbeddingFields.length > 0 && embeddings === undefined) {
throw new Error(
`Table has embeddings: "${missingEmbeddingFields
.map((f) => f.name)
.join(",")}", but no embedding function was provided`
);
}
return new Schema(fields, schema.metadata);
}


@@ -1,68 +0,0 @@
// Copyright 2023 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { type Float } from 'apache-arrow'
/**
* An embedding function that automatically creates vector representation for a given column.
*/
export interface EmbeddingFunction<T> {
/**
* The name of the column that will be used as input for the Embedding Function.
*/
sourceColumn: string
/**
* The data type of the embedding
*
* The embedding function should return `number`. This will be converted into
* an Arrow float array. By default this will be Float32 but this property can
* be used to control the conversion.
*/
embeddingDataType?: Float
/**
* The dimension of the embedding
*
* This is optional, normally this can be determined by looking at the results of
* `embed`. If this is not specified, and there is an attempt to apply the embedding
* to an empty table, then that process will fail.
*/
embeddingDimension?: number
/**
* The name of the column that will contain the embedding
*
* By default this is "vector"
*/
destColumn?: string
/**
* Should the source column be excluded from the resulting table
*
* By default the source column is included. Set this to true and
* only the embedding will be stored.
*/
excludeSource?: boolean
/**
* Creates a vector representation for the given values.
*/
embed: (data: T[]) => Promise<number[][]>
}
export function isEmbeddingFunction<T> (value: any): value is EmbeddingFunction<T> {
return typeof value.sourceColumn === 'string' &&
typeof value.embed === 'function'
}


@@ -1,57 +0,0 @@
// Copyright 2023 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { type EmbeddingFunction } from '../index'
import type OpenAI from 'openai'
export class OpenAIEmbeddingFunction implements EmbeddingFunction<string> {
private readonly _openai: OpenAI
private readonly _modelName: string
constructor (sourceColumn: string, openAIKey: string, modelName: string = 'text-embedding-ada-002') {
/**
* @type {import("openai").default}
*/
let Openai
try {
// eslint-disable-next-line @typescript-eslint/no-var-requires
Openai = require('openai')
} catch {
throw new Error('please install openai@^4.24.1 using npm install openai')
}
this.sourceColumn = sourceColumn
const configuration = {
apiKey: openAIKey
}
this._openai = new Openai(configuration)
this._modelName = modelName
}
async embed (data: string[]): Promise<number[][]> {
const response = await this._openai.embeddings.create({
model: this._modelName,
input: data
})
const embeddings: number[][] = []
for (let i = 0; i < response.data.length; i++) {
embeddings.push(response.data[i].embedding)
}
return embeddings
}
sourceColumn: string
}

File diff suppressed because it is too large.


@@ -1,180 +0,0 @@
// Copyright 2023 LanceDB Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { describe } from 'mocha'
import * as chai from 'chai'
import { assert } from 'chai'
import * as chaiAsPromised from 'chai-as-promised'
import { v4 as uuidv4 } from 'uuid'
import * as lancedb from '../index'
import { tmpdir } from 'os'
import * as fs from 'fs'
import * as path from 'path'
chai.use(chaiAsPromised)
describe('LanceDB AWS Integration test', function () {
it('s3+ddb schema is processed correctly', async function () {
this.timeout(15000)
// WARNING: specifying engine is NOT a publicly supported feature in lancedb yet
// THE API WILL CHANGE
const conn = await lancedb.connect('s3://lancedb-integtest?engine=ddb&ddbTableName=lancedb-integtest')
const data = [{ vector: Array(128).fill(1.0) }]
const tableName = uuidv4()
let table = await conn.createTable(tableName, data, { writeMode: lancedb.WriteMode.Overwrite })
const futs = [table.add(data), table.add(data), table.add(data), table.add(data), table.add(data)]
await Promise.allSettled(futs)
table = await conn.openTable(tableName)
assert.equal(await table.countRows(), 6)
})
})
describe('LanceDB Mirrored Store Integration test', function () {
it('s3://...?mirroredStore=... param is processed correctly', async function () {
this.timeout(600000)
const dir = tmpdir()
console.log(dir)
const conn = await lancedb.connect({ uri: `s3://lancedb-integtest?mirroredStore=${dir}`, storageOptions: { allowHttp: 'true' } })
const data = Array(200).fill({ vector: Array(128).fill(1.0), id: 0 })
data.push(...Array(200).fill({ vector: Array(128).fill(1.0), id: 1 }))
data.push(...Array(200).fill({ vector: Array(128).fill(1.0), id: 2 }))
data.push(...Array(200).fill({ vector: Array(128).fill(1.0), id: 3 }))
const tableName = uuidv4()
// try creating the table and check that it's mirrored
const t = await conn.createTable(tableName, data, { writeMode: lancedb.WriteMode.Overwrite })
const mirroredPath = path.join(dir, `${tableName}.lance`)
fs.readdir(mirroredPath, { withFileTypes: true }, (err, files) => {
if (err != null) throw err
// there should be three dirs
assert.equal(files.length, 3)
assert.isTrue(files[0].isDirectory())
assert.isTrue(files[1].isDirectory())
fs.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 1)
assert.isTrue(files[0].name.endsWith('.txn'))
})
fs.readdir(path.join(mirroredPath, '_versions'), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 1)
assert.isTrue(files[0].name.endsWith('.manifest'))
})
fs.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 1)
assert.isTrue(files[0].name.endsWith('.lance'))
})
})
// try creating an index and check that it's mirrored
await t.createIndex({ column: 'vector', type: 'ivf_pq' })
fs.readdir(mirroredPath, { withFileTypes: true }, (err, files) => {
if (err != null) throw err
// there should be four dirs
assert.equal(files.length, 4)
assert.isTrue(files[0].isDirectory())
assert.isTrue(files[1].isDirectory())
assert.isTrue(files[2].isDirectory())
// Two TXs now
fs.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 2)
assert.isTrue(files[0].name.endsWith('.txn'))
assert.isTrue(files[1].name.endsWith('.txn'))
})
fs.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 1)
assert.isTrue(files[0].name.endsWith('.lance'))
})
fs.readdir(path.join(mirroredPath, '_indices'), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 1)
assert.isTrue(files[0].isDirectory())
fs.readdir(path.join(mirroredPath, '_indices', files[0].name), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 1)
assert.isTrue(files[0].isFile())
assert.isTrue(files[0].name.endsWith('.idx'))
})
})
})
// try a delete and check that it's mirrored
await t.delete('id = 0')
fs.readdir(mirroredPath, { withFileTypes: true }, (err, files) => {
if (err != null) throw err
// there should be five dirs
assert.equal(files.length, 5)
assert.isTrue(files[0].isDirectory())
assert.isTrue(files[1].isDirectory())
assert.isTrue(files[2].isDirectory())
assert.isTrue(files[3].isDirectory())
assert.isTrue(files[4].isDirectory())
// Three TXs now
fs.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 3)
assert.isTrue(files[0].name.endsWith('.txn'))
assert.isTrue(files[1].name.endsWith('.txn'))
assert.isTrue(files[2].name.endsWith('.txn'))
})
fs.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 1)
assert.isTrue(files[0].name.endsWith('.lance'))
})
fs.readdir(path.join(mirroredPath, '_indices'), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 1)
assert.isTrue(files[0].isDirectory())
fs.readdir(path.join(mirroredPath, '_indices', files[0].name), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 1)
assert.isTrue(files[0].isFile())
assert.isTrue(files[0].name.endsWith('.idx'))
})
})
fs.readdir(path.join(mirroredPath, '_deletions'), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 1)
assert.isTrue(files[0].name.endsWith('.arrow'))
})
})
})
})
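
For reference, this is the mirrored directory layout the assertions above walk through, reconstructed from the test itself (a sketch, not separate documentation):

```
<tableName>.lance/
├── _transactions/          one *.txn file per commit
├── _versions/              *.manifest files
├── data/                   *.lance data files
├── _indices/<index-uuid>/  *.idx index file
└── _deletions/             *.arrow deletion files
```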


@@ -1,58 +0,0 @@
// Copyright 2024 LanceDB Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/**
* Middleware for Remote LanceDB Connection or Table
*/
export interface HttpMiddleware {
/**
* A callback that can be used to instrument the behavior of http requests to remote
* tables. It can be used to add headers, modify the request, or even short-circuit
* the request and return a response without making the request to the remote endpoint.
* It can also be used to modify the response from the remote endpoint.
*
* @param {RemoteRequest} req - Request to the remote endpoint
* @param {onRemoteRequestNext} next - Callback to advance the middleware chain
*/
onRemoteRequest(
req: RemoteRequest,
next: (req: RemoteRequest) => Promise<RemoteResponse>,
): Promise<RemoteResponse>
};
export enum Method {
GET,
POST
}
/**
* A LanceDB Remote HTTP Request
*/
export interface RemoteRequest {
uri: string
method: Method
headers: Map<string, string>
params?: Map<string, string>
body?: any
}
/**
* A LanceDB Remote HTTP Response
*/
export interface RemoteResponse {
status: number
statusText: string
headers: Map<string, string>
body: () => Promise<any>
}
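
A minimal sketch of implementing the interface above: a middleware that attaches a tracing header and logs the response status. The header name and logging are illustrative, not part of the API.

```ts
// Illustrative middleware: mutate the request, delegate to `next`, inspect the response.
const tracing: HttpMiddleware = {
  async onRemoteRequest (req, next) {
    req.headers.set('x-request-id', `req-${Date.now()}`)
    const res = await next(req)
    console.log(`${req.uri} -> ${res.status}`)
    return res
  }
}
```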


@@ -1,163 +0,0 @@
// Copyright 2023 LanceDB Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { Vector, tableFromIPC } from 'apache-arrow'
import { type EmbeddingFunction } from './embedding/embedding_function'
import { type MetricType } from '.'
// eslint-disable-next-line @typescript-eslint/no-var-requires
const { tableSearch } = require('../native.js')
/**
* A builder for nearest neighbor queries for LanceDB.
*/
export class Query<T = number[]> {
private readonly _query?: T
private readonly _tbl?: any
private _queryVector?: number[]
private _limit?: number
private _refineFactor?: number
private _nprobes: number
private _select?: string[]
private _filter?: string
private _metricType?: MetricType
private _prefilter: boolean
private _fastSearch: boolean
protected readonly _embeddings?: EmbeddingFunction<T>
constructor (query?: T, tbl?: any, embeddings?: EmbeddingFunction<T>) {
this._tbl = tbl
this._query = query
this._limit = 10
this._nprobes = 20
this._refineFactor = undefined
this._select = undefined
this._filter = undefined
this._metricType = undefined
this._embeddings = embeddings
this._prefilter = false
this._fastSearch = false
}
/**
* Sets the number of results that will be returned.
* The default value is 10.
* @param value number of results
*/
limit (value: number): Query<T> {
this._limit = value
return this
}
/**
* Refine the results by reading extra elements and re-ranking them in memory.
* @param value refine factor to use in this query.
*/
refineFactor (value: number): Query<T> {
this._refineFactor = value
return this
}
/**
* The number of probes used. A higher number makes search more accurate but also slower.
* @param value The number of probes used.
*/
nprobes (value: number): Query<T> {
this._nprobes = value
return this
}
/**
* A filter statement to be applied to this query.
* @param value A filter in the same format used by a sql WHERE clause.
*/
filter (value: string): Query<T> {
this._filter = value
return this
}
where = this.filter
/** Return only the specified columns.
*
* @param value Only select the specified columns. If not specified, all columns will be returned.
*/
select (value: string[]): Query<T> {
this._select = value
return this
}
/**
* The MetricType used for this Query.
* @param value The metric to use. @see MetricType for the different options
*/
metricType (value: MetricType): Query<T> {
this._metricType = value
return this
}
prefilter (value: boolean): Query<T> {
this._prefilter = value
return this
}
/**
* Skip searching un-indexed data. This can make search faster, but will miss
* any data that is not yet indexed.
*/
fastSearch (value: boolean): Query<T> {
this._fastSearch = value
return this
}
/**
* Execute the query and return the results as an Array of Objects
*/
async execute<T = Record<string, unknown>> (): Promise<T[]> {
if (this._query !== undefined) {
if (this._embeddings !== undefined) {
this._queryVector = (await this._embeddings.embed([this._query]))[0]
} else {
this._queryVector = this._query as number[]
}
}
const isElectron = this.isElectron()
const buffer = await tableSearch.call(this._tbl, this, isElectron)
const data = tableFromIPC(buffer)
return data.toArray().map((entry: Record<string, unknown>) => {
const newObject: Record<string, unknown> = {}
Object.keys(entry).forEach((key: string) => {
if (entry[key] instanceof Vector) {
// toJSON() returns f16 array correctly
newObject[key] = (entry[key] as any).toJSON()
} else {
newObject[key] = entry[key] as any
}
})
return newObject as unknown as T
})
}
// See https://github.com/electron/electron/issues/2288
private isElectron (): boolean {
try {
// eslint-disable-next-line no-prototype-builtins
return (process?.versions?.hasOwnProperty('electron') || navigator?.userAgent?.toLowerCase()?.includes(' electron'))
} catch (e) {
return false
}
}
}
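
A sketch of the fluent API above. `table` stands in for any object whose `search` returns a `Query`; the vector, filter, and parameter values are placeholders.

```ts
// Illustrative query: filter before the ANN search, widen the probe count,
// and return at most five rows.
async function nearest (table: { search: (q: number[]) => Query }): Promise<void> {
  const rows = await table
    .search([0.1, 0.2, 0.3])
    .limit(5)
    .nprobes(40)
    .filter('id > 10')
    .prefilter(true)
    .execute()
  console.log(rows)
}
```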


@@ -1,302 +0,0 @@
// Copyright 2023 LanceDB Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import axios, { type AxiosError, type AxiosResponse, type ResponseType } from 'axios'
import { tableFromIPC, type Table as ArrowTable } from 'apache-arrow'
import { type RemoteResponse, type RemoteRequest, Method } from '../middleware'
import type { MetricType } from '..'
interface HttpLancedbClientMiddleware {
onRemoteRequest(
req: RemoteRequest,
next: (req: RemoteRequest) => Promise<RemoteResponse>,
): Promise<RemoteResponse>
}
/**
* Invoke the middleware chain and at the end call the remote endpoint
*/
async function callWithMiddlewares (
req: RemoteRequest,
middlewares: HttpLancedbClientMiddleware[],
opts?: MiddlewareInvocationOptions
): Promise<RemoteResponse> {
async function call (
i: number,
req: RemoteRequest
): Promise<RemoteResponse> {
// if we have reached the end of the middleware chain, make the request
if (i > middlewares.length) {
const headers = Object.fromEntries(req.headers.entries())
const params = Object.fromEntries(req.params?.entries() ?? [])
const timeout = opts?.timeout
let res
if (req.method === Method.POST) {
res = await axios.post(
req.uri,
req.body,
{
headers,
params,
timeout,
responseType: opts?.responseType
}
)
} else {
res = await axios.get(
req.uri,
{
headers,
params,
timeout
}
)
}
return toLanceRes(res)
}
// call next middleware in chain
return await middlewares[i - 1].onRemoteRequest(
req,
async (req) => {
return await call(i + 1, req)
}
)
}
return await call(1, req)
}
interface MiddlewareInvocationOptions {
responseType?: ResponseType
timeout?: number
}
/**
* Marshall the library response into a LanceDB response
*/
function toLanceRes (res: AxiosResponse): RemoteResponse {
const headers = new Map()
for (const h in res.headers) {
headers.set(h, res.headers[h])
}
return {
status: res.status,
statusText: res.statusText,
headers,
body: async () => {
return res.data
}
}
}
async function decodeErrorData(
res: RemoteResponse,
responseType?: ResponseType
): Promise<string> {
const errorData = await res.body()
if (responseType === 'arraybuffer') {
return new TextDecoder().decode(errorData)
} else {
if (typeof errorData === 'object') {
return JSON.stringify(errorData)
}
return errorData
}
}
export class HttpLancedbClient {
private readonly _url: string
private readonly _apiKey: () => string
private readonly _middlewares: HttpLancedbClientMiddleware[]
private readonly _timeout: number | undefined
public constructor (
url: string,
apiKey: string,
timeout?: number,
private readonly _dbName?: string
) {
this._url = url
this._apiKey = () => apiKey
this._middlewares = []
this._timeout = timeout
}
get uri (): string {
return this._url
}
public async search (
tableName: string,
vector: number[],
k: number,
nprobes: number,
prefilter: boolean,
refineFactor?: number,
columns?: string[],
filter?: string,
metricType?: MetricType,
fastSearch?: boolean
): Promise<ArrowTable<any>> {
const result = await this.post(
`/v1/table/${tableName}/query/`,
{
vector,
k,
nprobes,
refine_factor: refineFactor,
columns,
filter,
prefilter,
metric: metricType,
fast_search: fastSearch
},
undefined,
undefined,
'arraybuffer'
)
const table = tableFromIPC(await result.body())
return table
}
/**
* Send a GET request.
*/
public async get (path: string, params?: Record<string, string>): Promise<RemoteResponse> {
const req = {
uri: `${this._url}${path}`,
method: Method.GET,
headers: new Map(Object.entries({
'Content-Type': 'application/json',
'x-api-key': this._apiKey(),
...(this._dbName !== undefined ? { 'x-lancedb-database': this._dbName } : {})
})),
params: new Map(Object.entries(params ?? {}))
}
let response
try {
response = await callWithMiddlewares(req, this._middlewares)
return response
} catch (err: any) {
console.error(serializeErrorAsJson(err))
if (err.response === undefined) {
throw new Error(`Network Error: ${err.message as string}`)
}
response = toLanceRes(err.response)
}
if (response.status !== 200) {
const errorData = await decodeErrorData(response)
throw new Error(
`Server Error, status: ${response.status}, ` +
`message: ${response.statusText}: ${errorData}`
)
}
return response
}
/**
* Send a POST request.
*/
public async post (
path: string,
data?: any,
params?: Record<string, string>,
content?: string | undefined,
responseType?: ResponseType | undefined
): Promise<RemoteResponse> {
const req = {
uri: `${this._url}${path}`,
method: Method.POST,
headers: new Map(Object.entries({
'Content-Type': content ?? 'application/json',
'x-api-key': this._apiKey(),
...(this._dbName !== undefined ? { 'x-lancedb-database': this._dbName } : {})
})),
params: new Map(Object.entries(params ?? {})),
body: data
}
let response
try {
response = await callWithMiddlewares(req, this._middlewares, {
responseType,
timeout: this._timeout
})
} catch (err: any) {
console.error(serializeErrorAsJson(err))
if (err.response === undefined) {
throw new Error(`Network Error: ${err.message as string}`)
}
response = toLanceRes(err.response)
}
if (response.status !== 200) {
const errorData = await decodeErrorData(response, responseType)
throw new Error(
`Server Error, status: ${response.status}, ` +
`message: ${response.statusText}: ${errorData}`
)
}
return response
}
/**
* Instrument this client with middleware
* @param mw - The middleware that instruments the client
* @returns - an instance of this client instrumented with the middleware
*/
public withMiddleware (mw: HttpLancedbClientMiddleware): HttpLancedbClient {
const wrapped = this.clone()
wrapped._middlewares.push(mw)
return wrapped
}
/**
* Make a clone of this client
*/
private clone (): HttpLancedbClient {
const clone = new HttpLancedbClient(this._url, this._apiKey(), this._timeout, this._dbName)
for (const mw of this._middlewares) {
clone._middlewares.push(mw)
}
return clone
}
}
function serializeErrorAsJson(err: AxiosError) {
const error = JSON.parse(JSON.stringify(err, Object.getOwnPropertyNames(err)))
error.response = err.response != null
? JSON.parse(JSON.stringify(
err.response,
// config contains the request data, too noisy
Object.getOwnPropertyNames(err.response).filter(prop => prop !== 'config')
))
: null
return JSON.stringify({ error })
}
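
A sketch of composing the client above with a middleware; the URL, key, and header are placeholders.

```ts
// Illustrative composition: withMiddleware returns a clone, so `base` is
// left uninstrumented.
const base = new HttpLancedbClient('https://example.api.lancedb.com', 'sk-placeholder')
const instrumented = base.withMiddleware({
  async onRemoteRequest (req, next) {
    req.headers.set('x-client-version', 'example')
    return await next(req)
  }
})
```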


@@ -1,567 +0,0 @@
// Copyright 2023 LanceDB Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import {
type EmbeddingFunction,
type Table,
type VectorIndexParams,
type Connection,
type ConnectionOptions,
type CreateTableOptions,
type VectorIndex,
type WriteOptions,
type IndexStats,
type UpdateArgs,
type UpdateSqlArgs,
makeArrowTable,
type MergeInsertArgs,
type ColumnAlteration
} from '../index'
import { Query } from '../query'
import { Vector, Table as ArrowTable } from 'apache-arrow'
import { HttpLancedbClient } from './client'
import { isEmbeddingFunction } from '../embedding/embedding_function'
import {
createEmptyTable,
fromRecordsToStreamBuffer,
fromTableToStreamBuffer
} from '../arrow'
import { toSQL, TTLCache } from '../util'
import { type HttpMiddleware } from '../middleware'
/**
* Remote connection.
*/
export class RemoteConnection implements Connection {
private _client: HttpLancedbClient
private readonly _dbName: string
private readonly _tableCache = new TTLCache(300_000)
constructor (opts: ConnectionOptions) {
if (!opts.uri.startsWith('db://')) {
throw new Error(`Invalid remote DB URI: ${opts.uri}`)
}
if (opts.apiKey == null || opts.apiKey === '') {
opts = Object.assign({}, opts, { apiKey: process.env.LANCEDB_API_KEY })
}
if (opts.apiKey === undefined || opts.region === undefined) {
throw new Error(
'API key and region must be passed for remote connections. ' +
'API key can also be set through LANCEDB_API_KEY env variable.')
}
this._dbName = opts.uri.slice('db://'.length)
let server: string
if (opts.hostOverride === undefined) {
server = `https://${this._dbName}.${opts.region}.api.lancedb.com`
} else {
server = opts.hostOverride
}
this._client = new HttpLancedbClient(
server,
opts.apiKey,
opts.timeout,
opts.hostOverride === undefined ? undefined : this._dbName
)
}
get uri (): string {
// add the db:// prefix back
return 'db://' + this._client.uri
}
async tableNames (
pageToken: string = '',
limit: number = 10
): Promise<string[]> {
const response = await this._client.get('/v1/table/', {
limit: `${limit}`,
page_token: pageToken
})
const body = await response.body()
for (const table of body.tables) {
this._tableCache.set(table, true)
}
return body.tables
}
async openTable (name: string): Promise<Table>
async openTable<T>(
name: string,
embeddings: EmbeddingFunction<T>
): Promise<Table<T>>
async openTable<T>(
name: string,
embeddings?: EmbeddingFunction<T>
): Promise<Table<T>> {
// check if the table exists
if (this._tableCache.get(name) === undefined) {
await this._client.post(`/v1/table/${encodeURIComponent(name)}/describe/`)
this._tableCache.set(name, true)
}
if (embeddings !== undefined) {
return new RemoteTable(this._client, name, embeddings)
} else {
return new RemoteTable(this._client, name)
}
}
async createTable<T>(
nameOrOpts: string | CreateTableOptions<T>,
data?: Array<Record<string, unknown>> | ArrowTable,
optsOrEmbedding?: WriteOptions | EmbeddingFunction<T>,
opt?: WriteOptions
): Promise<Table<T>> {
// Logic copied from LocalConnection; refactor these to a base class + connectionImpl pattern
let schema
let embeddings: undefined | EmbeddingFunction<T>
let tableName: string
if (typeof nameOrOpts === 'string') {
if (
optsOrEmbedding !== undefined &&
isEmbeddingFunction(optsOrEmbedding)
) {
embeddings = optsOrEmbedding
}
tableName = nameOrOpts
} else {
schema = nameOrOpts.schema
embeddings = nameOrOpts.embeddingFunction
tableName = nameOrOpts.name
if (data === undefined) {
data = nameOrOpts.data
}
}
let buffer: Buffer
function isEmpty (
data: Array<Record<string, unknown>> | ArrowTable<any>
): boolean {
if (data instanceof ArrowTable) {
return data.numRows === 0
}
return data.length === 0
}
if (data === undefined || isEmpty(data)) {
if (schema === undefined) {
throw new Error('Either data or schema needs to be defined')
}
buffer = await fromTableToStreamBuffer(createEmptyTable(schema))
} else if (data instanceof ArrowTable) {
buffer = await fromTableToStreamBuffer(data, embeddings)
} else {
// data is Array<Record<...>>
buffer = await fromRecordsToStreamBuffer(data, embeddings)
}
const res = await this._client.post(
`/v1/table/${encodeURIComponent(tableName)}/create/`,
buffer,
undefined,
'application/vnd.apache.arrow.stream'
)
if (res.status !== 200) {
throw new Error(
`Server Error, status: ${res.status}, ` +
// eslint-disable-next-line @typescript-eslint/restrict-template-expressions
`message: ${res.statusText}: ${await res.body()}`
)
}
this._tableCache.set(tableName, true)
if (embeddings === undefined) {
return new RemoteTable(this._client, tableName)
} else {
return new RemoteTable(this._client, tableName, embeddings)
}
}
async dropTable (name: string): Promise<void> {
await this._client.post(`/v1/table/${encodeURIComponent(name)}/drop/`)
this._tableCache.delete(name)
}
withMiddleware (middleware: HttpMiddleware): Connection {
const wrapped = this.clone()
wrapped._client = wrapped._client.withMiddleware(middleware)
return wrapped
}
private clone (): RemoteConnection {
const clone: RemoteConnection = Object.create(RemoteConnection.prototype)
return Object.assign(clone, this)
}
}
export class RemoteQuery<T = number[]> extends Query<T> {
constructor (
query: T,
private readonly _client: HttpLancedbClient,
private readonly _name: string,
embeddings?: EmbeddingFunction<T>
) {
super(query, undefined, embeddings)
}
// TODO: refactor this to a base class + queryImpl pattern
async execute<T = Record<string, unknown>>(): Promise<T[]> {
const embeddings = this._embeddings
const query = (this as any)._query
let queryVector: number[]
if (embeddings !== undefined) {
queryVector = (await embeddings.embed([query]))[0]
} else {
queryVector = query as number[]
}
const data = await this._client.search(
this._name,
queryVector,
(this as any)._limit,
(this as any)._nprobes,
(this as any)._prefilter,
(this as any)._refineFactor,
(this as any)._select,
(this as any)._filter,
(this as any)._metricType,
(this as any)._fastSearch
)
return data.toArray().map((entry: Record<string, unknown>) => {
const newObject: Record<string, unknown> = {}
Object.keys(entry).forEach((key: string) => {
if (entry[key] instanceof Vector) {
newObject[key] = (entry[key] as any).toArray()
} else {
newObject[key] = entry[key] as any
}
})
return newObject as unknown as T
})
}
}
// we are using extend until the next version release
// Table and Connection have both been refactored to interfaces
export class RemoteTable<T = number[]> implements Table<T> {
private _client: HttpLancedbClient
private readonly _embeddings?: EmbeddingFunction<T>
private readonly _name: string
constructor (client: HttpLancedbClient, name: string)
constructor (
client: HttpLancedbClient,
name: string,
embeddings: EmbeddingFunction<T>
)
constructor (
client: HttpLancedbClient,
name: string,
embeddings?: EmbeddingFunction<T>
) {
this._client = client
this._name = name
this._embeddings = embeddings
}
get name (): string {
return this._name
}
get schema (): Promise<any> {
return this._client
.post(`/v1/table/${encodeURIComponent(this._name)}/describe/`)
.then(async (res) => {
if (res.status !== 200) {
throw new Error(
`Server Error, status: ${res.status}, ` +
// eslint-disable-next-line @typescript-eslint/restrict-template-expressions
`message: ${res.statusText}: ${await res.body()}`
)
}
return (await res.body())?.schema
})
}
search (query: T): Query<T> {
return new RemoteQuery(query, this._client, encodeURIComponent(this._name))
}
filter (where: string): Query<T> {
throw new Error('Not implemented')
}
async mergeInsert (on: string, data: Array<Record<string, unknown>> | ArrowTable, args: MergeInsertArgs): Promise<void> {
let tbl: ArrowTable
if (data instanceof ArrowTable) {
tbl = data
} else {
tbl = makeArrowTable(data, await this.schema)
}
const queryParams: any = {
on
}
if (args.whenMatchedUpdateAll !== false && args.whenMatchedUpdateAll !== null && args.whenMatchedUpdateAll !== undefined) {
queryParams.when_matched_update_all = 'true'
if (typeof args.whenMatchedUpdateAll === 'string') {
queryParams.when_matched_update_all_filt = args.whenMatchedUpdateAll
}
} else {
queryParams.when_matched_update_all = 'false'
}
if (args.whenNotMatchedInsertAll ?? false) {
queryParams.when_not_matched_insert_all = 'true'
} else {
queryParams.when_not_matched_insert_all = 'false'
}
if (args.whenNotMatchedBySourceDelete !== false && args.whenNotMatchedBySourceDelete !== null && args.whenNotMatchedBySourceDelete !== undefined) {
queryParams.when_not_matched_by_source_delete = 'true'
if (typeof args.whenNotMatchedBySourceDelete === 'string') {
queryParams.when_not_matched_by_source_delete_filt = args.whenNotMatchedBySourceDelete
}
} else {
queryParams.when_not_matched_by_source_delete = 'false'
}
const buffer = await fromTableToStreamBuffer(tbl, this._embeddings)
const res = await this._client.post(
`/v1/table/${encodeURIComponent(this._name)}/merge_insert/`,
buffer,
queryParams,
'application/vnd.apache.arrow.stream'
)
if (res.status !== 200) {
throw new Error(
`Server Error, status: ${res.status}, ` +
// eslint-disable-next-line @typescript-eslint/restrict-template-expressions
`message: ${res.statusText}: ${await res.body()}`
)
}
}
async add (data: Array<Record<string, unknown>> | ArrowTable): Promise<number> {
let tbl: ArrowTable
if (data instanceof ArrowTable) {
tbl = data
} else {
tbl = makeArrowTable(data, await this.schema)
}
const buffer = await fromTableToStreamBuffer(tbl, this._embeddings)
const res = await this._client.post(
`/v1/table/${encodeURIComponent(this._name)}/insert/`,
buffer,
{
mode: 'append'
},
'application/vnd.apache.arrow.stream'
)
if (res.status !== 200) {
throw new Error(
`Server Error, status: ${res.status}, ` +
// eslint-disable-next-line @typescript-eslint/restrict-template-expressions
`message: ${res.statusText}: ${await res.body()}`
)
}
return tbl.numRows
}
async overwrite (data: Array<Record<string, unknown>> | ArrowTable): Promise<number> {
let tbl: ArrowTable
if (data instanceof ArrowTable) {
tbl = data
} else {
tbl = makeArrowTable(data)
}
const buffer = await fromTableToStreamBuffer(tbl, this._embeddings)
const res = await this._client.post(
`/v1/table/${encodeURIComponent(this._name)}/insert/`,
buffer,
{
mode: 'overwrite'
},
'application/vnd.apache.arrow.stream'
)
if (res.status !== 200) {
throw new Error(
`Server Error, status: ${res.status}, ` +
// eslint-disable-next-line @typescript-eslint/restrict-template-expressions
`message: ${res.statusText}: ${await res.body()}`
)
}
return tbl.numRows
}
async createIndex (indexParams: VectorIndexParams): Promise<void> {
const unsupportedParams = [
'index_name',
'num_partitions',
'max_iters',
'use_opq',
'num_sub_vectors',
'num_bits',
'max_opq_iters',
'replace'
]
for (const param of unsupportedParams) {
// eslint-disable-next-line @typescript-eslint/strict-boolean-expressions
if (indexParams[param as keyof VectorIndexParams]) {
throw new Error(`${param} is not supported for remote connections`)
}
}
const column = indexParams.column ?? 'vector'
const indexType = 'vector'
const metricType = indexParams.metric_type ?? 'L2'
const indexCacheSize = indexParams.index_cache_size ?? null
const data = {
column,
index_type: indexType,
metric_type: metricType,
index_cache_size: indexCacheSize
}
const res = await this._client.post(
`/v1/table/${encodeURIComponent(this._name)}/create_index/`,
data
)
if (res.status !== 200) {
throw new Error(
`Server Error, status: ${res.status}, ` +
// eslint-disable-next-line @typescript-eslint/restrict-template-expressions
`message: ${res.statusText}: ${await res.body()}`
)
}
}
async createScalarIndex (column: string): Promise<void> {
const indexType = 'scalar'
const data = {
column,
index_type: indexType,
replace: true
}
const res = await this._client.post(
`/v1/table/${encodeURIComponent(this._name)}/create_scalar_index/`,
data
)
if (res.status !== 200) {
throw new Error(
`Server Error, status: ${res.status}, ` +
// eslint-disable-next-line @typescript-eslint/restrict-template-expressions
`message: ${res.statusText}: ${await res.body()}`
)
}
}
async dropIndex (index_name: string): Promise<void> {
const res = await this._client.post(
`/v1/table/${encodeURIComponent(this._name)}/index/${encodeURIComponent(index_name)}/drop/`
)
if (res.status !== 200) {
throw new Error(
`Server Error, status: ${res.status}, ` +
// eslint-disable-next-line @typescript-eslint/restrict-template-expressions
`message: ${res.statusText}: ${await res.body()}`
)
}
}
async countRows (filter?: string): Promise<number> {
const result = await this._client.post(`/v1/table/${encodeURIComponent(this._name)}/count_rows/`, {
predicate: filter
})
return (await result.body())
}
async delete (filter: string): Promise<void> {
await this._client.post(`/v1/table/${encodeURIComponent(this._name)}/delete/`, {
predicate: filter
})
}
async update (args: UpdateArgs | UpdateSqlArgs): Promise<void> {
let filter: string | null
let updates: Record<string, string>
if ('valuesSql' in args) {
filter = args.where ?? null
updates = args.valuesSql
} else {
filter = args.where ?? null
updates = {}
for (const [key, value] of Object.entries(args.values)) {
updates[key] = toSQL(value)
}
}
await this._client.post(`/v1/table/${encodeURIComponent(this._name)}/update/`, {
predicate: filter,
updates: Object.entries(updates).map(([key, value]) => [key, value])
})
}
async listIndices (): Promise<VectorIndex[]> {
const results = await this._client.post(
`/v1/table/${encodeURIComponent(this._name)}/index/list/`
)
return (await results.body()).indexes?.map((index: any) => ({
columns: index.columns,
name: index.index_name,
uuid: index.index_uuid,
status: index.status
}))
}
async indexStats (indexName: string): Promise<IndexStats> {
const results = await this._client.post(
`/v1/table/${encodeURIComponent(this._name)}/index/${indexName}/stats/`
)
const body = await results.body()
return {
numIndexedRows: body?.num_indexed_rows,
numUnindexedRows: body?.num_unindexed_rows,
indexType: body?.index_type,
distanceType: body?.distance_type
}
}
async addColumns (newColumnTransforms: Array<{ name: string, valueSql: string }>): Promise<void> {
throw new Error('Add columns is not yet supported in LanceDB Cloud.')
}
async alterColumns (columnAlterations: ColumnAlteration[]): Promise<void> {
throw new Error('Alter columns is not yet supported in LanceDB Cloud.')
}
async dropColumns (columnNames: string[]): Promise<void> {
throw new Error('Drop columns is not yet supported in LanceDB Cloud.')
}
withMiddleware(middleware: HttpMiddleware): Table<T> {
const wrapped = this.clone()
wrapped._client = wrapped._client.withMiddleware(middleware)
return wrapped
}
private clone (): RemoteTable<T> {
const clone: RemoteTable<T> = Object.create(RemoteTable.prototype)
return Object.assign(clone, this)
}
}
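
A sketch of the remote classes above end to end; the database name, key, region, and table name are placeholders.

```ts
// Illustrative flow: connect, open a table (triggers a describe call on a
// cache miss), then run a vector query against the remote endpoint.
async function example (): Promise<void> {
  const conn = new RemoteConnection({
    uri: 'db://example-db',
    apiKey: 'sk-placeholder',
    region: 'us-east-1'
  })
  const tbl = await conn.openTable('my-table')
  const rows = await tbl.search([0.1, 0.2, 0.3]).limit(3).execute()
  console.log(rows)
}
```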


@@ -1,508 +0,0 @@
// Copyright 2023 LanceDB Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// The utilities in this file help sanitize data from the user's arrow
// library into the types expected by vectordb's arrow library. Node
// generally allows for multiple versions of the same library (and sometimes
// even multiple copies of the same version) to be installed at the same
// time. However, arrow-js uses instanceof, which expects that the input
// comes from the exact same library instance. This is not always the case
// and so we must sanitize the input to ensure that it is compatible.
import {
Field,
Utf8,
FixedSizeBinary,
FixedSizeList,
Schema,
List,
Struct,
Float,
Bool,
Date_,
Decimal,
type DataType,
Dictionary,
Binary,
Float32,
Interval,
Map_,
Duration,
Union,
Time,
Timestamp,
Type,
Null,
Int,
type Precision,
type DateUnit,
Int8,
Int16,
Int32,
Int64,
Uint8,
Uint16,
Uint32,
Uint64,
Float16,
Float64,
DateDay,
DateMillisecond,
DenseUnion,
SparseUnion,
TimeNanosecond,
TimeMicrosecond,
TimeMillisecond,
TimeSecond,
TimestampNanosecond,
TimestampMicrosecond,
TimestampMillisecond,
TimestampSecond,
IntervalDayTime,
IntervalYearMonth,
DurationNanosecond,
DurationMicrosecond,
DurationMillisecond,
DurationSecond
} from "apache-arrow";
import type { IntBitWidth, TimeBitWidth } from "apache-arrow/type";
function sanitizeMetadata(
metadataLike?: unknown
): Map<string, string> | undefined {
if (metadataLike === undefined || metadataLike === null) {
return undefined;
}
if (!(metadataLike instanceof Map)) {
throw Error("Expected metadata, if present, to be a Map<string, string>");
}
for (const item of metadataLike) {
if (typeof item[0] !== "string" || typeof item[1] !== "string") {
throw Error(
"Expected metadata, if present, to be a Map<string, string> but it had non-string keys or values"
);
}
}
return metadataLike as Map<string, string>;
}
function sanitizeInt(typeLike: object) {
if (
!("bitWidth" in typeLike) ||
typeof typeLike.bitWidth !== "number" ||
!("isSigned" in typeLike) ||
typeof typeLike.isSigned !== "boolean"
) {
throw Error(
"Expected an Int Type to have a `bitWidth` and `isSigned` property"
);
}
return new Int(typeLike.isSigned, typeLike.bitWidth as IntBitWidth);
}
function sanitizeFloat(typeLike: object) {
if (!("precision" in typeLike) || typeof typeLike.precision !== "number") {
throw Error("Expected a Float Type to have a `precision` property");
}
return new Float(typeLike.precision as Precision);
}
function sanitizeDecimal(typeLike: object) {
if (
!("scale" in typeLike) ||
typeof typeLike.scale !== "number" ||
!("precision" in typeLike) ||
typeof typeLike.precision !== "number" ||
!("bitWidth" in typeLike) ||
typeof typeLike.bitWidth !== "number"
) {
throw Error(
"Expected a Decimal Type to have `scale`, `precision`, and `bitWidth` properties"
);
}
return new Decimal(typeLike.scale, typeLike.precision, typeLike.bitWidth);
}
function sanitizeDate(typeLike: object) {
if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
throw Error("Expected a Date type to have a `unit` property");
}
return new Date_(typeLike.unit as DateUnit);
}
function sanitizeTime(typeLike: object) {
if (
!("unit" in typeLike) ||
typeof typeLike.unit !== "number" ||
!("bitWidth" in typeLike) ||
typeof typeLike.bitWidth !== "number"
) {
throw Error(
"Expected a Time type to have `unit` and `bitWidth` properties"
);
}
return new Time(typeLike.unit, typeLike.bitWidth as TimeBitWidth);
}
function sanitizeTimestamp(typeLike: object) {
if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
throw Error("Expected a Timestamp type to have a `unit` property");
}
let timezone = null;
if ("timezone" in typeLike && typeof typeLike.timezone === "string") {
timezone = typeLike.timezone;
}
return new Timestamp(typeLike.unit, timezone);
}
function sanitizeTypedTimestamp(
typeLike: object,
Datatype:
| typeof TimestampNanosecond
| typeof TimestampMicrosecond
| typeof TimestampMillisecond
| typeof TimestampSecond
) {
let timezone = null;
if ("timezone" in typeLike && typeof typeLike.timezone === "string") {
timezone = typeLike.timezone;
}
return new Datatype(timezone);
}
function sanitizeInterval(typeLike: object) {
if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
throw Error("Expected an Interval type to have a `unit` property");
}
return new Interval(typeLike.unit);
}
function sanitizeList(typeLike: object) {
if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
throw Error(
"Expected a List type to have an array-like `children` property"
);
}
if (typeLike.children.length !== 1) {
throw Error("Expected a List type to have exactly one child");
}
return new List(sanitizeField(typeLike.children[0]));
}
function sanitizeStruct(typeLike: object) {
if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
throw Error(
"Expected a Struct type to have an array-like `children` property"
);
}
return new Struct(typeLike.children.map((child) => sanitizeField(child)));
}
function sanitizeUnion(typeLike: object) {
if (
!("typeIds" in typeLike) ||
!("mode" in typeLike) ||
typeof typeLike.mode !== "number"
) {
throw Error(
"Expected a Union type to have `typeIds` and `mode` properties"
);
}
if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
throw Error(
"Expected a Union type to have an array-like `children` property"
);
}
return new Union(
typeLike.mode,
typeLike.typeIds as any,
typeLike.children.map((child) => sanitizeField(child))
);
}
function sanitizeTypedUnion(
typeLike: object,
UnionType: typeof DenseUnion | typeof SparseUnion
) {
if (!("typeIds" in typeLike)) {
throw Error(
"Expected a DenseUnion/SparseUnion type to have a `typeIds` property"
);
}
if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
throw Error(
"Expected a DenseUnion/SparseUnion type to have an array-like `children` property"
);
}
return new UnionType(
typeLike.typeIds as any,
typeLike.children.map((child) => sanitizeField(child))
);
}
function sanitizeFixedSizeBinary(typeLike: object) {
if (!("byteWidth" in typeLike) || typeof typeLike.byteWidth !== "number") {
throw Error(
"Expected a FixedSizeBinary type to have a `byteWidth` property"
);
}
return new FixedSizeBinary(typeLike.byteWidth);
}
function sanitizeFixedSizeList(typeLike: object) {
if (!("listSize" in typeLike) || typeof typeLike.listSize !== "number") {
throw Error("Expected a FixedSizeList type to have a `listSize` property");
}
if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
throw Error(
"Expected a FixedSizeList type to have an array-like `children` property"
);
}
if (typeLike.children.length !== 1) {
throw Error("Expected a FixedSizeList type to have exactly one child");
}
return new FixedSizeList(
typeLike.listSize,
sanitizeField(typeLike.children[0])
);
}
function sanitizeMap(typeLike: object) {
if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
throw Error(
"Expected a Map type to have an array-like `children` property"
);
}
if (!("keysSorted" in typeLike) || typeof typeLike.keysSorted !== "boolean") {
throw Error("Expected a Map type to have a `keysSorted` property");
}
return new Map_(
typeLike.children.map((field) => sanitizeField(field)) as any,
typeLike.keysSorted
);
}
function sanitizeDuration(typeLike: object) {
if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
throw Error("Expected a Duration type to have a `unit` property");
}
return new Duration(typeLike.unit);
}
function sanitizeDictionary(typeLike: object) {
if (!("id" in typeLike) || typeof typeLike.id !== "number") {
throw Error("Expected a Dictionary type to have an `id` property");
}
if (!("indices" in typeLike) || typeof typeLike.indices !== "object") {
throw Error("Expected a Dictionary type to have an `indices` property");
}
if (!("dictionary" in typeLike) || typeof typeLike.dictionary !== "object") {
throw Error("Expected a Dictionary type to have an `dictionary` property");
}
if (!("isOrdered" in typeLike) || typeof typeLike.isOrdered !== "boolean") {
throw Error("Expected a Dictionary type to have an `isOrdered` property");
}
return new Dictionary(
sanitizeType(typeLike.dictionary),
sanitizeType(typeLike.indices) as any,
typeLike.id,
typeLike.isOrdered
);
}
function sanitizeType(typeLike: unknown): DataType<any> {
if (typeof typeLike !== "object" || typeLike === null) {
throw Error("Expected a Type but object was null/undefined");
}
if (!("typeId" in typeLike) || !(typeof typeLike.typeId !== "function")) {
throw Error("Expected a Type to have a typeId function");
}
let typeId: Type;
if (typeof typeLike.typeId === "function") {
typeId = (typeLike.typeId as () => unknown)() as Type;
} else if (typeof typeLike.typeId === "number") {
typeId = typeLike.typeId as Type;
} else {
throw Error("Type's typeId property was not a function or number");
}
switch (typeId) {
case Type.NONE:
throw Error("Received a Type with a typeId of NONE");
case Type.Null:
return new Null();
case Type.Int:
return sanitizeInt(typeLike);
case Type.Float:
return sanitizeFloat(typeLike);
case Type.Binary:
return new Binary();
case Type.Utf8:
return new Utf8();
case Type.Bool:
return new Bool();
case Type.Decimal:
return sanitizeDecimal(typeLike);
case Type.Date:
return sanitizeDate(typeLike);
case Type.Time:
return sanitizeTime(typeLike);
case Type.Timestamp:
return sanitizeTimestamp(typeLike);
case Type.Interval:
return sanitizeInterval(typeLike);
case Type.List:
return sanitizeList(typeLike);
case Type.Struct:
return sanitizeStruct(typeLike);
case Type.Union:
return sanitizeUnion(typeLike);
case Type.FixedSizeBinary:
return sanitizeFixedSizeBinary(typeLike);
case Type.FixedSizeList:
return sanitizeFixedSizeList(typeLike);
case Type.Map:
return sanitizeMap(typeLike);
case Type.Duration:
return sanitizeDuration(typeLike);
case Type.Dictionary:
return sanitizeDictionary(typeLike);
case Type.Int8:
return new Int8();
case Type.Int16:
return new Int16();
case Type.Int32:
return new Int32();
case Type.Int64:
return new Int64();
case Type.Uint8:
return new Uint8();
case Type.Uint16:
return new Uint16();
case Type.Uint32:
return new Uint32();
case Type.Uint64:
return new Uint64();
case Type.Float16:
return new Float16();
case Type.Float32:
return new Float32();
case Type.Float64:
return new Float64();
case Type.DateMillisecond:
return new DateMillisecond();
case Type.DateDay:
return new DateDay();
case Type.TimeNanosecond:
return new TimeNanosecond();
case Type.TimeMicrosecond:
return new TimeMicrosecond();
case Type.TimeMillisecond:
return new TimeMillisecond();
case Type.TimeSecond:
return new TimeSecond();
case Type.TimestampNanosecond:
return sanitizeTypedTimestamp(typeLike, TimestampNanosecond);
case Type.TimestampMicrosecond:
return sanitizeTypedTimestamp(typeLike, TimestampMicrosecond);
case Type.TimestampMillisecond:
return sanitizeTypedTimestamp(typeLike, TimestampMillisecond);
case Type.TimestampSecond:
return sanitizeTypedTimestamp(typeLike, TimestampSecond);
case Type.DenseUnion:
return sanitizeTypedUnion(typeLike, DenseUnion);
case Type.SparseUnion:
return sanitizeTypedUnion(typeLike, SparseUnion);
case Type.IntervalDayTime:
return new IntervalDayTime();
case Type.IntervalYearMonth:
return new IntervalYearMonth();
case Type.DurationNanosecond:
return new DurationNanosecond();
case Type.DurationMicrosecond:
return new DurationMicrosecond();
case Type.DurationMillisecond:
return new DurationMillisecond();
case Type.DurationSecond:
return new DurationSecond();
}
}
function sanitizeField(fieldLike: unknown): Field {
if (fieldLike instanceof Field) {
return fieldLike;
}
if (typeof fieldLike !== "object" || fieldLike === null) {
throw Error("Expected a Field but object was null/undefined");
}
if (
!("type" in fieldLike) ||
!("name" in fieldLike) ||
!("nullable" in fieldLike)
) {
throw Error(
"The field passed in is missing a `type`/`name`/`nullable` property"
);
}
const type = sanitizeType(fieldLike.type);
const name = fieldLike.name;
if (!(typeof name === "string")) {
throw Error("The field passed in had a non-string `name` property");
}
const nullable = fieldLike.nullable;
if (!(typeof nullable === "boolean")) {
throw Error("The field passed in had a non-boolean `nullable` property");
}
let metadata;
if ("metadata" in fieldLike) {
metadata = sanitizeMetadata(fieldLike.metadata);
}
return new Field(name, type, nullable, metadata);
}
/**
* Convert something schemaLike into a Schema instance
*
* This method is often needed even when the caller is using a Schema
* instance because they might be using a different instance of apache-arrow
* than lancedb is using.
*/
export function sanitizeSchema(schemaLike: unknown): Schema {
if (schemaLike instanceof Schema) {
return schemaLike;
}
if (typeof schemaLike !== "object" || schemaLike === null) {
throw Error("Expected a Schema but object was null/undefined");
}
if (!("fields" in schemaLike)) {
throw Error(
"The schema passed in does not appear to be a schema (no 'fields' property)"
);
}
let metadata;
if ("metadata" in schemaLike) {
metadata = sanitizeMetadata(schemaLike.metadata);
}
if (!Array.isArray(schemaLike.fields)) {
throw Error(
"The schema passed in had a 'fields' property but it was not an array"
);
}
const sanitizedFields = schemaLike.fields.map((field) =>
sanitizeField(field)
);
return new Schema(sanitizedFields, metadata);
}
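
A sketch of what the function above accepts: a plain schema-like object (for example, built by a different copy of apache-arrow) is rebuilt with this package's classes. The field names are placeholders.

```ts
// Illustrative input: plain field-like objects pass through sanitizeField and
// sanitizeType and come back as real Field/DataType instances.
const schemaLike = {
  fields: [
    { name: 'id', type: new Int32(), nullable: false },
    { name: 'label', type: new Utf8(), nullable: true }
  ]
}
const schema = sanitizeSchema(schemaLike)
console.log(schema.fields.map((f) => `${f.name}: ${f.type.toString()}`))
```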


@@ -1,360 +0,0 @@
// Copyright 2024 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { describe } from 'mocha'
import { assert, expect, use as chaiUse } from 'chai'
import * as chaiAsPromised from 'chai-as-promised'
import { convertToTable, fromTableToBuffer, makeArrowTable, makeEmptyTable } from '../arrow'
import {
Field,
FixedSizeList,
Float16,
Float32,
Int32,
tableFromIPC,
Schema,
Float64,
type Table,
Binary,
Bool,
Utf8,
Struct,
List,
DataType,
Dictionary,
Int64,
MetadataVersion
} from 'apache-arrow'
import {
Dictionary as OldDictionary,
Field as OldField,
FixedSizeList as OldFixedSizeList,
Float32 as OldFloat32,
Int32 as OldInt32,
Struct as OldStruct,
Schema as OldSchema,
TimestampNanosecond as OldTimestampNanosecond,
Utf8 as OldUtf8
} from 'apache-arrow-old'
import { type EmbeddingFunction } from '../embedding/embedding_function'
chaiUse(chaiAsPromised)
function sampleRecords (): Array<Record<string, any>> {
return [
{
binary: Buffer.alloc(5),
boolean: false,
number: 7,
string: 'hello',
struct: { x: 0, y: 0 },
list: ['anime', 'action', 'comedy']
}
]
}
// Helper method to verify various ways to create a table
async function checkTableCreation (tableCreationMethod: (records: any, recordsReversed: any, schema: Schema) => Promise<Table>): Promise<void> {
const records = sampleRecords()
const recordsReversed = [{
list: ['anime', 'action', 'comedy'],
struct: { x: 0, y: 0 },
string: 'hello',
number: 7,
boolean: false,
binary: Buffer.alloc(5)
}]
const schema = new Schema([
new Field('binary', new Binary(), false),
new Field('boolean', new Bool(), false),
new Field('number', new Float64(), false),
new Field('string', new Utf8(), false),
new Field('struct', new Struct([
new Field('x', new Float64(), false),
new Field('y', new Float64(), false)
])),
new Field('list', new List(new Field('item', new Utf8(), false)), false)
])
const table = await tableCreationMethod(records, recordsReversed, schema)
schema.fields.forEach((field, idx) => {
const actualField = table.schema.fields[idx]
assert.isFalse(actualField.nullable)
assert.equal(table.getChild(field.name)?.type.toString(), field.type.toString())
assert.equal(table.getChildAt(idx)?.type.toString(), field.type.toString())
})
}
describe('The function makeArrowTable', function () {
it('will use data types from a provided schema instead of inference', async function () {
const schema = new Schema([
new Field('a', new Int32()),
new Field('b', new Float32()),
new Field('c', new FixedSizeList(3, new Field('item', new Float16()))),
new Field('d', new Int64())
])
const table = makeArrowTable(
[
{ a: 1, b: 2, c: [1, 2, 3], d: 9 },
{ a: 4, b: 5, c: [4, 5, 6], d: 10 },
{ a: 7, b: 8, c: [7, 8, 9], d: null }
],
{ schema }
)
const buf = await fromTableToBuffer(table)
assert.isAbove(buf.byteLength, 0)
const actual = tableFromIPC(buf)
assert.equal(actual.numRows, 3)
const actualSchema = actual.schema
assert.deepEqual(actualSchema, schema)
})
it('will assume the column `vector` is FixedSizeList<Float32> by default', async function () {
const schema = new Schema([
new Field('a', new Float64()),
new Field('b', new Float64()),
new Field(
'vector',
new FixedSizeList(3, new Field('item', new Float32(), true))
)
])
const table = makeArrowTable([
{ a: 1, b: 2, vector: [1, 2, 3] },
{ a: 4, b: 5, vector: [4, 5, 6] },
{ a: 7, b: 8, vector: [7, 8, 9] }
])
const buf = await fromTableToBuffer(table)
assert.isAbove(buf.byteLength, 0)
const actual = tableFromIPC(buf)
assert.equal(actual.numRows, 3)
const actualSchema = actual.schema
assert.deepEqual(actualSchema, schema)
})
it('can support multiple vector columns', async function () {
const schema = new Schema([
new Field('a', new Float64()),
new Field('b', new Float64()),
new Field('vec1', new FixedSizeList(3, new Field('item', new Float16(), true))),
new Field('vec2', new FixedSizeList(3, new Field('item', new Float16(), true)))
])
const table = makeArrowTable(
[
{ a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] },
{ a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] },
{ a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] }
],
{
vectorColumns: {
vec1: { type: new Float16() },
vec2: { type: new Float16() }
}
}
)
const buf = await fromTableToBuffer(table)
assert.isAbove(buf.byteLength, 0)
const actual = tableFromIPC(buf)
assert.equal(actual.numRows, 3)
const actualSchema = actual.schema
assert.deepEqual(actualSchema, schema)
})
it('will allow different vector column types', async function () {
const table = makeArrowTable(
[
{ fp16: [1], fp32: [1], fp64: [1] }
],
{
vectorColumns: {
fp16: { type: new Float16() },
fp32: { type: new Float32() },
fp64: { type: new Float64() }
}
}
)
assert.equal(table.getChild('fp16')?.type.children[0].type.toString(), new Float16().toString())
assert.equal(table.getChild('fp32')?.type.children[0].type.toString(), new Float32().toString())
assert.equal(table.getChild('fp64')?.type.children[0].type.toString(), new Float64().toString())
})
it('will use dictionary encoded strings if asked', async function () {
const table = makeArrowTable([{ str: 'hello' }])
assert.isTrue(DataType.isUtf8(table.getChild('str')?.type))
const tableWithDict = makeArrowTable([{ str: 'hello' }], { dictionaryEncodeStrings: true })
assert.isTrue(DataType.isDictionary(tableWithDict.getChild('str')?.type))
const schema = new Schema([
new Field('str', new Dictionary(new Utf8(), new Int32()))
])
const tableWithDict2 = makeArrowTable([{ str: 'hello' }], { schema })
assert.isTrue(DataType.isDictionary(tableWithDict2.getChild('str')?.type))
})
it('will infer data types correctly', async function () {
await checkTableCreation(async (records) => makeArrowTable(records))
})
it('will allow a schema to be provided', async function () {
await checkTableCreation(async (records, _, schema) => makeArrowTable(records, { schema }))
})
it('will use the field order of any provided schema', async function () {
await checkTableCreation(async (_, recordsReversed, schema) => makeArrowTable(recordsReversed, { schema }))
})
it('will make an empty table', async function () {
await checkTableCreation(async (_, __, schema) => makeArrowTable([], { schema }))
})
})
class DummyEmbedding implements EmbeddingFunction<string> {
public readonly sourceColumn = 'string'
public readonly embeddingDimension = 2
public readonly embeddingDataType = new Float16()
async embed (data: string[]): Promise<number[][]> {
return data.map(
() => [0.0, 0.0]
)
}
}
class DummyEmbeddingWithNoDimension implements EmbeddingFunction<string> {
public readonly sourceColumn = 'string'
async embed (data: string[]): Promise<number[][]> {
return data.map(
() => [0.0, 0.0]
)
}
}
describe('convertToTable', function () {
it('will infer data types correctly', async function () {
await checkTableCreation(async (records) => await convertToTable(records))
})
it('will allow a schema to be provided', async function () {
await checkTableCreation(async (records, _, schema) => await convertToTable(records, undefined, { schema }))
})
it('will use the field order of any provided schema', async function () {
await checkTableCreation(async (_, recordsReversed, schema) => await convertToTable(recordsReversed, undefined, { schema }))
})
it('will make an empty table', async function () {
await checkTableCreation(async (_, __, schema) => await convertToTable([], undefined, { schema }))
})
it('will apply embeddings', async function () {
const records = sampleRecords()
const table = await convertToTable(records, new DummyEmbedding())
assert.isTrue(DataType.isFixedSizeList(table.getChild('vector')?.type))
assert.equal(table.getChild('vector')?.type.children[0].type.toString(), new Float16().toString())
})
it('will fail if missing the embedding source column', async function () {
return await expect(convertToTable([{ id: 1 }], new DummyEmbedding())).to.be.rejectedWith("'string' was not present")
})
it('use embeddingDimension if embedding missing from table', async function () {
const schema = new Schema([
new Field('string', new Utf8(), false)
])
// Simulate getting an empty Arrow table (minus embedding) from some other source
// In other words, we aren't starting with records
const table = makeEmptyTable(schema)
// If the embedding specifies the dimension we are fine
await fromTableToBuffer(table, new DummyEmbedding())
// We can also supply a schema and should be ok
const schemaWithEmbedding = new Schema([
new Field('string', new Utf8(), false),
new Field('vector', new FixedSizeList(2, new Field('item', new Float16(), false)), false)
])
await fromTableToBuffer(table, new DummyEmbeddingWithNoDimension(), schemaWithEmbedding)
// Otherwise we will get an error
return await expect(fromTableToBuffer(table, new DummyEmbeddingWithNoDimension())).to.be.rejectedWith('does not specify `embeddingDimension`')
})
it('will apply embeddings to an empty table', async function () {
const schema = new Schema([
new Field('string', new Utf8(), false),
new Field('vector', new FixedSizeList(2, new Field('item', new Float16(), false)), false)
])
const table = await convertToTable([], new DummyEmbedding(), { schema })
assert.isTrue(DataType.isFixedSizeList(table.getChild('vector')?.type))
assert.equal(table.getChild('vector')?.type.children[0].type.toString(), new Float16().toString())
})
it('will complain if embeddings present but schema missing embedding column', async function () {
const schema = new Schema([
new Field('string', new Utf8(), false)
])
return await expect(convertToTable([], new DummyEmbedding(), { schema })).to.be.rejectedWith('column vector was missing')
})
it('will provide a nice error if run twice', async function () {
const records = sampleRecords()
const table = await convertToTable(records, new DummyEmbedding())
// fromTableToBuffer will try and apply the embeddings again
return await expect(fromTableToBuffer(table, new DummyEmbedding())).to.be.rejectedWith('already existed')
})
})
describe('makeEmptyTable', function () {
it('will make an empty table', async function () {
await checkTableCreation(async (_, __, schema) => makeEmptyTable(schema))
})
})
describe('when using two versions of arrow', function () {
it('can still import data', async function() {
const schema = new OldSchema([
new OldField('id', new OldInt32()),
new OldField('vector', new OldFixedSizeList(1024, new OldField("item", new OldFloat32(), true))),
new OldField('struct', new OldStruct([
new OldField('nested', new OldDictionary(new OldUtf8(), new OldInt32(), 1, true)),
new OldField('ts_with_tz', new OldTimestampNanosecond("some_tz")),
new OldField('ts_no_tz', new OldTimestampNanosecond(null))
]))
]) as any
// We use arrow version 13 to emulate a "foreign arrow"; this version doesn't have metadataVersion.
// In theory, this wouldn't matter, since we don't rely on that property. However, it causes
// deepEqual to fail, so we patch it back in.
schema.metadataVersion = MetadataVersion.V5
const table = makeArrowTable(
[],
{ schema }
)
const buf = await fromTableToBuffer(table)
assert.isAbove(buf.byteLength, 0)
const actual = tableFromIPC(buf)
const actualSchema = actual.schema
assert.deepEqual(actualSchema, schema)
})
})


@@ -1,55 +0,0 @@
// Copyright 2023 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { describe } from 'mocha'
import { assert } from 'chai'
import { OpenAIEmbeddingFunction } from '../../embedding/openai'
import { isEmbeddingFunction } from '../../embedding/embedding_function'
// eslint-disable-next-line @typescript-eslint/no-var-requires
const OpenAIApi = require('openai')
// eslint-disable-next-line @typescript-eslint/no-var-requires
const { stub } = require('sinon')
describe('OpenAPIEmbeddings', function () {
const stubValue = {
data: [
{
embedding: Array(1536).fill(1.0)
},
{
embedding: Array(1536).fill(2.0)
}
]
}
describe('#embed', function () {
it('should create vector embeddings', async function () {
const openAIStub = stub(OpenAIApi.Embeddings.prototype, 'create').returns(stubValue)
const f = new OpenAIEmbeddingFunction('text', 'sk-key')
const vectors = await f.embed(['abc', 'def'])
assert.isTrue(openAIStub.calledOnce)
assert.equal(vectors.length, 2)
assert.deepEqual(vectors[0], stubValue.data[0].embedding)
assert.deepEqual(vectors[1], stubValue.data[1].embedding)
})
})
describe('isEmbeddingFunction', function () {
it('should match the isEmbeddingFunction guard', function () {
assert.isTrue(isEmbeddingFunction(new OpenAIEmbeddingFunction('text', 'sk-key')))
})
})
})


@@ -1,76 +0,0 @@
// Copyright 2023 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// IO tests
import { describe } from 'mocha'
import { assert } from 'chai'
import * as lancedb from '../index'
import { type ConnectionOptions } from '../index'
describe('LanceDB S3 client', function () {
if (process.env.TEST_S3_BASE_URL != null) {
const baseUri = process.env.TEST_S3_BASE_URL
it('should have a valid url', async function () {
const opts = { uri: `${baseUri}/valid_url` }
const table = await createTestDB(opts, 2, 20)
const con = await lancedb.connect(opts)
assert.equal(con.uri, opts.uri)
const results = await table.search([0.1, 0.3]).limit(5).execute()
assert.equal(results.length, 5)
}).timeout(10_000)
} else {
describe.skip('Skip S3 test', function () {})
}
if (process.env.TEST_S3_BASE_URL != null && process.env.TEST_AWS_ACCESS_KEY_ID != null && process.env.TEST_AWS_SECRET_ACCESS_KEY != null) {
const baseUri = process.env.TEST_S3_BASE_URL
it('use custom credentials', async function () {
const opts: ConnectionOptions = {
uri: `${baseUri}/custom_credentials`,
awsCredentials: {
accessKeyId: process.env.TEST_AWS_ACCESS_KEY_ID as string,
secretKey: process.env.TEST_AWS_SECRET_ACCESS_KEY as string
}
}
const table = await createTestDB(opts, 2, 20)
console.log(table)
const con = await lancedb.connect(opts)
console.log(con)
assert.equal(con.uri, opts.uri)
const results = await table.search([0.1, 0.3]).limit(5).execute()
assert.equal(results.length, 5)
}).timeout(10_000)
} else {
describe.skip('Skip S3 test', function () {})
}
})
async function createTestDB (opts: ConnectionOptions, numDimensions: number = 2, numRows: number = 2): Promise<lancedb.Table> {
const con = await lancedb.connect(opts)
const data = []
for (let i = 0; i < numRows; i++) {
const vector = []
for (let j = 0; j < numDimensions; j++) {
vector.push(i + (j * 0.1))
}
data.push({ id: i + 1, name: `name_${i}`, price: i + 10, is_active: (i % 2 === 0), vector })
}
return await con.createTable('vectors_2', data)
}

File diff suppressed because it is too large


@@ -1,45 +0,0 @@
// Copyright 2023 LanceDB Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { toSQL } from '../util'
import * as chai from 'chai'
const expect = chai.expect
describe('toSQL', function () {
it('should turn string to SQL expression', function () {
expect(toSQL('foo')).to.equal("'foo'")
})
it('should turn number to SQL expression', function () {
expect(toSQL(123)).to.equal('123')
})
it('should turn boolean to SQL expression', function () {
expect(toSQL(true)).to.equal('TRUE')
})
it('should turn null to SQL expression', function () {
expect(toSQL(null)).to.equal('NULL')
})
it('should turn Date to SQL expression', function () {
const date = new Date('05 October 2011 14:48 UTC')
expect(toSQL(date)).to.equal("'2011-10-05T14:48:00.000Z'")
})
it('should turn array to SQL expression', function () {
expect(toSQL(['foo', 'bar', true, 1])).to.equal("['foo', 'bar', TRUE, 1]")
})
})


@@ -1,77 +0,0 @@
// Copyright 2023 LanceDB Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
export type Literal = string | number | boolean | null | Date | Literal[]
export function toSQL (value: Literal): string {
if (typeof value === 'string') {
return `'${value}'`
}
if (typeof value === 'number') {
return value.toString()
}
if (typeof value === 'boolean') {
return value ? 'TRUE' : 'FALSE'
}
if (value === null) {
return 'NULL'
}
if (value instanceof Date) {
return `'${value.toISOString()}'`
}
if (Array.isArray(value)) {
return `[${value.map(toSQL).join(', ')}]`
}
// eslint-disable-next-line @typescript-eslint/restrict-template-expressions
throw new Error(`Unsupported value type: ${typeof value} value: (${value})`)
}
export class TTLCache {
private readonly cache: Map<string, { value: any, expires: number }>
/**
* @param ttl Time to live in milliseconds
*/
constructor (private readonly ttl: number) {
this.cache = new Map()
}
get (key: string): any | undefined {
const entry = this.cache.get(key)
if (entry === undefined) {
return undefined
}
if (entry.expires < Date.now()) {
this.cache.delete(key)
return undefined
}
return entry.value
}
set (key: string, value: any): void {
this.cache.set(key, { value, expires: Date.now() + this.ttl })
}
delete (key: string): void {
this.cache.delete(key)
}
}
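
For illustration, a minimal usage sketch of the `TTLCache` defined above; the key, value, and 100ms TTL are arbitrary choices for the example:

```ts
// Entries live for 100ms; expiry is checked lazily inside get().
const cache = new TTLCache(100)

cache.set('greeting', 'hello')
console.log(cache.get('greeting')) // 'hello' while the entry is fresh

setTimeout(() => {
  // Past its TTL, the entry is deleted on access and undefined is returned.
  console.log(cache.get('greeting'))
}, 150)
```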


@@ -1,14 +0,0 @@
{
"include": [
"src/**/*.ts",
"src/*.ts"
],
"compilerOptions": {
"target": "ES2020",
"module": "commonjs",
"declaration": true,
"outDir": "./dist",
"strict": true,
"sourceMap": true,
}
}

nodejs/CLAUDE.md Normal file

@@ -0,0 +1,13 @@
These are the TypeScript bindings of LanceDB.
The core Rust library is in the `../rust/lancedb` directory, the Rust binding
code is in the `src/` directory, and the TypeScript bindings are in
the `lancedb/` directory.
Whenever you change the Rust code, you will need to recompile: `npm run build`.
Common commands:
* Build: `npm run build`
* Lint: `npm run lint`
* Fix lints: `npm run lint-fix`
* Test: `npm test`
* Run single test file: `npm test __test__/arrow.test.ts`


@@ -1,7 +1,7 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
version = "0.21.2-beta.0"
version = "0.21.2"
license.workspace = true
description.workspace = true
repository.workspace = true


@@ -1,7 +1,16 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
import { Bool, Field, Int32, List, Schema, Struct, Utf8 } from "apache-arrow";
import {
Bool,
Field,
Int32,
List,
Schema,
Struct,
Uint8,
Utf8,
} from "apache-arrow";
import * as arrow15 from "apache-arrow-15";
import * as arrow16 from "apache-arrow-16";
@@ -255,6 +264,98 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
expect(actualSchema).toEqual(schema);
});
it("will detect vector columns when name contains 'vector' or 'embedding'", async function () {
// Test various naming patterns that should be detected as vector columns
const floatVectorTable = makeArrowTable([
{
// Float vectors (use decimal values to ensure they're treated as floats)
// biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
user_vector: [1.1, 2.2],
// biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
text_embedding: [3.3, 4.4],
// biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
doc_embeddings: [5.5, 6.6],
// biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
my_vector_field: [7.7, 8.8],
// biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
embedding_model: [9.9, 10.1],
// biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
VECTOR_COL: [11.1, 12.2], // uppercase
// biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
Vector_Mixed: [13.3, 14.4], // mixed case
},
]);
// Check that columns with 'vector' or 'embedding' in name are converted to FixedSizeList
const floatVectorColumns = [
"user_vector",
"text_embedding",
"doc_embeddings",
"my_vector_field",
"embedding_model",
"VECTOR_COL",
"Vector_Mixed",
];
for (const columnName of floatVectorColumns) {
expect(
DataType.isFixedSizeList(
floatVectorTable.getChild(columnName)?.type,
),
).toBe(true);
// Check that float vectors use Float32 by default
expect(
floatVectorTable
.getChild(columnName)
?.type.children[0].type.toString(),
).toEqual(new Float32().toString());
}
// Test that regular integer arrays still get treated as float vectors
// (since JavaScript doesn't distinguish integers from floats at runtime)
const integerArrayTable = makeArrowTable([
{
// biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
vector_int: [1, 2], // Regular array with integers - should be Float32
// biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
embedding_int: [3, 4], // Regular array with integers - should be Float32
},
]);
const integerArrayColumns = ["vector_int", "embedding_int"];
for (const columnName of integerArrayColumns) {
expect(
DataType.isFixedSizeList(
integerArrayTable.getChild(columnName)?.type,
),
).toBe(true);
// Regular integer arrays should use Float32 (avoiding false positives)
expect(
integerArrayTable
.getChild(columnName)
?.type.children[0].type.toString(),
).toEqual(new Float32().toString());
}
// Test normal list should NOT be converted to FixedSizeList
const normalListTable = makeArrowTable([
{
// biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
normal_list: [15.5, 16.6], // should NOT be detected as vector
},
]);
expect(
DataType.isFixedSizeList(
normalListTable.getChild("normal_list")?.type,
),
).toBe(false);
expect(
DataType.isList(normalListTable.getChild("normal_list")?.type),
).toBe(true);
});
it("will allow different vector column types", async function () {
const table = makeArrowTable([{ fp16: [1], fp32: [1], fp64: [1] }], {
vectorColumns: {


@@ -42,6 +42,28 @@ describe("remote connection", () => {
});
});
it("should accept overall timeout configuration", async () => {
await connect("db://test", {
apiKey: "fake",
clientConfig: {
timeoutConfig: { timeout: 30 },
},
});
// Test with all timeout parameters
await connect("db://test", {
apiKey: "fake",
clientConfig: {
timeoutConfig: {
timeout: 60,
connectTimeout: 10,
readTimeout: 20,
poolIdleTimeout: 300,
},
},
});
});
it("should pass down apiKey and userAgent", async () => {
await withMockDatabase(
(req, res) => {


@@ -0,0 +1,46 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
import * as tmp from "tmp";
import { Session, connect } from "../lancedb";
describe("Session", () => {
let tmpDir: tmp.DirResult;
beforeEach(() => {
tmpDir = tmp.dirSync({ unsafeCleanup: true });
});
afterEach(() => tmpDir.removeCallback());
it("should configure cache sizes and work with database operations", async () => {
// Create session with small cache limits for testing
const indexCacheSize = BigInt(1024 * 1024); // 1MB
const metadataCacheSize = BigInt(512 * 1024); // 512KB
const session = new Session(indexCacheSize, metadataCacheSize);
// Record initial cache state
const initialCacheSize = session.sizeBytes();
const initialCacheItems = session.approxNumItems();
// Test session works with database connection
const db = await connect({ uri: tmpDir.name, session: session });
// Create and use a table to exercise the session
const data = Array.from({ length: 100 }, (_, i) => ({
id: i,
text: `item ${i}`,
}));
const table = await db.createTable("test", data);
const results = await table.query().limit(5).toArray();
expect(results).toHaveLength(5);
// Verify cache usage increased after operations
const finalCacheSize = session.sizeBytes();
const finalCacheItems = session.approxNumItems();
expect(finalCacheSize).toBeGreaterThan(initialCacheSize); // Cache should have grown
expect(finalCacheItems).toBeGreaterThanOrEqual(initialCacheItems); // Items should not decrease
expect(initialCacheSize).toBeLessThan(indexCacheSize + metadataCacheSize); // Within limits
});
});


@@ -582,7 +582,7 @@ describe("When creating an index", () => {
"Invalid input, minimum_nprobes must be greater than 0",
);
expect(() => tbl.query().nearestTo(queryVec).maximumNprobes(5)).toThrow(
"Invalid input, maximum_nprobes must be greater than minimum_nprobes",
"Invalid input, maximum_nprobes must be greater than or equal to minimum_nprobes",
);
await tbl.dropIndex("vec_idx");


@@ -30,7 +30,7 @@
"x64",
"arm64"
],
"license": "Apache 2.0",
"license": "Apache-2.0",
"os": [
"darwin",
"linux",


@@ -34,6 +34,7 @@ import {
Struct,
Timestamp,
Type,
Uint8,
Utf8,
Vector,
makeVector as arrowMakeVector,
@@ -51,6 +52,15 @@ import {
sanitizeTable,
sanitizeType,
} from "./sanitize";
/**
* Check if a field name indicates a vector column.
*/
function nameSuggestsVectorColumn(fieldName: string): boolean {
const nameLower = fieldName.toLowerCase();
return nameLower.includes("vector") || nameLower.includes("embedding");
}
export * from "apache-arrow";
export type SchemaLike =
| Schema
@@ -591,10 +601,17 @@ function inferType(
return undefined;
}
// Try to automatically detect embedding columns.
if (valueType instanceof Float && path[path.length - 1] === "vector") {
// We default to Float32 for vectors.
const child = new Field("item", new Float32(), true);
return new FixedSizeList(value.length, child);
if (nameSuggestsVectorColumn(path[path.length - 1])) {
// Check if value is a Uint8Array for integer vector type determination
if (value instanceof Uint8Array) {
// For integer vectors, we default to Uint8 (matching Python implementation)
const child = new Field("item", new Uint8(), true);
return new FixedSizeList(value.length, child);
} else {
// For float vectors, we default to Float32
const child = new Field("item", new Float32(), true);
return new FixedSizeList(value.length, child);
}
} else {
const child = new Field("item", valueType, true);
return new List(child);
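
To make the new inference behavior concrete, a small sketch mirroring the tests earlier in this diff. It assumes `makeArrowTable` is re-exported from the package root (as in the published `@lancedb/lancedb` API); the column names are illustrative:

```ts
import { DataType } from "apache-arrow";
import { makeArrowTable } from "@lancedb/lancedb";

// A column whose name contains "vector" or "embedding" (any casing) is
// inferred as FixedSizeList<Float32>; other numeric arrays remain Lists.
const table = makeArrowTable([
  { text_embedding: [1.1, 2.2], scores: [0.5, 0.9] },
]);

DataType.isFixedSizeList(table.getChild("text_embedding")?.type); // true
DataType.isList(table.getChild("scores")?.type); // true
```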


@@ -85,6 +85,9 @@ export interface OpenTableOptions {
/**
* Set the size of the index cache, specified as a number of entries
*
* @deprecated Use session-level cache configuration instead.
* Create a Session with custom cache sizes and pass it to the connect() function.
*
* The exact meaning of an "entry" will depend on the type of index:
* - IVF: there is one entry for each IVF partition
* - BTREE: there is one entry for the entire index
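
As the deprecation note suggests, the per-table entry-count knob gives way to byte-sized, session-level caches. A hypothetical migration sketch (table name, database path, and cache sizes are all illustrative):

```ts
import { Session, connect } from "@lancedb/lancedb";

// Before (deprecated): per-table cache limit counted in entries.
// const table = await db.openTable("my_table", { indexCacheSize: 256 });

// After: caches sized in bytes, configured once and shared via the session.
const session = new Session(
  BigInt(2 * 1024 * 1024 * 1024), // index cache: 2GB
  BigInt(256 * 1024 * 1024), // metadata cache: 256MB
);
const db = await connect({ uri: "/path/to/database", session });
const table = await db.openTable("my_table");
```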


@@ -10,6 +10,7 @@ import {
import {
ConnectionOptions,
Connection as LanceDbConnection,
Session,
} from "./native.js";
export {
@@ -51,6 +52,8 @@ export {
OpenTableOptions,
} from "./connection";
export { Session } from "./native.js";
export {
ExecutableQuery,
Query,
@@ -131,6 +134,7 @@ export { IntoSql, packBits } from "./util";
export async function connect(
uri: string,
options?: Partial<ConnectionOptions>,
session?: Session,
): Promise<Connection>;
/**
* Connect to a LanceDB instance at the given URI.
@@ -149,31 +153,43 @@ export async function connect(
* storageOptions: {timeout: "60s"}
* });
* ```
*
* @example
* ```ts
* const session = Session.default();
* const conn = await connect({
* uri: "/path/to/database",
* session: session
* });
* ```
*/
export async function connect(
options: Partial<ConnectionOptions> & { uri: string },
): Promise<Connection>;
export async function connect(
uriOrOptions: string | (Partial<ConnectionOptions> & { uri: string }),
options: Partial<ConnectionOptions> = {},
options?: Partial<ConnectionOptions>,
): Promise<Connection> {
let uri: string | undefined;
let finalOptions: Partial<ConnectionOptions> = {};
if (typeof uriOrOptions !== "string") {
const { uri: uri_, ...opts } = uriOrOptions;
uri = uri_;
options = opts;
finalOptions = opts;
} else {
uri = uriOrOptions;
finalOptions = options || {};
}
if (!uri) {
throw new Error("uri is required");
}
options = (options as ConnectionOptions) ?? {};
(<ConnectionOptions>options).storageOptions = cleanseStorageOptions(
(<ConnectionOptions>options).storageOptions,
finalOptions = (finalOptions as ConnectionOptions) ?? {};
(<ConnectionOptions>finalOptions).storageOptions = cleanseStorageOptions(
(<ConnectionOptions>finalOptions).storageOptions,
);
const nativeConn = await LanceDbConnection.new(uri, options);
const nativeConn = await LanceDbConnection.new(uri, finalOptions);
return new LocalConnection(nativeConn);
}
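
The overload resolution above accepts two call shapes; a brief sketch of both (the path and storage option values are placeholders):

```ts
import { connect } from "@lancedb/lancedb";

// Shape 1: positional uri plus a separate options object.
const db1 = await connect("/path/to/database", {
  storageOptions: { timeout: "60s" },
});

// Shape 2: a single options object carrying the uri; internally the uri
// is split off and the remaining fields become finalOptions.
const db2 = await connect({ uri: "/path/to/database" });
```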


@@ -1,11 +1,11 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.21.2-beta.0",
"version": "0.21.2",
"os": ["darwin"],
"cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node",
"files": ["lancedb.darwin-arm64.node"],
"license": "Apache 2.0",
"license": "Apache-2.0",
"engines": {
"node": ">= 18"
}


@@ -1,11 +1,11 @@
{
"name": "@lancedb/lancedb-darwin-x64",
"version": "0.21.2-beta.0",
"version": "0.21.2",
"os": ["darwin"],
"cpu": ["x64"],
"main": "lancedb.darwin-x64.node",
"files": ["lancedb.darwin-x64.node"],
"license": "Apache 2.0",
"license": "Apache-2.0",
"engines": {
"node": ">= 18"
}


@@ -1,11 +1,11 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.21.2-beta.0",
"version": "0.21.2",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node",
"files": ["lancedb.linux-arm64-gnu.node"],
"license": "Apache 2.0",
"license": "Apache-2.0",
"engines": {
"node": ">= 18"
},


@@ -1,13 +1,13 @@
{
"name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.21.2-beta.0",
"version": "0.21.2",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node",
"files": ["lancedb.linux-arm64-musl.node"],
"license": "Apache 2.0",
"license": "Apache-2.0",
"engines": {
"node": ">= 18"
},
"libc": ["musl"]
}
}


@@ -1,11 +1,11 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.21.2-beta.0",
"version": "0.21.2",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node",
"files": ["lancedb.linux-x64-gnu.node"],
"license": "Apache 2.0",
"license": "Apache-2.0",
"engines": {
"node": ">= 18"
},


@@ -1,13 +1,13 @@
{
"name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.21.2-beta.0",
"version": "0.21.2",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node",
"files": ["lancedb.linux-x64-musl.node"],
"license": "Apache 2.0",
"license": "Apache-2.0",
"engines": {
"node": ">= 18"
},
"libc": ["musl"]
}
}


@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.21.2-beta.0",
"version": "0.21.2",
"os": [
"win32"
],
@@ -11,7 +11,7 @@
"files": [
"lancedb.win32-arm64-msvc.node"
],
"license": "Apache 2.0",
"license": "Apache-2.0",
"engines": {
"node": ">= 18"
}


@@ -1,11 +1,11 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.21.2-beta.0",
"version": "0.21.2",
"os": ["win32"],
"cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node",
"files": ["lancedb.win32-x64-msvc.node"],
"license": "Apache 2.0",
"license": "Apache-2.0",
"engines": {
"node": ">= 18"
}


@@ -1,17 +1,17 @@
{
"name": "@lancedb/lancedb",
"version": "0.21.2-beta.0",
"version": "0.21.2",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@lancedb/lancedb",
"version": "0.21.2-beta.0",
"version": "0.21.2",
"cpu": [
"x64",
"arm64"
],
"license": "Apache 2.0",
"license": "Apache-2.0",
"os": [
"darwin",
"linux",


@@ -11,7 +11,7 @@
"ann"
],
"private": false,
"version": "0.21.2-beta.0",
"version": "0.21.2",
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",
@@ -36,7 +36,7 @@
]
}
},
"license": "Apache 2.0",
"license": "Apache-2.0",
"devDependencies": {
"@aws-sdk/client-dynamodb": "^3.33.0",
"@aws-sdk/client-kms": "^3.33.0",


@@ -74,6 +74,10 @@ impl Connection {
builder = builder.host_override(&host_override);
}
if let Some(session) = options.session {
builder = builder.session(session.inner.clone());
}
Ok(Self::inner_new(builder.execute().await.default_error()?))
}


@@ -14,6 +14,7 @@ pub mod merge;
mod query;
pub mod remote;
mod rerankers;
mod session;
mod table;
mod util;
@@ -34,6 +35,9 @@ pub struct ConnectionOptions {
///
/// The available options are described at https://lancedb.github.io/lancedb/guides/storage/
pub storage_options: Option<HashMap<String, String>>,
/// (For LanceDB OSS only): the session to use for this connection. Holds
/// shared caches and other session-specific state.
pub session: Option<session::Session>,
/// (For LanceDB cloud only): configuration for the remote HTTP client.
pub client_config: Option<remote::ClientConfig>,


@@ -9,6 +9,12 @@ use napi_derive::*;
#[napi(object)]
#[derive(Debug)]
pub struct TimeoutConfig {
/// The overall timeout for the entire request in seconds. This includes
/// connection, send, and read time. If the entire request doesn't complete
/// within this time, it will fail. Default is None (no overall timeout).
/// This can also be set via the environment variable `LANCE_CLIENT_TIMEOUT`,
/// as an integer number of seconds.
pub timeout: Option<f64>,
/// The timeout for establishing a connection in seconds. Default is 120
/// seconds (2 minutes). This can also be set via the environment variable
/// `LANCE_CLIENT_CONNECT_TIMEOUT`, as an integer number of seconds.
@@ -75,6 +81,7 @@ pub struct ClientConfig {
impl From<TimeoutConfig> for lancedb::remote::TimeoutConfig {
fn from(config: TimeoutConfig) -> Self {
Self {
timeout: config.timeout.map(std::time::Duration::from_secs_f64),
connect_timeout: config
.connect_timeout
.map(std::time::Duration::from_secs_f64),

nodejs/src/session.rs Normal file

@@ -0,0 +1,102 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
use std::sync::Arc;
use lancedb::{ObjectStoreRegistry, Session as LanceSession};
use napi::bindgen_prelude::*;
use napi_derive::*;
/// A session for managing caches and object stores across LanceDB operations.
///
/// Sessions allow you to configure cache sizes for index and metadata caches,
/// which can significantly impact memory use and performance. They can
/// also be re-used across multiple connections to share the same cache state.
#[napi]
#[derive(Clone)]
pub struct Session {
pub(crate) inner: Arc<LanceSession>,
}
impl std::fmt::Debug for Session {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Session")
.field("size_bytes", &self.inner.size_bytes())
.field("approx_num_items", &self.inner.approx_num_items())
.finish()
}
}
#[napi]
impl Session {
/// Create a new session with custom cache sizes.
///
/// # Parameters
///
/// - `index_cache_size_bytes`: The size of the index cache in bytes.
/// Index data is stored in memory in this cache to speed up queries.
/// Defaults to 6GB if not specified.
/// - `metadata_cache_size_bytes`: The size of the metadata cache in bytes.
/// The metadata cache stores file metadata and schema information in memory.
/// This cache improves scan and write performance.
/// Defaults to 1GB if not specified.
#[napi(constructor)]
pub fn new(
index_cache_size_bytes: Option<BigInt>,
metadata_cache_size_bytes: Option<BigInt>,
) -> napi::Result<Self> {
let index_cache_size = index_cache_size_bytes
.map(|size| size.get_u64().1 as usize)
.unwrap_or(6 * 1024 * 1024 * 1024); // 6GB default
let metadata_cache_size = metadata_cache_size_bytes
.map(|size| size.get_u64().1 as usize)
.unwrap_or(1024 * 1024 * 1024); // 1GB default
let session = LanceSession::new(
index_cache_size,
metadata_cache_size,
Arc::new(ObjectStoreRegistry::default()),
);
Ok(Self {
inner: Arc::new(session),
})
}
/// Create a session with default cache sizes.
///
/// This is equivalent to creating a session with 6GB index cache
/// and 1GB metadata cache.
#[napi(factory)]
pub fn default() -> Self {
Self {
inner: Arc::new(LanceSession::default()),
}
}
/// Get the current size of the session caches in bytes.
#[napi]
pub fn size_bytes(&self) -> BigInt {
BigInt::from(self.inner.size_bytes())
}
/// Get the approximate number of items cached in the session.
#[napi]
pub fn approx_num_items(&self) -> u32 {
self.inner.approx_num_items() as u32
}
}
// Implement FromNapiValue for Session to work with napi(object)
impl napi::bindgen_prelude::FromNapiValue for Session {
unsafe fn from_napi_value(
env: napi::sys::napi_env,
napi_val: napi::sys::napi_value,
) -> napi::Result<Self> {
let object: napi::bindgen_prelude::ClassInstance<Session> =
napi::bindgen_prelude::ClassInstance::from_napi_value(env, napi_val)?;
let copy = object.clone();
Ok(copy)
}
}
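
From JavaScript, the class above surfaces as shown in the session test earlier in this diff; a compact sketch (the database path is a placeholder):

```ts
import { Session, connect } from "@lancedb/lancedb";

// Defaults mirror the Rust side: 6GB index cache, 1GB metadata cache.
const session = Session.default();
const db = await connect({ uri: "/path/to/database", session });

// Cache occupancy can be inspected as tables are created and queried.
console.log(session.sizeBytes(), session.approxNumItems());
```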


@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.24.2-beta.1"
current_version = "0.24.2"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

python/.gitignore vendored

@@ -1,2 +1,3 @@
# Test data created by some example tests
data/
data/
_lancedb.pyd

python/CLAUDE.md Normal file

@@ -0,0 +1,19 @@
These are the Python bindings of LanceDB.
The core Rust library is in the `../rust/lancedb` directory, the Rust binding
code is in the `src/` directory, and the Python bindings are in the `lancedb/` directory.
Common commands:
* Build: `make develop`
* Format: `make format`
* Lint: `make check`
* Fix lints: `make fix`
* Test: `make test`
* Doc test: `make doctest`
Before committing changes, run lints and then formatting.
When you change the Rust code, you will need to recompile the Python bindings: `make develop`.
When you export new types from Rust to Python, you must manually update `python/lancedb/_lancedb.pyi`
with the corresponding type hints. You can run `pyright` to check for type errors in the Python code.


@@ -1,6 +1,6 @@
[package]
name = "lancedb-python"
version = "0.24.2-beta.1"
version = "0.24.2"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true


@@ -68,8 +68,9 @@ dev = [
"pyright",
'typing-extensions>=4.0.0; python_version < "3.11"',
]
docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings[python]"]
docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings-python"]
clip = ["torch", "pillow", "open-clip-torch"]
siglip = ["torch", "pillow", "transformers>=4.41.0","sentencepiece"]
embeddings = [
"requests>=2.31.0",
"openai>=1.6.1",
@@ -85,8 +86,9 @@ embeddings = [
"boto3>=1.28.57",
"awscli>=1.29.57",
"botocore>=1.31.57",
'ibm-watsonx-ai>=1.1.2; python_version >= "3.10"',
"ollama>=0.3.0",
"ibm-watsonx-ai>=1.1.2",
"sentencepiece"
]
azure = ["adlfs>=2024.2.0"]


@@ -18,6 +18,7 @@ from .remote import ClientConfig
from .remote.db import RemoteDBConnection
from .schema import vector
from .table import AsyncTable
from ._lancedb import Session
def connect(
@@ -30,6 +31,7 @@ def connect(
request_thread_pool: Optional[Union[int, ThreadPoolExecutor]] = None,
client_config: Union[ClientConfig, Dict[str, Any], None] = None,
storage_options: Optional[Dict[str, str]] = None,
session: Optional[Session] = None,
**kwargs: Any,
) -> DBConnection:
"""Connect to a LanceDB database.
@@ -64,6 +66,12 @@ def connect(
storage_options: dict, optional
Additional options for the storage backend. See available options at
<https://lancedb.github.io/lancedb/guides/storage/>
session: Session, optional
(For LanceDB OSS only)
A session to use for this connection. Sessions allow you to configure
cache sizes for index and metadata caches, which can significantly
impact memory use and performance. They can also be re-used across
multiple connections to share the same cache state.
Examples
--------
@@ -92,7 +100,7 @@ def connect(
if api_key is None:
api_key = os.environ.get("LANCEDB_API_KEY")
if api_key is None:
raise ValueError(f"api_key is required to connected LanceDB cloud: {uri}")
raise ValueError(f"api_key is required to connect to LanceDB cloud: {uri}")
if isinstance(request_thread_pool, int):
request_thread_pool = ThreadPoolExecutor(request_thread_pool)
return RemoteDBConnection(
@@ -113,6 +121,7 @@ def connect(
uri,
read_consistency_interval=read_consistency_interval,
storage_options=storage_options,
session=session,
)
@@ -125,6 +134,7 @@ async def connect_async(
read_consistency_interval: Optional[timedelta] = None,
client_config: Optional[Union[ClientConfig, Dict[str, Any]]] = None,
storage_options: Optional[Dict[str, str]] = None,
session: Optional[Session] = None,
) -> AsyncConnection:
"""Connect to a LanceDB database.
@@ -158,6 +168,12 @@ async def connect_async(
storage_options: dict, optional
Additional options for the storage backend. See available options at
<https://lancedb.github.io/lancedb/guides/storage/>
session: Session, optional
(For LanceDB OSS only)
A session to use for this connection. Sessions allow you to configure
cache sizes for index and metadata caches, which can significantly
impact memory use and performance. They can also be re-used across
multiple connections to share the same cache state.
Examples
--------
@@ -197,6 +213,7 @@ async def connect_async(
read_consistency_interval_secs,
client_config,
storage_options,
session,
)
)
@@ -212,6 +229,7 @@ __all__ = [
"DBConnection",
"LanceDBConnection",
"RemoteDBConnection",
"Session",
"__version__",
]
@@ -223,4 +241,4 @@ def __warn_on_fork():
if hasattr(os, "register_at_fork"):
os.register_at_fork(before=__warn_on_fork)
os.register_at_fork(before=__warn_on_fork) # type: ignore[attr-defined]


@@ -6,6 +6,19 @@ import pyarrow as pa
from .index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq, FTS
from .remote import ClientConfig
class Session:
def __init__(
self,
index_cache_size_bytes: Optional[int] = None,
metadata_cache_size_bytes: Optional[int] = None,
): ...
@staticmethod
def default() -> "Session": ...
@property
def size_bytes(self) -> int: ...
@property
def approx_num_items(self) -> int: ...
class Connection(object):
uri: str
async def table_names(
@@ -89,6 +102,7 @@ async def connect(
read_consistency_interval: Optional[float],
client_config: Optional[Union[ClientConfig, Dict[str, Any]]],
storage_options: Optional[Dict[str, str]],
session: Optional[Session],
) -> Connection: ...
class RecordBatchStream:


@@ -94,9 +94,9 @@ def data_to_reader(
else:
raise TypeError(
f"Unknown data type {type(data)}. "
"Please check "
"https://lancedb.github.io/lance/read_and_write.html "
"to see supported types."
"Supported types: list of dicts, pandas DataFrame, polars DataFrame, "
"pyarrow Table/RecordBatch, or Pydantic models. "
"See https://lancedb.github.io/lancedb/guides/tables/ for examples."
)


@@ -37,6 +37,7 @@ if TYPE_CHECKING:
from ._lancedb import Connection as LanceDbConnection
from .common import DATA, URI
from .embeddings import EmbeddingFunctionConfig
from ._lancedb import Session
class DBConnection(EnforceOverrides):
@@ -247,6 +248,9 @@ class DBConnection(EnforceOverrides):
name: str
The name of the table.
index_cache_size: int, default 256
**Deprecated**: Use session-level cache configuration instead.
Create a Session with custom cache sizes and pass it to lancedb.connect().
Set the size of the index cache, specified as a number of entries
The exact meaning of an "entry" will depend on the type of index:
@@ -354,6 +358,7 @@ class LanceDBConnection(DBConnection):
*,
read_consistency_interval: Optional[timedelta] = None,
storage_options: Optional[Dict[str, str]] = None,
session: Optional[Session] = None,
):
if not isinstance(uri, Path):
scheme = get_uri_scheme(uri)
@@ -367,6 +372,7 @@ class LanceDBConnection(DBConnection):
self._entered = False
self.read_consistency_interval = read_consistency_interval
self.storage_options = storage_options
self.session = session
if read_consistency_interval is not None:
read_consistency_interval_secs = read_consistency_interval.total_seconds()
@@ -382,6 +388,7 @@ class LanceDBConnection(DBConnection):
read_consistency_interval_secs,
None,
storage_options,
session,
)
self._conn = AsyncConnection(LOOP.run(do_connect()))
@@ -475,6 +482,17 @@ class LanceDBConnection(DBConnection):
-------
A LanceTable object representing the table.
"""
if index_cache_size is not None:
import warnings
warnings.warn(
"index_cache_size is deprecated. Use session-level cache "
"configuration instead. Create a Session with custom cache sizes "
"and pass it to lancedb.connect().",
DeprecationWarning,
stacklevel=2,
)
return LanceTable.open(
self,
name,
@@ -820,6 +838,9 @@ class AsyncConnection(object):
See available options at
<https://lancedb.github.io/lancedb/guides/storage/>
index_cache_size: int, default 256
**Deprecated**: Use session-level cache configuration instead.
Create a Session with custom cache sizes and pass it to lancedb.connect().
Set the size of the index cache, specified as a number of entries
The exact meaning of an "entry" will depend on the type of index:


@@ -11,7 +11,7 @@ from .instructor import InstructorEmbeddingFunction
from .ollama import OllamaEmbeddings
from .open_clip import OpenClipEmbeddings
from .openai import OpenAIEmbeddings
from .registry import EmbeddingFunctionRegistry, get_registry
from .registry import EmbeddingFunctionRegistry, get_registry, register
from .sentence_transformers import SentenceTransformerEmbeddings
from .gte import GteEmbeddings
from .transformers import TransformersEmbeddingFunction, ColbertEmbeddings
@@ -20,3 +20,4 @@ from .jinaai import JinaEmbeddings
from .watsonx import WatsonxEmbeddings
from .voyageai import VoyageAIEmbeddingFunction
from .colpali import ColPaliEmbeddings
from .siglip import SigLipEmbeddings


@@ -9,11 +9,14 @@ from huggingface_hub import snapshot_download
from pydantic import BaseModel
from transformers import BertTokenizer
from .utils import create_import_stub
try:
import mlx.core as mx
import mlx.nn as nn
except ImportError:
raise ImportError("You need to install MLX to use this model use - pip install mlx")
mx = create_import_stub("mlx.core", "mlx")
nn = create_import_stub("mlx.nn", "mlx")
def average_pool(last_hidden_state: mx.array, attention_mask: mx.array) -> mx.array:
@@ -72,7 +75,7 @@ class TransformerEncoder(nn.Module):
super().__init__()
self.layers = [
TransformerEncoderLayer(dims, num_heads, mlp_dims)
for i in range(num_layers)
for _ in range(num_layers)
]
def __call__(self, x, mask):


@@ -2,7 +2,7 @@
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
import json
from typing import Dict, Optional
from typing import Dict, Optional, Type
from .base import EmbeddingFunction, EmbeddingFunctionConfig
@@ -43,7 +43,7 @@ class EmbeddingFunctionRegistry:
self._functions = {}
self._variables = {}
def register(self, alias: str = None):
def register(self, alias: Optional[str] = None):
"""
This creates a decorator that can be used to register
an EmbeddingFunction.
@@ -75,7 +75,7 @@ class EmbeddingFunctionRegistry:
"""
self._functions = {}
def get(self, name: str):
def get(self, name: str) -> Type[EmbeddingFunction]:
"""
Fetch an embedding function class by name


@@ -0,0 +1,148 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
import concurrent.futures
import io
import os
from typing import TYPE_CHECKING, List, Union
import urllib.parse as urlparse
import numpy as np
import pyarrow as pa
from tqdm import tqdm
from pydantic import PrivateAttr
from ..util import attempt_import_or_raise
from .base import EmbeddingFunction
from .registry import register
from .utils import IMAGES, url_retrieve
if TYPE_CHECKING:
import PIL
import torch
@register("siglip")
class SigLipEmbeddings(EmbeddingFunction):
model_name: str = "google/siglip-base-patch16-224"
device: str = "cpu"
batch_size: int = 64
normalize: bool = True
_model = PrivateAttr()
_processor = PrivateAttr()
_tokenizer = PrivateAttr()
_torch = PrivateAttr()
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
transformers = attempt_import_or_raise("transformers")
self._torch = attempt_import_or_raise("torch")
self._processor = transformers.AutoProcessor.from_pretrained(self.model_name)
self._model = transformers.SiglipModel.from_pretrained(self.model_name)
self._model.to(self.device)
self._model.eval()
self._ndims = None
def ndims(self):
if self._ndims is None:
self._ndims = self.generate_text_embeddings("foo").shape[0]
return self._ndims
def compute_query_embeddings(
self, query: Union[str, "PIL.Image.Image"], *args, **kwargs
) -> List[np.ndarray]:
if isinstance(query, str):
return [self.generate_text_embeddings(query)]
else:
PIL = attempt_import_or_raise("PIL", "pillow")
if isinstance(query, PIL.Image.Image):
return [self.generate_image_embedding(query)]
else:
raise TypeError("SigLIP supports str or PIL Image as query")
def generate_text_embeddings(self, text: str) -> np.ndarray:
torch = self._torch
text_inputs = self._processor(
text=text,
return_tensors="pt",
padding="max_length",
truncation=True,
max_length=64,
).to(self.device)
with torch.no_grad():
text_features = self._model.get_text_features(**text_inputs)
if self.normalize:
text_features = text_features / text_features.norm(dim=-1, keepdim=True)
return text_features.cpu().detach().numpy().squeeze()
def sanitize_input(self, images: IMAGES) -> Union[List[bytes], np.ndarray]:
if isinstance(images, (str, bytes)):
images = [images]
elif isinstance(images, pa.Array):
images = images.to_pylist()
elif isinstance(images, pa.ChunkedArray):
images = images.combine_chunks().to_pylist()
return images
def compute_source_embeddings(
self, images: IMAGES, *args, **kwargs
) -> List[np.ndarray]:
images = self.sanitize_input(images)
embeddings = []
for i in range(0, len(images), self.batch_size):
j = min(i + self.batch_size, len(images))
batch = images[i:j]
embeddings.extend(self._parallel_get(batch))
return embeddings
def _parallel_get(self, images: Union[List[str], List[bytes]]) -> List[np.ndarray]:
with concurrent.futures.ThreadPoolExecutor() as executor:
futures = [
executor.submit(self.generate_image_embedding, image)
for image in images
]
return [f.result() for f in tqdm(futures, desc="SigLIP Embedding")]
def generate_image_embedding(
self, image: Union[str, bytes, "PIL.Image.Image"]
) -> np.ndarray:
image = self._to_pil(image)
image = self._processor(images=image, return_tensors="pt")["pixel_values"]
return self._encode_and_normalize_image(image)
def _encode_and_normalize_image(self, image_tensor: "torch.Tensor") -> np.ndarray:
torch = self._torch
with torch.no_grad():
image_features = self._model.get_image_features(
image_tensor.to(self.device)
)
if self.normalize:
image_features = image_features / image_features.norm(
dim=-1, keepdim=True
)
return image_features.cpu().detach().numpy().squeeze()
def _to_pil(self, image: Union[str, bytes, "PIL.Image.Image"]):
PIL = attempt_import_or_raise("PIL", "pillow")
if isinstance(image, PIL.Image.Image):
return image.convert("RGB") if image.mode != "RGB" else image
elif isinstance(image, bytes):
return PIL.Image.open(io.BytesIO(image)).convert("RGB")
elif isinstance(image, str):
parsed = urlparse.urlparse(image)
if parsed.scheme == "file":
return PIL.Image.open(parsed.path).convert("RGB")
elif parsed.scheme == "":
path = image if os.name == "nt" else parsed.path
return PIL.Image.open(path).convert("RGB")
elif parsed.scheme.startswith("http"):
image_bytes = url_retrieve(image)
return PIL.Image.open(io.BytesIO(image_bytes)).convert("RGB")
else:
raise NotImplementedError("Only local and http(s) urls are supported")
else:
raise ValueError(f"Unsupported image type: {type(image)}")


@@ -21,6 +21,36 @@ from ..dependencies import pandas as pd
from ..util import attempt_import_or_raise
def create_import_stub(module_name: str, package_name: str = None):
"""
Create a stub module that allows class definition but fails when used.
This allows modules to be imported for doctest collection even when
optional dependencies are not available.
Parameters
----------
module_name : str
The name of the module to create a stub for
package_name : str, optional
The package name to suggest in the error message
Returns
-------
object
A stub object that can be used in place of the module
"""
class _ImportStub:
def __getattr__(self, name):
return _ImportStub # Return stub for chained access like nn.Module
def __call__(self, *args, **kwargs):
pkg = package_name or module_name
raise ImportError(f"You need to install {pkg} to use this functionality")
return _ImportStub()
# ruff: noqa: PERF203
def retry(tries=10, delay=1, max_delay=30, backoff=3, jitter=1):
def wrapper(fn):


@@ -14,7 +14,7 @@ from typing import (
Literal,
Optional,
Tuple,
Type,
TypeVar,
Union,
Any,
)
@@ -58,6 +58,8 @@ if TYPE_CHECKING:
else:
from typing_extensions import Self
T = TypeVar("T", bound="LanceModel")
# Pydantic validation function for vector queries
def ensure_vector_query(
@@ -746,8 +748,8 @@ class LanceQueryBuilder(ABC):
return self.to_arrow(timeout=timeout).to_pylist()
def to_pydantic(
self, model: Type[LanceModel], *, timeout: Optional[timedelta] = None
) -> List[LanceModel]:
self, model: type[T], *, timeout: Optional[timedelta] = None
) -> list[T]:
"""Return the table as a list of pydantic models.
Parameters
@@ -906,11 +908,11 @@ class LanceQueryBuilder(ABC):
>>> plan = table.search(query).explain_plan(True)
>>> print(plan) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
ProjectionExec: expr=[vector@0 as vector, _distance@2 as _distance]
GlobalLimitExec: skip=0, fetch=10
FilterExec: _distance@2 IS NOT NULL
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
KNNVectorDistance: metric=l2
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
GlobalLimitExec: skip=0, fetch=10
FilterExec: _distance@2 IS NOT NULL
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
KNNVectorDistance: metric=l2
LanceRead: uri=..., projection=[vector], ...
Parameters
----------
@@ -940,19 +942,19 @@ class LanceQueryBuilder(ABC):
>>> plan = table.search(query).analyze_plan()
>>> print(plan) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
AnalyzeExec verbose=true, metrics=[]
ProjectionExec: expr=[...], metrics=[...]
GlobalLimitExec: skip=0, fetch=10, metrics=[...]
FilterExec: _distance@2 IS NOT NULL,
metrics=[output_rows=..., elapsed_compute=...]
SortExec: TopK(fetch=10), expr=[...],
preserve_partitioning=[...],
metrics=[output_rows=..., elapsed_compute=..., row_replacements=...]
KNNVectorDistance: metric=l2,
metrics=[output_rows=..., elapsed_compute=..., output_batches=...]
LanceScan: uri=..., projection=[vector], row_id=true,
row_addr=false, ordered=false,
metrics=[output_rows=..., elapsed_compute=...,
bytes_read=..., iops=..., requests=...]
TracedExec, metrics=[]
ProjectionExec: expr=[...], metrics=[...]
GlobalLimitExec: skip=0, fetch=10, metrics=[...]
FilterExec: _distance@2 IS NOT NULL,
metrics=[output_rows=..., elapsed_compute=...]
SortExec: TopK(fetch=10), expr=[...],
preserve_partitioning=[...],
metrics=[output_rows=..., elapsed_compute=..., row_replacements=...]
KNNVectorDistance: metric=l2,
metrics=[output_rows=..., elapsed_compute=..., output_batches=...]
LanceRead: uri=..., projection=[vector], ...
metrics=[output_rows=..., elapsed_compute=...,
bytes_read=..., iops=..., requests=...]
Returns
-------
@@ -2043,7 +2045,7 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
FilterExec: _distance@2 IS NOT NULL
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
KNNVectorDistance: metric=l2
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
LanceRead: uri=..., projection=[vector], ...
Parameters
----------
@@ -2429,7 +2431,7 @@ class AsyncQueryBase(object):
FilterExec: _distance@2 IS NOT NULL
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
KNNVectorDistance: metric=l2
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
LanceRead: uri=..., projection=[vector], ...
Parameters
----------
@@ -3054,7 +3056,7 @@ class AsyncHybridQuery(AsyncQueryBase, AsyncVectorQueryBase):
FilterExec: _distance@2 IS NOT NULL
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
KNNVectorDistance: metric=l2
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
LanceRead: uri=..., projection=[vector], ...
<BLANKLINE>
FTS Search Plan:
ProjectionExec: expr=[vector@2 as vector, text@3 as text, _score@1 as _score]


@@ -17,6 +17,12 @@ class TimeoutConfig:
Attributes
----------
timeout: Optional[timedelta]
The overall timeout for the entire request. This includes connection,
send, and read time. If the entire request doesn't complete within
this time, it will fail. Default is None (no overall timeout).
This can also be set via the environment variable
`LANCE_CLIENT_TIMEOUT`, as an integer number of seconds.
connect_timeout: Optional[timedelta]
The timeout for establishing a connection. Default is 120 seconds (2 minutes).
This can also be set via the environment variable
@@ -31,6 +37,7 @@ class TimeoutConfig:
`LANCE_CLIENT_CONNECTION_TIMEOUT`, as an integer number of seconds.
"""
timeout: Optional[timedelta] = None
connect_timeout: Optional[timedelta] = None
read_timeout: Optional[timedelta] = None
pool_idle_timeout: Optional[timedelta] = None
@@ -50,6 +57,7 @@ class TimeoutConfig:
)
def __post_init__(self):
self.timeout = self.__to_timedelta(self.timeout)
self.connect_timeout = self.__to_timedelta(self.connect_timeout)
self.read_timeout = self.__to_timedelta(self.read_timeout)
self.pool_idle_timeout = self.__to_timedelta(self.pool_idle_timeout)

Some files were not shown because too many files have changed in this diff