Update mkdocs.yml

fix: always use slashes in table uris (#2575)
Closes #2574
2025-12-23 21:39:57 +00:00 · 2025-08-06 17:17:45 -07:00 · 2025-08-05 12:12:57 -07:00 · 2025-08-04 15:36:49 -07:00 · 2025-08-04 14:14:33 -07:00 · 2025-08-04 11:42:39 -07:00
99 changed files with 739 additions and 14238 deletions
--- a/.bumpversion.toml
+++ b/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.21.2-beta.1"
+current_version = "0.21.2"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
@@ -50,11 +50,6 @@ pre_commit_hooks = [
 optional_value = "final"
 values = ["beta", "final"]

-[[tool.bumpversion.files]]
-filename = "node/package.json"
-replace = "\"version\": \"{new_version}\","
-search = "\"version\": \"{current_version}\","
-
 [[tool.bumpversion.files]]
 filename = "nodejs/package.json"
 replace = "\"version\": \"{new_version}\","
@@ -66,39 +61,8 @@ glob = "nodejs/npm/*/package.json"
 replace = "\"version\": \"{new_version}\","
 search = "\"version\": \"{current_version}\","

-# vectodb node binary packages
-[[tool.bumpversion.files]]
-glob = "node/package.json"
-replace = "\"@lancedb/vectordb-darwin-arm64\": \"{new_version}\""
-search = "\"@lancedb/vectordb-darwin-arm64\": \"{current_version}\""
-
-[[tool.bumpversion.files]]
-glob = "node/package.json"
-replace = "\"@lancedb/vectordb-darwin-x64\": \"{new_version}\""
-search = "\"@lancedb/vectordb-darwin-x64\": \"{current_version}\""
-
-[[tool.bumpversion.files]]
-glob = "node/package.json"
-replace = "\"@lancedb/vectordb-linux-arm64-gnu\": \"{new_version}\""
-search = "\"@lancedb/vectordb-linux-arm64-gnu\": \"{current_version}\""
-
-[[tool.bumpversion.files]]
-glob = "node/package.json"
-replace = "\"@lancedb/vectordb-linux-x64-gnu\": \"{new_version}\""
-search = "\"@lancedb/vectordb-linux-x64-gnu\": \"{current_version}\""
-
-[[tool.bumpversion.files]]
-glob = "node/package.json"
-replace = "\"@lancedb/vectordb-win32-x64-msvc\": \"{new_version}\""
-search = "\"@lancedb/vectordb-win32-x64-msvc\": \"{current_version}\""
-
 # Cargo files
 # ------------
-[[tool.bumpversion.files]]
-filename = "rust/ffi/node/Cargo.toml"
-replace = "\nversion = \"{new_version}\""
-search = "\nversion = \"{current_version}\""
-
 [[tool.bumpversion.files]]
 filename = "rust/lancedb/Cargo.toml"
 replace = "\nversion = \"{new_version}\""
--- a/.github/workflows/node.yml
+++ b/.github/workflows/node.yml
@@ -1,147 +0,0 @@
-name: Node
-
-on:
-  push:
-    branches:
-      - main
-  pull_request:
-    paths:
-      - node/**
-      - rust/ffi/node/**
-      - .github/workflows/node.yml
-      - docker-compose.yml
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
-  cancel-in-progress: true
-
-env:
-  # Disable full debug symbol generation to speed up CI build and keep memory down
-  # "1" means line tables only, which is useful for panic tracebacks.
-  #
-  # Use native CPU to accelerate tests if possible, especially for f16
-  # target-cpu=haswell fixes failing ci build
-  RUSTFLAGS: "-C debuginfo=1 -C target-cpu=haswell -C target-feature=+f16c,+avx2,+fma"
-  RUST_BACKTRACE: "1"
-
-jobs:
-  linux:
-    name: Linux (Node ${{ matrix.node-version }})
-    timeout-minutes: 30
-    strategy:
-      matrix:
-        node-version: [ "18", "20" ]
-    runs-on: "ubuntu-22.04"
-    defaults:
-      run:
-        shell: bash
-        working-directory: node
-    steps:
-    - uses: actions/checkout@v4
-      with:
-        fetch-depth: 0
-        lfs: true
-    - uses: actions/setup-node@v3
-      with:
-        node-version: ${{ matrix.node-version }}
-        cache: 'npm'
-        cache-dependency-path: node/package-lock.json
-    - uses: Swatinem/rust-cache@v2
-    - name: Install dependencies
-      run: |
-        sudo apt update
-        sudo apt install -y protobuf-compiler libssl-dev
-    - name: Build
-      run: |
-        npm ci
-        npm run build
-        npm run pack-build
-        npm install --no-save ./dist/lancedb-vectordb-*.tgz
-        # Remove index.node to test with dependency installed
-        rm index.node
-    - name: Test
-      run: npm run test
-  macos:
-    timeout-minutes: 30
-    runs-on: "macos-13"
-    defaults:
-      run:
-        shell: bash
-        working-directory: node
-    steps:
-    - uses: actions/checkout@v4
-      with:
-        fetch-depth: 0
-        lfs: true
-    - uses: actions/setup-node@v3
-      with:
-        node-version: 20
-        cache: 'npm'
-        cache-dependency-path: node/package-lock.json
-    - uses: Swatinem/rust-cache@v2
-    - name: Install dependencies
-      run: brew install protobuf
-    - name: Build
-      run: |
-        npm ci
-        npm run build
-        npm run pack-build
-        npm install --no-save ./dist/lancedb-vectordb-*.tgz
-        # Remove index.node to test with dependency installed
-        rm index.node
-    - name: Test
-      run: |
-        npm run test
-  aws-integtest:
-    timeout-minutes: 45
-    runs-on: "ubuntu-22.04"
-    defaults:
-      run:
-        shell: bash
-        working-directory: node
-    env:
-      AWS_ACCESS_KEY_ID: ACCESSKEY
-      AWS_SECRET_ACCESS_KEY: SECRETKEY
-      AWS_DEFAULT_REGION: us-west-2
-      # this one is for s3
-      AWS_ENDPOINT: http://localhost:4566
-      # this one is for dynamodb
-      DYNAMODB_ENDPOINT: http://localhost:4566
-      ALLOW_HTTP: true
-    steps:
-    - uses: actions/checkout@v4
-      with:
-        fetch-depth: 0
-        lfs: true
-    - uses: actions/setup-node@v3
-      with:
-        node-version: 20
-        cache: 'npm'
-        cache-dependency-path: node/package-lock.json
-    - name: start local stack
-      run: docker compose -f ../docker-compose.yml up -d --wait
-    - name: create s3
-      run: aws s3 mb s3://lancedb-integtest --endpoint $AWS_ENDPOINT
-    - name: create ddb
-      run: |
-        aws dynamodb create-table \
-          --table-name lancedb-integtest \
-          --attribute-definitions '[{"AttributeName": "base_uri", "AttributeType": "S"}, {"AttributeName": "version", "AttributeType": "N"}]' \
-          --key-schema '[{"AttributeName": "base_uri", "KeyType": "HASH"}, {"AttributeName": "version", "KeyType": "RANGE"}]' \
-          --provisioned-throughput '{"ReadCapacityUnits": 10, "WriteCapacityUnits": 10}' \
-          --endpoint-url $DYNAMODB_ENDPOINT
-    - uses: Swatinem/rust-cache@v2
-    - name: Install dependencies
-      run: |
-        sudo apt update
-        sudo apt install -y protobuf-compiler libssl-dev
-    - name: Build
-      run: |
-        npm ci
-        npm run build
-        npm run pack-build
-        npm install --no-save ./dist/lancedb-vectordb-*.tgz
-        # Remove index.node to test with dependency installed
-        rm index.node
-    - name: Test
-      run: npm run integration-test
--- a/.github/workflows/npm-publish.yml
+++ b/.github/workflows/npm-publish.yml
@@ -365,200 +365,3 @@ jobs:
            ARGS="$ARGS --tag preview"
          fi
          npm publish $ARGS
-
-
-  # ----------------------------------------------------------------------------
-  # vectordb release (legacy)
-  # ----------------------------------------------------------------------------
-  # TODO: delete this when we drop vectordb
-  node:
-    name: vectordb Typescript
-    runs-on: ubuntu-latest
-    defaults:
-      run:
-        shell: bash
-        working-directory: node
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-      - uses: actions/setup-node@v3
-        with:
-          node-version: 20
-          cache: "npm"
-          cache-dependency-path: node/package-lock.json
-      - name: Install dependencies
-        run: |
-          sudo apt update
-          sudo apt install -y protobuf-compiler libssl-dev
-      - name: Build
-        run: |
-          npm ci
-          npm run tsc
-          npm pack
-      - name: Upload Linux Artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          name: node-package
-          path: |
-            node/vectordb-*.tgz
-
-  node-macos:
-    name: vectordb ${{ matrix.config.arch }}
-    strategy:
-      matrix:
-        config:
-          - arch: x86_64-apple-darwin
-            runner: macos-13
-          - arch: aarch64-apple-darwin
-            # xlarge is implicitly arm64.
-            runner: macos-14
-    runs-on: ${{ matrix.config.runner }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-      - name: Install system dependencies
-        run: brew install protobuf
-      - name: Install npm dependencies
-        run: |
-          cd node
-          npm ci
-      - name: Build MacOS native node modules
-        run: bash ci/build_macos_artifacts.sh ${{ matrix.config.arch }}
-      - name: Upload Darwin Artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          name: node-native-darwin-${{ matrix.config.arch }}
-          path: |
-            node/dist/lancedb-vectordb-darwin*.tgz
-
-  node-linux-gnu:
-    name: vectordb (${{ matrix.config.arch}}-unknown-linux-gnu)
-    runs-on: ${{ matrix.config.runner }}
-    strategy:
-      fail-fast: false
-      matrix:
-        config:
-          - arch: x86_64
-            runner: ubuntu-latest
-          - arch: aarch64
-            # For successful fat LTO builds, we need a large runner to avoid OOM errors.
-            runner: warp-ubuntu-latest-arm64-4x
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-      # To avoid OOM errors on ARM, we create a swap file.
-      - name: Configure aarch64 build
-        if: ${{ matrix.config.arch == 'aarch64' }}
-        run: |
-          free -h
-          sudo fallocate -l 16G /swapfile
-          sudo chmod 600 /swapfile
-          sudo mkswap /swapfile
-          sudo swapon /swapfile
-          echo "/swapfile swap swap defaults 0 0" >> sudo /etc/fstab
-          # print info
-          swapon --show
-          free -h
-      - name: Build Linux Artifacts
-        run: |
-          bash ci/build_linux_artifacts.sh ${{ matrix.config.arch }} ${{ matrix.config.arch }}-unknown-linux-gnu
-      - name: Upload Linux Artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          name: node-native-linux-${{ matrix.config.arch }}-gnu
-          path: |
-            node/dist/lancedb-vectordb-linux*.tgz
-
-  node-windows:
-    name: vectordb ${{ matrix.target }}
-    runs-on: windows-2022
-    strategy:
-      fail-fast: false
-      matrix:
-        target: [x86_64-pc-windows-msvc]
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-      - name: Install Protoc v21.12
-        working-directory: C:\
-        run: |
-          New-Item -Path 'C:\protoc' -ItemType Directory
-          Set-Location C:\protoc
-          Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
-          7z x protoc.zip
-          Add-Content $env:GITHUB_PATH "C:\protoc\bin"
-        shell: powershell
-      - name: Install npm dependencies
-        run: |
-          cd node
-          npm ci
-      - name: Build Windows native node modules
-        run: .\ci\build_windows_artifacts.ps1 ${{ matrix.target }}
-      - name: Upload Windows Artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          name: node-native-windows
-          path: |
-            node/dist/lancedb-vectordb-win32*.tgz
-
-  release:
-    name: vectordb NPM Publish
-    needs: [node, node-macos, node-linux-gnu, node-windows]
-    runs-on: ubuntu-latest
-    permissions:
-      contents: write
-    # Only runs on tags that matches the make-release action
-    if: startsWith(github.ref, 'refs/tags/v')
-    steps:
-      - uses: actions/download-artifact@v4
-        with:
-          pattern: node-*
-      - name: Display structure of downloaded files
-        run: ls -R
-      - uses: actions/setup-node@v3
-        with:
-          node-version: 20
-          registry-url: "https://registry.npmjs.org"
-      - name: Publish to NPM
-        env:
-          NODE_AUTH_TOKEN: ${{ secrets.LANCEDB_NPM_REGISTRY_TOKEN }}
-        run: |
-          # Tag beta as "preview" instead of default "latest". See lancedb
-          # npm publish step for more info.
-          if [[ $GITHUB_REF =~ refs/tags/v(.*)-beta.* ]]; then
-            PUBLISH_ARGS="--tag preview"
-          fi
-
-          mv */*.tgz .
-          for filename in *.tgz; do
-            npm publish $PUBLISH_ARGS $filename
-          done
-      - name: Deprecate
-        env:
-          NODE_AUTH_TOKEN: ${{ secrets.LANCEDB_NPM_REGISTRY_TOKEN }}
-        # We need to deprecate the old package to avoid confusion.
-        # Each time we publish a new version, it gets undeprecated.
-        run: npm deprecate vectordb "Use @lancedb/lancedb instead."
-      - name: Checkout
-        uses: actions/checkout@v4
-        with:
-          ref: main
-      - name: Update package-lock.json
-        run: |
-          git config user.name 'Lance Release'
-          git config user.email 'lance-dev@lancedb.com'
-          bash ci/update_lockfiles.sh
-      - name: Push new commit
-        uses: ad-m/github-push-action@master
-        with:
-          github_token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}
-          branch: main
-      - name: Notify Slack Action
-        uses: ravsamhq/notify-slack-action@2.3.0
-        if: ${{ always() }}
-        with:
-          status: ${{ job.status }}
-          notify_when: "failure"
-          notification_title: "{workflow} is failing"
-        env:
-          SLACK_WEBHOOK_URL: ${{ secrets.ACTION_MONITORING_SLACK }}
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1480,7 +1480,7 @@ checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
 dependencies = [
 "glob",
 "libc",
- "libloading 0.8.8",
+ "libloading",
 ]

 [[package]]
@@ -1573,15 +1573,6 @@ version = "0.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6"

-[[package]]
-name = "conv"
-version = "0.3.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "78ff10625fd0ac447827aa30ea8b861fead473bb60aeb73af6c1c58caf0d1299"
-dependencies = [
- "custom_derive",
-]
-
 [[package]]
 name = "convert_case"
 version = "0.6.0"
@@ -1797,12 +1788,6 @@ dependencies = [
 "syn 2.0.103",
 ]

-[[package]]
-name = "custom_derive"
-version = "0.1.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ef8ae57c4978a2acd8b869ce6b9ca1dfe817bff704c220209fdef2c0b75a01b9"
-
 [[package]]
 name = "darling"
 version = "0.20.11"
@@ -2852,9 +2837,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"

 [[package]]
 name = "fsst"
-version = "0.32.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "99b0ce83d91fe637d97c127ac8df19f57e6012a5472c339154e5100cb107df4c"
+version = "0.32.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
 dependencies = [
 "arrow-array",
 "rand 0.8.5",
@@ -3967,9 +3951,8 @@ dependencies = [

 [[package]]
 name = "lance"
-version = "0.32.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7484555bbe6f7898d6a283f89ecd3e2ba85a0f28d9a9e6f15f3018d8adaebdd9"
+version = "0.32.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
 dependencies = [
 "arrow",
 "arrow-arith",
@@ -4031,9 +4014,8 @@ dependencies = [

 [[package]]
 name = "lance-arrow"
-version = "0.32.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8674ce4b27d131ac98692dbc0b28f43690defa6ca63303b3cab21e6beaf43868"
+version = "0.32.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
 dependencies = [
 "arrow-array",
 "arrow-buffer",
@@ -4050,9 +4032,8 @@ dependencies = [

 [[package]]
 name = "lance-core"
-version = "0.32.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a1dd99bf06d5e322e81ff84cc2ce12b463836b4fba2bc1e0223085e1c8d7b71a"
+version = "0.32.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
 dependencies = [
 "arrow-array",
 "arrow-buffer",
@@ -4087,9 +4068,8 @@ dependencies = [

 [[package]]
 name = "lance-datafusion"
-version = "0.32.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "29e78724715c1cb255ea3ac749b617406d91db6565ea77d531c1aba46716efc4"
+version = "0.32.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
 dependencies = [
 "arrow",
 "arrow-array",
@@ -4117,9 +4097,8 @@ dependencies = [

 [[package]]
 name = "lance-datagen"
-version = "0.32.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0cc5fa5f59bf65d02118fcc05615b511c03222f5240c4a18218f1297f97bcdf7"
+version = "0.32.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
 dependencies = [
 "arrow",
 "arrow-array",
@@ -4135,9 +4114,8 @@ dependencies = [

 [[package]]
 name = "lance-encoding"
-version = "0.32.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a550fe9d4d931c48177691b9c085baf158bfde4ed7b6055eb27fed54174e5767"
+version = "0.32.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
 dependencies = [
 "arrayref",
 "arrow",
@@ -4176,9 +4154,8 @@ dependencies = [

 [[package]]
 name = "lance-file"
-version = "0.32.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e2d338a50e09bc5af5773cdc5d269680288847d1d34a4622063cce8ad4b5375b"
+version = "0.32.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
 dependencies = [
 "arrow-arith",
 "arrow-array",
@@ -4212,9 +4189,8 @@ dependencies = [

 [[package]]
 name = "lance-index"
-version = "0.32.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "14cbcb44403ee477ab4e53194e4c322295959785a7056b33043a2f9f01fa0f8a"
+version = "0.32.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
 dependencies = [
 "arrow",
 "arrow-array",
@@ -4267,9 +4243,8 @@ dependencies = [

 [[package]]
 name = "lance-io"
-version = "0.32.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "933c8dad56aa3048c421f336b20f23f507cc47271fcc18bea8b4052c247a170e"
+version = "0.32.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
 dependencies = [
 "arrow",
 "arrow-arith",
@@ -4309,9 +4284,8 @@ dependencies = [

 [[package]]
 name = "lance-linalg"
-version = "0.32.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2540ae40b7c35901be13541437c947aadb5a6afb2110f7275e90884aeee4cc07"
+version = "0.32.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
 dependencies = [
 "arrow-array",
 "arrow-buffer",
@@ -4334,9 +4308,8 @@ dependencies = [

 [[package]]
 name = "lance-table"
-version = "0.32.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "31e1cfa3e031b5795330eec7808baa1c2e105a067adf0790e5bb9a51aa7256ff"
+version = "0.32.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
 dependencies = [
 "arrow",
 "arrow-array",
@@ -4374,9 +4347,8 @@ dependencies = [

 [[package]]
 name = "lance-testing"
-version = "0.32.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2847faaa98fdb2facc75ae515e553ea67e68d0b05de41ac577b8038e1bbafac8"
+version = "0.32.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.32.1-beta.2#2d57f221d3f13a96b1eac5b072c07a92b52e93cf"
 dependencies = [
 "arrow-array",
 "arrow-schema",
@@ -4387,7 +4359,7 @@ dependencies = [

 [[package]]
 name = "lancedb"
-version = "0.21.2-beta.1"
+version = "0.21.2"
 dependencies = [
 "arrow",
 "arrow-array",
@@ -4443,7 +4415,7 @@ dependencies = [
 "regex",
 "reqwest",
 "rstest",
- "semver 1.0.26",
+ "semver",
 "serde",
 "serde_json",
 "serde_with",
@@ -4472,34 +4444,9 @@ dependencies = [
 "tokio",
 ]

-[[package]]
-name = "lancedb-node"
-version = "0.21.2-beta.1"
-dependencies = [
- "arrow-array",
- "arrow-ipc",
- "arrow-schema",
- "async-trait",
- "chrono",
- "conv",
- "env_logger",
- "futures",
- "half",
- "lance",
- "lance-index",
- "lance-linalg",
- "lancedb",
- "lzma-sys",
- "neon",
- "object_store",
- "once_cell",
- "snafu",
- "tokio",
-]
-
 [[package]]
 name = "lancedb-nodejs"
-version = "0.21.2-beta.1"
+version = "0.21.2"
 dependencies = [
 "arrow-array",
 "arrow-ipc",
@@ -4519,7 +4466,7 @@ dependencies = [

 [[package]]
 name = "lancedb-python"
-version = "0.24.2-beta.1"
+version = "0.24.2"
 dependencies = [
 "arrow",
 "env_logger",
@@ -4620,16 +4567,6 @@ version = "0.2.174"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776"

-[[package]]
-name = "libloading"
-version = "0.6.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "351a32417a12d5f7e82c368a66781e307834dae04c6ce0cd4456d52989229883"
-dependencies = [
- "cfg-if",
- "winapi",
-]
-
 [[package]]
 name = "libloading"
 version = "0.8.8"
@@ -5008,7 +4945,7 @@ dependencies = [
 "proc-macro2",
 "quote",
 "regex",
- "semver 1.0.26",
+ "semver",
 "syn 2.0.103",
 ]

@@ -5018,48 +4955,7 @@ version = "2.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "427802e8ec3a734331fec1035594a210ce1ff4dc5bc1950530920ab717964ea3"
 dependencies = [
- "libloading 0.8.8",
-]
-
-[[package]]
-name = "neon"
-version = "0.10.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "28e15415261d880aed48122e917a45e87bb82cf0260bb6db48bbab44b7464373"
-dependencies = [
- "neon-build",
- "neon-macros",
- "neon-runtime",
- "semver 0.9.0",
- "smallvec",
-]
-
-[[package]]
-name = "neon-build"
-version = "0.10.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8bac98a702e71804af3dacfde41edde4a16076a7bbe889ae61e56e18c5b1c811"
-
-[[package]]
-name = "neon-macros"
-version = "0.10.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b7288eac8b54af7913c60e0eb0e2a7683020dffa342ab3fd15e28f035ba897cf"
-dependencies = [
- "quote",
- "syn 1.0.109",
- "syn-mid",
-]
-
-[[package]]
-name = "neon-runtime"
-version = "0.10.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4676720fa8bb32c64c3d9f49c47a47289239ec46b4bdb66d0913cc512cb0daca"
-dependencies = [
- "cfg-if",
- "libloading 0.6.7",
- "smallvec",
+ "libloading",
 ]

 [[package]]
@@ -6741,7 +6637,7 @@ version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92"
 dependencies = [
- "semver 1.0.26",
+ "semver",
 ]

 [[package]]
@@ -7006,27 +6902,12 @@ dependencies = [
 "libc",
 ]

-[[package]]
-name = "semver"
-version = "0.9.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403"
-dependencies = [
- "semver-parser",
-]
-
 [[package]]
 name = "semver"
 version = "1.0.26"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0"

-[[package]]
-name = "semver-parser"
-version = "0.7.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
-
 [[package]]
 name = "seq-macro"
 version = "0.3.6"
@@ -7426,17 +7307,6 @@ dependencies = [
 "unicode-ident",
 ]

-[[package]]
-name = "syn-mid"
-version = "0.5.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fea305d57546cc8cd04feb14b62ec84bf17f50e3f7b12560d7bfa9265f39d9ed"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 1.0.109",
-]
-
 [[package]]
 name = "sync_wrapper"
 version = "1.0.2"
@@ -8072,7 +7942,7 @@ checksum = "90b70b37e9074642bc5f60bb23247fd072a84314ca9e71cdf8527593406a0dd3"
 dependencies = [
 "gemm 0.18.2",
 "half",
- "libloading 0.8.8",
+ "libloading",
 "memmap2 0.9.5",
 "num",
 "num-traits",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,5 @@
 [workspace]
 members = [
-    "rust/ffi/node",
    "rust/lancedb",
    "nodejs",
    "python",
@@ -21,14 +20,16 @@ categories = ["database-implementations"]
 rust-version = "1.78.0"

 [workspace.dependencies]
-lance = { "version" = "=0.32.0", "features" = ["dynamodb"] }
-lance-io = "=0.32.0"
-lance-index = "=0.32.0"
-lance-linalg = "=0.32.0"
-lance-table = "=0.32.0"
-lance-testing = "=0.32.0"
-lance-datafusion = "=0.32.0"
-lance-encoding = "=0.32.0"
+lance = { "version" = "=0.32.1", "features" = [
+    "dynamodb",
+], "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+lance-io = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+lance-index = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+lance-linalg = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+lance-table = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+lance-testing = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+lance-datafusion = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+lance-encoding = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
 # Note that this one does not include pyarrow
 arrow = { version = "55.1", optional = false }
 arrow-array = "55.1"
--- a/ci/build_linux_artifacts.sh
+++ b/ci/build_linux_artifacts.sh
@@ -1,22 +0,0 @@
-#!/bin/bash
-set -e
-ARCH=${1:-x86_64}
-TARGET_TRIPLE=${2:-x86_64-unknown-linux-gnu}
-
-# We pass down the current user so that when we later mount the local files
-# into the container, the files are accessible by the current user.
-pushd ci/manylinux_node
-docker build \
-    -t lancedb-node-manylinux \
-    --build-arg="ARCH=$ARCH" \
-    --build-arg="DOCKER_USER=$(id -u)" \
-    --progress=plain \
-    .
-popd
-
-# We turn on memory swap to avoid OOM killer
-docker run \
-    -v $(pwd):/io -w /io \
-    --memory-swap=-1 \
-    lancedb-node-manylinux \
-    bash ci/manylinux_node/build_vectordb.sh $ARCH $TARGET_TRIPLE
--- a/ci/build_macos_artifacts.sh
+++ b/ci/build_macos_artifacts.sh
@@ -1,34 +0,0 @@
-# Builds the macOS artifacts (node binaries).
-# Usage: ./ci/build_macos_artifacts.sh [target]
-# Targets supported: x86_64-apple-darwin aarch64-apple-darwin
-set -e
-
-prebuild_rust() {
-    # Building here for the sake of easier debugging.
-    pushd rust/ffi/node
-    echo "Building rust library for $1"
-    export RUST_BACKTRACE=1
-    cargo build --release --target $1
-    popd
-}
-
-build_node_binaries() {
-    pushd node
-    echo "Building node library for $1"
-    npm run build-release -- --target $1
-    npm run pack-build -- --target $1
-    popd
-}
-
-if [ -n "$1" ]; then
-    targets=$1
-else
-    targets="x86_64-apple-darwin aarch64-apple-darwin"
-fi
-
-echo "Building artifacts for targets: $targets"
-for target in $targets
-    do
-    prebuild_rust $target
-    build_node_binaries $target
-done
--- a/ci/build_windows_artifacts.ps1
+++ b/ci/build_windows_artifacts.ps1
@@ -1,42 +0,0 @@
-# Builds the Windows artifacts (node binaries).
-# Usage:  .\ci\build_windows_artifacts.ps1 [target]
-# Targets supported:
-# - x86_64-pc-windows-msvc
-# - i686-pc-windows-msvc
-# - aarch64-pc-windows-msvc
-
-function Prebuild-Rust {
-    param (
-        [string]$target
-    )
-
-    # Building here for the sake of easier debugging.
-    Push-Location -Path "rust/ffi/node"
-    Write-Host "Building rust library for $target"
-    $env:RUST_BACKTRACE=1
-    cargo build --release --target $target
-    Pop-Location
-}
-
-function Build-NodeBinaries {
-    param (
-        [string]$target
-    )
-
-    Push-Location -Path "node"
-    Write-Host "Building node library for $target"
-    npm run build-release -- --target $target
-    npm run pack-build -- --target $target
-    Pop-Location
-}
-
-$targets = $args[0]
-if (-not $targets) {
-    $targets = "x86_64-pc-windows-msvc", "aarch64-pc-windows-msvc"
-}
-
-Write-Host "Building artifacts for targets: $targets"
-foreach ($target in $targets) {
-    Prebuild-Rust $target
-    Build-NodeBinaries $target
-}
--- a/ci/build_windows_artifacts_nodejs.ps1
+++ b/ci/build_windows_artifacts_nodejs.ps1
@@ -1,42 +0,0 @@
-# Builds the Windows artifacts (nodejs binaries).
-# Usage:  .\ci\build_windows_artifacts_nodejs.ps1 [target]
-# Targets supported:
-# - x86_64-pc-windows-msvc
-# - i686-pc-windows-msvc
-# - aarch64-pc-windows-msvc
-
-function Prebuild-Rust {
-    param (
-        [string]$target
-    )
-
-    # Building here for the sake of easier debugging.
-    Push-Location -Path "rust/lancedb"
-    Write-Host "Building rust library for $target"
-    $env:RUST_BACKTRACE=1
-    cargo build --release --target $target
-    Pop-Location
-}
-
-function Build-NodeBinaries {
-    param (
-        [string]$target
-    )
-
-    Push-Location -Path "nodejs"
-    Write-Host "Building nodejs library for $target"
-    $env:RUST_TARGET=$target
-    npm run build-release
-    Pop-Location
-}
-
-$targets = $args[0]
-if (-not $targets) {
-    $targets = "x86_64-pc-windows-msvc", "aarch64-pc-windows-msvc"
-}
-
-Write-Host "Building artifacts for targets: $targets"
-foreach ($target in $targets) {
-    Prebuild-Rust $target
-    Build-NodeBinaries $target
-}
--- a/ci/manylinux_node/Dockerfile
+++ b/ci/manylinux_node/Dockerfile
@@ -1,27 +0,0 @@
-# Many linux dockerfile with Rust, Node, and Lance dependencies installed.
-# This container allows building the node modules native libraries in an
-# environment with a very old glibc, so that we are compatible with a wide
-# range of linux distributions.
-ARG ARCH=x86_64
-
-FROM quay.io/pypa/manylinux_2_28_${ARCH}
-
-ARG ARCH=x86_64
-ARG DOCKER_USER=default_user
-
-# Protobuf is also installed as root.
-COPY install_protobuf.sh install_protobuf.sh
-RUN ./install_protobuf.sh ${ARCH}
-
-ENV DOCKER_USER=${DOCKER_USER}
-# Create a group and user, but only if it doesn't exist
-RUN echo ${ARCH} && id -u ${DOCKER_USER} >/dev/null 2>&1 || adduser --user-group --create-home --uid ${DOCKER_USER} build_user
-
-# We switch to the user to install Rust and Node, since those like to be
-# installed at the user level.
-USER ${DOCKER_USER}
-
-COPY prepare_manylinux_node.sh prepare_manylinux_node.sh
-RUN cp /prepare_manylinux_node.sh $HOME/ && \
-    cd $HOME && \
-    ./prepare_manylinux_node.sh ${ARCH}
--- a/ci/manylinux_node/build_vectordb.sh
+++ b/ci/manylinux_node/build_vectordb.sh
@@ -1,13 +0,0 @@
-#!/bin/bash
-# Builds the node module for manylinux. Invoked by ci/build_linux_artifacts.sh.
-set -e
-ARCH=${1:-x86_64}
-TARGET_TRIPLE=${2:-x86_64-unknown-linux-gnu}
-
-#Alpine doesn't have .bashrc
-FILE=$HOME/.bashrc && test -f $FILE && source $FILE
-
-cd node
-npm ci
-npm run build-release
-npm run pack-build -- -t $TARGET_TRIPLE
--- a/ci/manylinux_node/install_protobuf.sh
+++ b/ci/manylinux_node/install_protobuf.sh
@@ -1,15 +0,0 @@
-#!/bin/bash
-# Installs protobuf compiler. Should be run as root.
-set -e
-
-if [[ $1 == x86_64* ]]; then
-    ARCH=x86_64
-else
-    # gnu target
-    ARCH=aarch_64
-fi
-
-PB_REL=https://github.com/protocolbuffers/protobuf/releases
-PB_VERSION=23.1
-curl -LO $PB_REL/download/v$PB_VERSION/protoc-$PB_VERSION-linux-$ARCH.zip
-unzip protoc-$PB_VERSION-linux-$ARCH.zip -d /usr/local
--- a/ci/manylinux_node/prepare_manylinux_node.sh
+++ b/ci/manylinux_node/prepare_manylinux_node.sh
@@ -1,21 +0,0 @@
-#!/bin/bash
-set -e
-
-install_node() {
-    echo "Installing node..."
-
-    curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.34.0/install.sh | bash
-
-    source "$HOME"/.bashrc
-
-    nvm install --no-progress 18
-}
-
-install_rust() {
-    echo "Installing rust..."
-    curl https://sh.rustup.rs -sSf | bash -s -- -y
-    export PATH="$PATH:/root/.cargo/bin"
-}
-
-install_node
-install_rust
--- a/docs/mkdocs.yml
+++ b/docs/mkdocs.yml
@@ -103,264 +103,6 @@ markdown_extensions:
      permalink: ""

 nav:
-  - Home:
-      - LanceDB: index.md
-      - 🏃🏼‍♂️ Quick start: basic.md
-      - 📚 Concepts:
-          - Vector search: concepts/vector_search.md
-          - Indexing:
-              - IVFPQ: concepts/index_ivfpq.md
-              - HNSW: concepts/index_hnsw.md
-          - Storage: concepts/storage.md
-          - Data management: concepts/data_management.md
-      - 🔨 Guides:
-          - Working with tables: guides/tables.md
-          - Building a vector index: ann_indexes.md
-          - Vector Search: search.md
-          - Full-text search (native): fts.md
-          - Full-text search (tantivy-based): fts_tantivy.md
-          - Building a scalar index: guides/scalar_index.md
-          - Hybrid search:
-              - Overview: hybrid_search/hybrid_search.md
-              - Comparing Rerankers: hybrid_search/eval.md
-              - Airbnb financial data example: notebooks/hybrid_search.ipynb
-          - Late interaction with MultiVector search:
-              - Overview: guides/multi-vector.md
-              - Example: notebooks/Multivector_on_LanceDB.ipynb
-          - RAG:
-              - Vanilla RAG: rag/vanilla_rag.md
-              - Multi-head RAG: rag/multi_head_rag.md
-              - Corrective RAG: rag/corrective_rag.md
-              - Agentic RAG: rag/agentic_rag.md
-              - Graph RAG: rag/graph_rag.md
-              - Self RAG: rag/self_rag.md
-              - Adaptive RAG: rag/adaptive_rag.md
-              - SFR RAG: rag/sfr_rag.md
-              - Advanced Techniques:
-                  - HyDE: rag/advanced_techniques/hyde.md
-                  - FLARE: rag/advanced_techniques/flare.md
-          - Reranking:
-              - Quickstart: reranking/index.md
-              - Cohere Reranker: reranking/cohere.md
-              - Linear Combination Reranker: reranking/linear_combination.md
-              - Reciprocal Rank Fusion Reranker: reranking/rrf.md
-              - Cross Encoder Reranker: reranking/cross_encoder.md
-              - ColBERT Reranker: reranking/colbert.md
-              - Jina Reranker: reranking/jina.md
-              - OpenAI Reranker: reranking/openai.md
-              - AnswerDotAi Rerankers: reranking/answerdotai.md
-              - Voyage AI Rerankers: reranking/voyageai.md
-              - Building Custom Rerankers: reranking/custom_reranker.md
-              - Example: notebooks/lancedb_reranking.ipynb
-          - Filtering: sql.md
-          - Versioning & Reproducibility:
-              - sync API: notebooks/reproducibility.ipynb
-              - async API: notebooks/reproducibility_async.ipynb
-          - Configuring Storage: guides/storage.md
-          - Migration Guide: migration.md
-          - Tuning retrieval performance:
-              - Choosing right query type: guides/tuning_retrievers/1_query_types.md
-              - Reranking: guides/tuning_retrievers/2_reranking.md
-              - Embedding fine-tuning: guides/tuning_retrievers/3_embed_tuning.md
-      - 🧬 Managing embeddings:
-          - Understand Embeddings: embeddings/understanding_embeddings.md
-          - Get Started: embeddings/index.md
-          - Embedding functions: embeddings/embedding_functions.md
-          - Available models:
-              - Overview: embeddings/default_embedding_functions.md
-              - Text Embedding Functions:
-                  - Sentence Transformers: embeddings/available_embedding_models/text_embedding_functions/sentence_transformers.md
-                  - Huggingface Embedding Models: embeddings/available_embedding_models/text_embedding_functions/huggingface_embedding.md
-                  - Ollama Embeddings: embeddings/available_embedding_models/text_embedding_functions/ollama_embedding.md
-                  - OpenAI Embeddings: embeddings/available_embedding_models/text_embedding_functions/openai_embedding.md
-                  - Instructor Embeddings: embeddings/available_embedding_models/text_embedding_functions/instructor_embedding.md
-                  - Gemini Embeddings: embeddings/available_embedding_models/text_embedding_functions/gemini_embedding.md
-                  - Cohere Embeddings: embeddings/available_embedding_models/text_embedding_functions/cohere_embedding.md
-                  - Jina Embeddings: embeddings/available_embedding_models/text_embedding_functions/jina_embedding.md
-                  - AWS Bedrock Text Embedding Functions: embeddings/available_embedding_models/text_embedding_functions/aws_bedrock_embedding.md
-                  - IBM watsonx.ai Embeddings: embeddings/available_embedding_models/text_embedding_functions/ibm_watsonx_ai_embedding.md
-                  - Voyage AI Embeddings: embeddings/available_embedding_models/text_embedding_functions/voyageai_embedding.md
-              - Multimodal Embedding Functions:
-                  - OpenClip embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/openclip_embedding.md
-                  - Imagebind embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/imagebind_embedding.md
-                  - Jina Embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/jina_multimodal_embedding.md
-          - User-defined embedding functions: embeddings/custom_embedding_function.md
-          - Variables and secrets: embeddings/variables_and_secrets.md
-          - "Example: Multi-lingual semantic search": notebooks/multi_lingual_example.ipynb
-          - "Example: MultiModal CLIP Embeddings": notebooks/DisappearingEmbeddingFunction.ipynb
-      - 🔌 Integrations:
-          - Tools and data formats: integrations/index.md
-          - Pandas and PyArrow: python/pandas_and_pyarrow.md
-          - Polars: python/polars_arrow.md
-          - DuckDB: python/duckdb.md
-          - Datafusion: python/datafusion.md
-          - LangChain:
-              - LangChain 🔗: integrations/langchain.md
-              - LangChain demo: notebooks/langchain_demo.ipynb
-              - LangChain JS/TS 🔗: https://js.langchain.com/docs/integrations/vectorstores/lancedb
-          - LlamaIndex 🦙:
-              - LlamaIndex docs: integrations/llamaIndex.md
-              - LlamaIndex demo: notebooks/llamaIndex_demo.ipynb
-          - Pydantic: python/pydantic.md
-          - Voxel51: integrations/voxel51.md
-          - PromptTools: integrations/prompttools.md
-          - dlt: integrations/dlt.md
-          - phidata: integrations/phidata.md
-          - Genkit: integrations/genkit.md
-      - 🎯 Examples:
-          - Overview: examples/index.md
-          - 🐍 Python:
-              - Overview: examples/examples_python.md
-              - Build From Scratch: examples/python_examples/build_from_scratch.md
-              - Multimodal: examples/python_examples/multimodal.md
-              - Rag: examples/python_examples/rag.md
-              - Vector Search: examples/python_examples/vector_search.md
-              - Chatbot: examples/python_examples/chatbot.md
-              - Evaluation: examples/python_examples/evaluations.md
-              - AI Agent: examples/python_examples/aiagent.md
-              - Recommender System: examples/python_examples/recommendersystem.md
-              - Miscellaneous:
-                  - Serverless QA Bot with S3 and Lambda: examples/serverless_lancedb_with_s3_and_lambda.md
-                  - Serverless QA Bot with Modal: examples/serverless_qa_bot_with_modal_and_langchain.md
-          - 👾 JavaScript:
-              - Overview: examples/examples_js.md
-              - Serverless Website Chatbot: examples/serverless_website_chatbot.md
-              - YouTube Transcript Search: examples/youtube_transcript_bot_with_nodejs.md
-              - TransformersJS Embedding Search: examples/transformerjs_embedding_search_nodejs.md
-          - 🦀 Rust:
-              - Overview: examples/examples_rust.md
-      - 📓 Studies:
-          - ↗Improve retrievers with hybrid search and reranking: https://blog.lancedb.com/hybrid-search-and-reranking-report/
-      - 💭 FAQs: faq.md
-      - 🔍 Troubleshooting: troubleshooting.md
-      - ⚙️ API reference:
-          - 🐍 Python: python/python.md
-          - 👾 JavaScript (vectordb): javascript/modules.md
-          - 👾 JavaScript (lancedb): js/globals.md
-          - 🦀 Rust: https://docs.rs/lancedb/latest/lancedb/
-
-  - Quick start: basic.md
-  - Concepts:
-      - Vector search: concepts/vector_search.md
-      - Indexing:
-          - IVFPQ: concepts/index_ivfpq.md
-          - HNSW: concepts/index_hnsw.md
-      - Storage: concepts/storage.md
-      - Data management: concepts/data_management.md
-  - Guides:
-      - Working with tables: guides/tables.md
-      - Working with SQL: guides/sql_querying.md
-      - Building an ANN index: ann_indexes.md
-      - Vector Search: search.md
-      - Full-text search (native): fts.md
-      - Full-text search (tantivy-based): fts_tantivy.md
-      - Building a scalar index: guides/scalar_index.md
-      - Hybrid search:
-          - Overview: hybrid_search/hybrid_search.md
-          - Comparing Rerankers: hybrid_search/eval.md
-          - Airbnb financial data example: notebooks/hybrid_search.ipynb
-      - Late interaction with MultiVector search:
-          - Overview: guides/multi-vector.md
-          - Document search Example: notebooks/Multivector_on_LanceDB.ipynb
-      - RAG:
-          - Vanilla RAG: rag/vanilla_rag.md
-          - Multi-head RAG: rag/multi_head_rag.md
-          - Corrective RAG: rag/corrective_rag.md
-          - Agentic RAG: rag/agentic_rag.md
-          - Graph RAG: rag/graph_rag.md
-          - Self RAG: rag/self_rag.md
-          - Adaptive RAG: rag/adaptive_rag.md
-          - SFR RAG: rag/sfr_rag.md
-          - Advanced Techniques:
-              - HyDE: rag/advanced_techniques/hyde.md
-              - FLARE: rag/advanced_techniques/flare.md
-      - Reranking:
-          - Quickstart: reranking/index.md
-          - Cohere Reranker: reranking/cohere.md
-          - Linear Combination Reranker: reranking/linear_combination.md
-          - Reciprocal Rank Fusion Reranker: reranking/rrf.md
-          - Cross Encoder Reranker: reranking/cross_encoder.md
-          - ColBERT Reranker: reranking/colbert.md
-          - Jina Reranker: reranking/jina.md
-          - OpenAI Reranker: reranking/openai.md
-          - AnswerDotAi Rerankers: reranking/answerdotai.md
-          - Building Custom Rerankers: reranking/custom_reranker.md
-          - Example: notebooks/lancedb_reranking.ipynb
-      - Filtering: sql.md
-      - Versioning & Reproducibility:
-          - sync API: notebooks/reproducibility.ipynb
-          - async API: notebooks/reproducibility_async.ipynb
-      - Configuring Storage: guides/storage.md
-      - Migration Guide: migration.md
-      - Tuning retrieval performance:
-          - Choosing right query type: guides/tuning_retrievers/1_query_types.md
-          - Reranking: guides/tuning_retrievers/2_reranking.md
-          - Embedding fine-tuning: guides/tuning_retrievers/3_embed_tuning.md
-  - Managing Embeddings:
-      - Understand Embeddings: embeddings/understanding_embeddings.md
-      - Get Started: embeddings/index.md
-      - Embedding functions: embeddings/embedding_functions.md
-      - Available models:
-          - Overview: embeddings/default_embedding_functions.md
-          - Text Embedding Functions:
-              - Sentence Transformers: embeddings/available_embedding_models/text_embedding_functions/sentence_transformers.md
-              - Huggingface Embedding Models: embeddings/available_embedding_models/text_embedding_functions/huggingface_embedding.md
-              - Ollama Embeddings: embeddings/available_embedding_models/text_embedding_functions/ollama_embedding.md
-              - OpenAI Embeddings: embeddings/available_embedding_models/text_embedding_functions/openai_embedding.md
-              - Instructor Embeddings: embeddings/available_embedding_models/text_embedding_functions/instructor_embedding.md
-              - Gemini Embeddings: embeddings/available_embedding_models/text_embedding_functions/gemini_embedding.md
-              - Cohere Embeddings: embeddings/available_embedding_models/text_embedding_functions/cohere_embedding.md
-              - Jina Embeddings: embeddings/available_embedding_models/text_embedding_functions/jina_embedding.md
-              - AWS Bedrock Text Embedding Functions: embeddings/available_embedding_models/text_embedding_functions/aws_bedrock_embedding.md
-              - IBM watsonx.ai Embeddings: embeddings/available_embedding_models/text_embedding_functions/ibm_watsonx_ai_embedding.md
-          - Multimodal Embedding Functions:
-              - OpenClip embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/openclip_embedding.md
-              - Imagebind embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/imagebind_embedding.md
-              - Jina Embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/jina_multimodal_embedding.md
-      - User-defined embedding functions: embeddings/custom_embedding_function.md
-      - Variables and secrets: embeddings/variables_and_secrets.md
-      - "Example: Multi-lingual semantic search": notebooks/multi_lingual_example.ipynb
-      - "Example: MultiModal CLIP Embeddings": notebooks/DisappearingEmbeddingFunction.ipynb
-  - Integrations:
-      - Overview: integrations/index.md
-      - Pandas and PyArrow: python/pandas_and_pyarrow.md
-      - Polars: python/polars_arrow.md
-      - DuckDB: python/duckdb.md
-      - Datafusion: python/datafusion.md
-      - LangChain 🦜️🔗↗: integrations/langchain.md
-      - LangChain.js 🦜️🔗↗: https://js.langchain.com/docs/integrations/vectorstores/lancedb
-      - LlamaIndex 🦙↗: integrations/llamaIndex.md
-      - Pydantic: python/pydantic.md
-      - Voxel51: integrations/voxel51.md
-      - PromptTools: integrations/prompttools.md
-      - dlt: integrations/dlt.md
-      - phidata: integrations/phidata.md
-      - Genkit: integrations/genkit.md
-  - Examples:
-      - examples/index.md
-      - 🐍 Python:
-          - Overview: examples/examples_python.md
-          - Build From Scratch: examples/python_examples/build_from_scratch.md
-          - Multimodal: examples/python_examples/multimodal.md
-          - Rag: examples/python_examples/rag.md
-          - Vector Search: examples/python_examples/vector_search.md
-          - Chatbot: examples/python_examples/chatbot.md
-          - Evaluation: examples/python_examples/evaluations.md
-          - AI Agent: examples/python_examples/aiagent.md
-          - Recommender System: examples/python_examples/recommendersystem.md
-          - Miscellaneous:
-              - Serverless QA Bot with S3 and Lambda: examples/serverless_lancedb_with_s3_and_lambda.md
-              - Serverless QA Bot with Modal: examples/serverless_qa_bot_with_modal_and_langchain.md
-      - 👾 JavaScript:
-          - Overview: examples/examples_js.md
-          - Serverless Website Chatbot: examples/serverless_website_chatbot.md
-          - YouTube Transcript Search: examples/youtube_transcript_bot_with_nodejs.md
-          - TransformersJS Embedding Search: examples/transformerjs_embedding_search_nodejs.md
-      - 🦀 Rust:
-          - Overview: examples/examples_rust.md
-  - Studies:
-      - studies/overview.md
-      - ↗Improve retrievers with hybrid search and reranking: https://blog.lancedb.com/hybrid-search-and-reranking-report/
  - API reference:
      - Overview: api_reference.md
      - Python: python/python.md
--- a/java/core/pom.xml
+++ b/java/core/pom.xml
@@ -8,7 +8,7 @@
    <parent>
        <groupId>com.lancedb</groupId>
        <artifactId>lancedb-parent</artifactId>
-        <version>0.21.2-beta.1</version>
+        <version>0.21.2-final.0</version>
        <relativePath>../pom.xml</relativePath>
    </parent>

--- a/java/lance-namespace/pom.xml
+++ b/java/lance-namespace/pom.xml
@@ -8,7 +8,7 @@
    <parent>
        <groupId>com.lancedb</groupId>
        <artifactId>lancedb-parent</artifactId>
-        <version>0.21.2-beta.1</version>
+        <version>0.21.2-final.0</version>
        <relativePath>../pom.xml</relativePath>
    </parent>

--- a/java/pom.xml
+++ b/java/pom.xml
@@ -6,7 +6,7 @@

    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-parent</artifactId>
-    <version>0.21.2-beta.1</version>
+    <version>0.21.2-final.0</version>
    <packaging>pom</packaging>
    <name>${project.artifactId}</name>
    <description>LanceDB Java SDK Parent POM</description>
--- a/node/.eslintrc.js
+++ b/node/.eslintrc.js
@@ -1,22 +0,0 @@
-module.exports = {
-  env: {
-    browser: true,
-    es2021: true
-  },
-  extends: 'standard-with-typescript',
-  overrides: [
-  ],
-  parserOptions: {
-    project: './tsconfig.json',
-    ecmaVersion: 'latest',
-    sourceType: 'module'
-  },
-  rules: {
-    "@typescript-eslint/method-signature-style": "off",
-    "@typescript-eslint/quotes": "off",
-    "@typescript-eslint/semi": "off",
-    "@typescript-eslint/explicit-function-return-type": "off",
-    "@typescript-eslint/space-before-function-paren": "off",
-    "@typescript-eslint/indent": "off",
-  }
-}
--- a/node/.npmignore
+++ b/node/.npmignore
@@ -1,4 +0,0 @@
-gen_test_data.py
-index.node
-dist/lancedb*.tgz
-vectordb*.tgz
--- a/node/CHANGELOG.md
+++ b/node/CHANGELOG.md
@@ -1,64 +0,0 @@
-# Changelog
-
-All notable changes to this project will be documented in this file.
-
-The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
-and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
-
-## [0.1.5] - 2023-06-00
-
-### Added
-
- Support for macOS X86
-
-## [0.1.4] - 2023-06-03
-
-### Added
-
- Select / Project query API
-
-### Changed
-
-  Deprecated created_index in favor of createIndex
-
-## [0.1.3] - 2023-06-01
-
-### Added
-
- Support S3 and Google Cloud Storage
- Embedding functions support
- OpenAI embedding function
-
-## [0.1.2] - 2023-05-27
-
-### Added
-
- Append records API
- Extra query params to to nodejs client
- Create_index API
- 
-### Fixed
-
- bugfix: string columns should be converted to Utf8Array (#94)
-
-## [0.1.1] - 2023-05-16
-
-### Added
-
- create_table API
- limit parameter for queries
- Typescript / JavaScript examples
- Linux support
-
-## [0.1.0] - 2023-05-16
-
-### Added
-
- Initial  JavaScript / Node.js library for LanceDB
- Read-only api to query LanceDB datasets
- Supports macOS arm only
-
-## [pre-0.1.0]
-
- Various prototypes / test builds
-
--- a/node/README.md
+++ b/node/README.md
@@ -1,66 +0,0 @@
-# LanceDB
-
-A JavaScript / Node.js library for [LanceDB](https://github.com/lancedb/lancedb).
-
-**DEPRECATED: This library is deprecated. Please use the new client,
-[@lancedb/lancedb](https://www.npmjs.com/package/@lancedb/lancedb).**
-
-## Installation
-
-```bash
-npm install vectordb
-```
-
-This will download the appropriate native library for your platform. We currently
-support:
-
-* Linux (x86_64 and aarch64)
-* MacOS (Intel and ARM/M1/M2)
-* Windows (x86_64 only)
-
-We do not yet support musl-based Linux (such as Alpine Linux) or aarch64 Windows.
-
-## Usage
-
-### Basic Example
-
-```javascript
-const lancedb = require('vectordb');
-const db = await lancedb.connect('data/sample-lancedb');
-const table = await db.createTable("my_table",
-      [{ id: 1, vector: [0.1, 1.0], item: "foo", price: 10.0 },
-      { id: 2, vector: [3.9, 0.5], item: "bar", price: 20.0 }])
-const results = await table.search([0.1, 0.3]).limit(20).execute();
-console.log(results);
-```
-
-The [examples](./examples) folder contains complete examples.
-
-## Development
-
-To build everything fresh:
-
-```bash
-npm install
-npm run build
-```
-
-Then you should be able to run the tests with:
-
-```bash
-npm test
-```
-
-### Fix lints
-
-To run the linter and have it automatically fix all errors
-
-```bash
-npm run lint -- --fix
-```
-
-To build documentation
-
-```bash
-npx typedoc --plugin typedoc-plugin-markdown --out ../docs/src/javascript src/index.ts
-```
--- a/node/examples/js-openai/index.js
+++ b/node/examples/js-openai/index.js
@@ -1,41 +0,0 @@
-// Copyright 2023 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-'use strict'
-
-async function example () {
-  const lancedb = require('vectordb')
-  // You need to provide an OpenAI API key, here we read it from the OPENAI_API_KEY environment variable
-  const apiKey = process.env.OPENAI_API_KEY
-  // The embedding function will create embeddings for the 'text' column(text in this case)
-  const embedding = new lancedb.OpenAIEmbeddingFunction('text', apiKey)
-
-  const db = await lancedb.connect('data/sample-lancedb')
-
-  const data = [
-    { id: 1, text: 'Black T-Shirt', price: 10 },
-    { id: 2, text: 'Leather Jacket', price: 50 }
-  ]
-
-  const table = await db.createTable('vectors', data, embedding)
-  console.log(await db.tableNames())
-
-  const results = await table
-    .search('keeps me warm')
-    .limit(1)
-    .execute()
-  console.log(results[0].text)
-}
-
-example().then(_ => { console.log('All done!') })
--- a/node/examples/js-openai/package.json
+++ b/node/examples/js-openai/package.json
@@ -1,15 +0,0 @@
-{
-  "name": "vectordb-example-js-openai",
-  "version": "1.0.0",
-  "description": "",
-  "main": "index.js",
-  "scripts": {
-    "test": "echo \"Error: no test specified\" && exit 1"
-  },
-  "author": "Lance Devs",
-  "license": "Apache-2.0",
-  "dependencies": {
-    "vectordb": "file:../..",
-    "openai": "^3.2.1"
-  }
-}
--- a/node/examples/js-transformers/index.js
+++ b/node/examples/js-transformers/index.js
@@ -1,66 +0,0 @@
-// Copyright 2023 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-'use strict'
-
-
-async function example() {
-
-    const lancedb = require('vectordb')
-
-    // Import transformers and the all-MiniLM-L6-v2 model (https://huggingface.co/Xenova/all-MiniLM-L6-v2)
-    const { pipeline } = await import('@xenova/transformers')
-    const pipe = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
-
-
-    // Create embedding function from pipeline which returns a list of vectors from batch
-    // sourceColumn is the name of the column in the data to be embedded
-    //
-    // Output of pipe is a Tensor { data: Float32Array(384) }, so filter for the vector
-    const embed_fun = {}
-    embed_fun.sourceColumn = 'text'
-    embed_fun.embed = async function (batch) {
-        let result = []
-        for (let text of batch) {
-            const res = await pipe(text, { pooling: 'mean', normalize: true })
-            result.push(Array.from(res['data']))
-        }
-        return (result)
-    }
-
-    // Link a folder and create a table with data
-    const db = await lancedb.connect('data/sample-lancedb')
-
-    const data = [
-        { id: 1, text: 'Cherry', type: 'fruit' },
-        { id: 2, text: 'Carrot', type: 'vegetable' },
-        { id: 3, text: 'Potato', type: 'vegetable' },
-        { id: 4, text: 'Apple', type: 'fruit' },
-        { id: 5, text: 'Banana', type: 'fruit' }
-    ]
-
-    const table = await db.createTable('food_table', data, embed_fun)
-
-
-    // Query the table
-    const results = await table
-        .search("a sweet fruit to eat")
-        .metricType("cosine")
-        .limit(2)
-        .execute()
-    console.log(results.map(r => r.text))
-
-}
-
-example().then(_ => { console.log("Done!") })
--- a/node/examples/js-transformers/package.json
+++ b/node/examples/js-transformers/package.json
@@ -1,16 +0,0 @@
-{
-  "name": "vectordb-example-js-transformers",
-  "version": "1.0.0",
-  "description": "Example for using transformers.js with lancedb",
-  "main": "index.js",
-  "scripts": {
-    "test": "echo \"Error: no test specified\" && exit 1"
-  },
-  "author": "Lance Devs",
-  "license": "Apache-2.0",
-  "dependencies": {
-    "@xenova/transformers": "^2.4.1",
-    "vectordb": "file:../.."
-  }
-
-}
--- a/node/examples/js-youtube-transcripts/index.js
+++ b/node/examples/js-youtube-transcripts/index.js
@@ -1,122 +0,0 @@
-// Copyright 2023 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-'use strict'
-
-const lancedb = require('vectordb')
-const fs = require('fs/promises')
-const readline = require('readline/promises')
-const { stdin: input, stdout: output } = require('process')
-const { Configuration, OpenAIApi } = require('openai')
-
-// Download file from XYZ
-const INPUT_FILE_NAME = 'data/youtube-transcriptions_sample.jsonl';
-
-(async () => {
-  // You need to provide an OpenAI API key, here we read it from the OPENAI_API_KEY environment variable
-  const apiKey = process.env.OPENAI_API_KEY
-  // The embedding function will create embeddings for the 'context' column
-  const embedFunction = new lancedb.OpenAIEmbeddingFunction('context', apiKey)
-
-  // Connects to LanceDB
-  const db = await lancedb.connect('data/youtube-lancedb')
-
-  // Open the vectors table or create one if it does not exist
-  let tbl
-  if ((await db.tableNames()).includes('vectors')) {
-    tbl = await db.openTable('vectors', embedFunction)
-  } else {
-    tbl = await createEmbeddingsTable(db, embedFunction)
-  }
-
-  // Use OpenAI Completion API to generate and answer based on the context that LanceDB provides
-  const configuration = new Configuration({ apiKey })
-  const openai = new OpenAIApi(configuration)
-  const rl = readline.createInterface({ input, output })
-  try {
-    while (true) {
-      const query = await rl.question('Prompt: ')
-      const results = await tbl
-        .search(query)
-        .select(['title', 'text', 'context'])
-        .limit(3)
-        .execute()
-
-      // console.table(results)
-
-      const response = await openai.createCompletion({
-        model: 'text-davinci-003',
-        prompt: createPrompt(query, results),
-        max_tokens: 400,
-        temperature: 0,
-        top_p: 1,
-        frequency_penalty: 0,
-        presence_penalty: 0
-      })
-      console.log(response.data.choices[0].text)
-    }
-  } catch (err) {
-    console.log('Error: ', err)
-  } finally {
-    rl.close()
-  }
-  process.exit(1)
-})()
-
-async function createEmbeddingsTable (db, embedFunction) {
-  console.log(`Creating embeddings from ${INPUT_FILE_NAME}`)
-  // read the input file into a JSON array, skipping empty lines
-  const lines = (await fs.readFile(INPUT_FILE_NAME, 'utf-8'))
-    .toString()
-    .split('\n')
-    .filter(line => line.length > 0)
-    .map(line => JSON.parse(line))
-
-  const data = contextualize(lines, 20, 'video_id')
-  return await db.createTable('vectors', data, embedFunction)
-}
-
-// Each transcript has a small text column, we include previous transcripts in order to
-// have more context information when creating embeddings
-function contextualize (rows, contextSize, groupColumn) {
-  const grouped = []
-  rows.forEach(row => {
-    if (!grouped[row[groupColumn]]) {
-      grouped[row[groupColumn]] = []
-    }
-    grouped[row[groupColumn]].push(row)
-  })
-
-  const data = []
-  Object.keys(grouped).forEach(key => {
-    for (let i = 0; i < grouped[key].length; i++) {
-      const start = i - contextSize > 0 ? i - contextSize : 0
-      grouped[key][i].context = grouped[key].slice(start, i + 1).map(r => r.text).join(' ')
-    }
-    data.push(...grouped[key])
-  })
-  return data
-}
-
-// Creates a prompt by aggregating all relevant contexts
-function createPrompt (query, context) {
-  let prompt =
-      'Answer the question based on the context below.\n\n' +
-      'Context:\n'
-
-  // need to make sure our prompt is not larger than max size
-  prompt = prompt + context.map(c => c.context).join('\n\n---\n\n').substring(0, 3750)
-  prompt = prompt + `\n\nQuestion: ${query}\nAnswer:`
-  return prompt
-}
--- a/node/examples/js-youtube-transcripts/package.json
+++ b/node/examples/js-youtube-transcripts/package.json
@@ -1,15 +0,0 @@
-{
-  "name": "vectordb-example-js-openai",
-  "version": "1.0.0",
-  "description": "",
-  "main": "index.js",
-  "scripts": {
-    "test": "echo \"Error: no test specified\" && exit 1"
-  },
-  "author": "Lance Devs",
-  "license": "Apache-2.0",
-  "dependencies": {
-    "vectordb": "file:../..",
-    "openai": "^3.2.1"
-  }
-}
--- a/node/examples/js/index.js
+++ b/node/examples/js/index.js
@@ -1,36 +0,0 @@
-// Copyright 2023 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-'use strict'
-
-async function example () {
-  const lancedb = require('vectordb')
-  const db = await lancedb.connect('data/sample-lancedb')
-
-  const data = [
-    { id: 1, vector: [0.1, 0.2], price: 10 },
-    { id: 2, vector: [1.1, 1.2], price: 50 }
-  ]
-
-  const table = await db.createTable('vectors', data)
-  console.log(await db.tableNames())
-
-  const results = await table
-      .search([0.1, 0.3])
-      .limit(20)
-      .execute()
-  console.log(results)
-}
-
-example()
--- a/node/examples/js/package.json
+++ b/node/examples/js/package.json
@@ -1,14 +0,0 @@
-{
-  "name": "vectordb-example-js",
-  "version": "1.0.0",
-  "description": "",
-  "main": "index.js",
-  "scripts": {
-    "test": "echo \"Error: no test specified\" && exit 1"
-  },
-  "author": "Lance Devs",
-  "license": "Apache-2.0",
-  "dependencies": {
-    "vectordb": "file:../.."
-  }
-}
--- a/node/examples/ts/package.json
+++ b/node/examples/ts/package.json
@@ -1,22 +0,0 @@
-{
-  "name": "vectordb-example-ts",
-  "version": "1.0.0",
-  "description": "",
-  "main": "dist/index.js",
-  "types": "dist/index.d.ts",
-  "scripts": {
-    "tsc": "tsc -b",
-    "build": "tsc"
-  },
-  "author": "Lance Devs",
-  "license": "Apache-2.0",
-  "devDependencies": {
-    "@types/node": "^18.16.2",
-    "ts-node": "^10.9.1",
-    "ts-node-dev": "^2.0.0",
-    "typescript": "*"
-  },
-  "dependencies": {
-    "vectordb": "file:../.."
-  }
-}
--- a/node/examples/ts/src/index.ts
+++ b/node/examples/ts/src/index.ts
@@ -1,35 +0,0 @@
-// Copyright 2023 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-import * as vectordb from 'vectordb';
-
-async function example () {
-    const db = await vectordb.connect('data/sample-lancedb')
-
-    const data = [
-        { id: 1, vector: [0.1, 0.2], price: 10 },
-        { id: 2, vector: [1.1, 1.2], price: 50 }
-    ]
-
-    const table = await db.createTable('vectors', data)
-    console.log(await db.tableNames())
-
-    const results = await table
-        .search([0.1, 0.3])
-        .limit(20)
-        .execute()
-    console.log(results)
-}
-
-example().then(_ => { console.log ("All done!") })
--- a/node/examples/ts/tsconfig.json
+++ b/node/examples/ts/tsconfig.json
@@ -1,10 +0,0 @@
-{
-  "include": ["src/**/*.ts"],
-  "compilerOptions": {
-    "target": "es2016",
-    "module": "commonjs",
-    "declaration": true,
-    "outDir": "./dist",
-    "strict": true
-  }
-}
--- a/node/native.js
+++ b/node/native.js
@@ -1,36 +0,0 @@
-// Copyright 2023 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-const { currentTarget } = require('@neon-rs/load')
-
-let nativeLib
-
-try {
-  // When developing locally, give preference to the local built library
-  nativeLib = require('./index.node')
-} catch {
-  try {
-    nativeLib = require(`@lancedb/vectordb-${currentTarget()}`)
-  } catch (e) {
-    throw new Error(`vectordb: failed to load native library.
-  You may need to run \`npm install @lancedb/vectordb-${currentTarget()}\`.
-
-  If that does not work, please file a bug report at https://github.com/lancedb/lancedb/issues
-      
-  Source error: ${e}`)
-  }
-}
-
-// Dynamic require for runtime.
-module.exports = nativeLib
--- a/node/package-lock.json
+++ b/node/package-lock.json
--- a/node/package.json
+++ b/node/package.json
@@ -1,98 +0,0 @@
-{
-  "name": "vectordb",
-  "version": "0.21.2-beta.1",
-  "description": " Serverless, low-latency vector database for AI applications",
-  "private": false,
-  "main": "dist/index.js",
-  "types": "dist/index.d.ts",
-  "scripts": {
-    "tsc": "tsc -b",
-    "build": "npm run tsc && cargo-cp-artifact --artifact cdylib lancedb_node index.node -- cargo build -p lancedb-node --message-format=json",
-    "build-release": "npm run build -- --release",
-    "test": "npm run tsc && mocha -recursive dist/test",
-    "integration-test": "npm run tsc && mocha -recursive dist/integration_test",
-    "lint": "eslint native.js src --ext .js,.ts",
-    "clean": "rm -rf node_modules *.node dist/",
-    "pack-build": "neon pack-build",
-    "check-npm": "printenv && which node && which npm && npm --version"
-  },
-  "repository": {
-    "type": "git",
-    "url": "https://github.com/lancedb/lancedb.git"
-  },
-  "homepage": "https://lancedb.github.io/lancedb/",
-  "bugs": {
-    "url": "https://github.com/lancedb/lancedb/issues"
-  },
-  "keywords": [
-    "data-format",
-    "data-science",
-    "machine-learning",
-    "data-analytics"
-  ],
-  "author": "Lance Devs",
-  "license": "Apache-2.0",
-  "devDependencies": {
-    "@neon-rs/cli": "^0.0.160",
-    "@types/chai": "^4.3.4",
-    "@types/chai-as-promised": "^7.1.5",
-    "@types/mocha": "^10.0.1",
-    "@types/node": "^18.16.2",
-    "@types/sinon": "^10.0.15",
-    "@types/temp": "^0.9.1",
-    "@types/uuid": "^9.0.3",
-    "@typescript-eslint/eslint-plugin": "^5.59.1",
-    "apache-arrow-old": "npm:apache-arrow@13.0.0",
-    "cargo-cp-artifact": "^0.1",
-    "chai": "^4.3.7",
-    "chai-as-promised": "^7.1.1",
-    "eslint": "^8.39.0",
-    "eslint-config-standard-with-typescript": "^34.0.1",
-    "eslint-plugin-import": "^2.26.0",
-    "eslint-plugin-n": "^15.7.0",
-    "eslint-plugin-promise": "^6.1.1",
-    "mocha": "^10.2.0",
-    "openai": "^4.24.1",
-    "sinon": "^15.1.0",
-    "temp": "^0.9.4",
-    "ts-node": "^10.9.1",
-    "ts-node-dev": "^2.0.0",
-    "typedoc": "^0.24.7",
-    "typedoc-plugin-markdown": "^3.15.3",
-    "typescript": "^5.1.0",
-    "uuid": "^9.0.0"
-  },
-  "dependencies": {
-    "@neon-rs/load": "^0.0.74",
-    "axios": "^1.4.0"
-  },
-  "peerDependencies": {
-    "@apache-arrow/ts": "^14.0.2",
-    "apache-arrow": "^14.0.2"
-  },
-  "os": [
-    "darwin",
-    "linux",
-    "win32"
-  ],
-  "cpu": [
-    "x64",
-    "arm64"
-  ],
-  "neon": {
-    "targets": {
-      "x86_64-apple-darwin": "@lancedb/vectordb-darwin-x64",
-      "aarch64-apple-darwin": "@lancedb/vectordb-darwin-arm64",
-      "x86_64-unknown-linux-gnu": "@lancedb/vectordb-linux-x64-gnu",
-      "aarch64-unknown-linux-gnu": "@lancedb/vectordb-linux-arm64-gnu",
-      "x86_64-pc-windows-msvc": "@lancedb/vectordb-win32-x64-msvc"
-    }
-  },
-  "optionalDependencies": {
-    "@lancedb/vectordb-darwin-x64": "0.21.2-beta.1",
-    "@lancedb/vectordb-darwin-arm64": "0.21.2-beta.1",
-    "@lancedb/vectordb-linux-x64-gnu": "0.21.2-beta.1",
-    "@lancedb/vectordb-linux-arm64-gnu": "0.21.2-beta.1",
-    "@lancedb/vectordb-win32-x64-msvc": "0.21.2-beta.1"
-  }
-}
--- a/node/src/arrow.ts
+++ b/node/src/arrow.ts
@@ -1,635 +0,0 @@
-// Copyright 2023 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-import {
-  Field,
-  makeBuilder,
-  RecordBatchFileWriter,
-  Utf8,
-  type Vector,
-  FixedSizeList,
-  vectorFromArray,
-  Schema,
-  Table as ArrowTable,
-  RecordBatchStreamWriter,
-  List,
-  RecordBatch,
-  makeData,
-  Struct,
-  type Float,
-  DataType,
-  Binary,
-  Float32
-} from "apache-arrow";
-import { type EmbeddingFunction } from "./index";
-import { sanitizeSchema } from "./sanitize";
-
-/*
- * Options to control how a column should be converted to a vector array
- */
-export class VectorColumnOptions {
-  /** Vector column type. */
-  type: Float = new Float32();
-
-  constructor(values?: Partial<VectorColumnOptions>) {
-    Object.assign(this, values);
-  }
-}
-
-/** Options to control the makeArrowTable call. */
-export class MakeArrowTableOptions {
-  /*
-   * Schema of the data.
-   *
-   * If this is not provided then the data type will be inferred from the
-   * JS type.  Integer numbers will become int64, floating point numbers
-   * will become float64 and arrays will become variable sized lists with
-   * the data type inferred from the first element in the array.
-   *
-   * The schema must be specified if there are no records (e.g. to make
-   * an empty table)
-   */
-  schema?: Schema;
-
-  /*
-   * Mapping from vector column name to expected type
-   *
-   * Lance expects vector columns to be fixed size list arrays (i.e. tensors)
-   * However, `makeArrowTable` will not infer this by default (it creates
-   * variable size list arrays).  This field can be used to indicate that a column
-   * should be treated as a vector column and converted to a fixed size list.
-   *
-   * The keys should be the names of the vector columns.  The value specifies the
-   * expected data type of the vector columns.
-   *
-   * If `schema` is provided then this field is ignored.
-   *
-   * By default, the column named "vector" will be assumed to be a float32
-   * vector column.
-   */
-  vectorColumns: Record<string, VectorColumnOptions> = {
-    vector: new VectorColumnOptions()
-  };
-
-  embeddings?: EmbeddingFunction<any>;
-
-  /**
-   * If true then string columns will be encoded with dictionary encoding
-   *
-   * Set this to true if your string columns tend to repeat the same values
-   * often.  For more precise control use the `schema` property to specify the
-   * data type for individual columns.
-   *
-   * If `schema` is provided then this property is ignored.
-   */
-  dictionaryEncodeStrings: boolean = false;
-
-  constructor(values?: Partial<MakeArrowTableOptions>) {
-    Object.assign(this, values);
-  }
-}
-
-/**
- * An enhanced version of the {@link makeTable} function from Apache Arrow
- * that supports nested fields and embeddings columns.
- *
- * This function converts an array of Record<String, any> (row-major JS objects)
- * to an Arrow Table (a columnar structure)
- *
- * Note that it currently does not support nulls.
- *
- * If a schema is provided then it will be used to determine the resulting array
- * types.  Fields will also be reordered to fit the order defined by the schema.
- *
- * If a schema is not provided then the types will be inferred and the field order
- * will be controlled by the order of properties in the first record.
- *
- * If the input is empty then a schema must be provided to create an empty table.
- *
- * When a schema is not specified then data types will be inferred.  The inference
- * rules are as follows:
- *
- *  - boolean => Bool
- *  - number => Float64
- *  - String => Utf8
- *  - Buffer => Binary
- *  - Record<String, any> => Struct
- *  - Array<any> => List
- *
- * @param data input data
- * @param options options to control the makeArrowTable call.
- *
- * @example
- *
- * ```ts
- *
- * import { fromTableToBuffer, makeArrowTable } from "../arrow";
- * import { Field, FixedSizeList, Float16, Float32, Int32, Schema } from "apache-arrow";
- *
- * const schema = new Schema([
- *   new Field("a", new Int32()),
- *   new Field("b", new Float32()),
- *   new Field("c", new FixedSizeList(3, new Field("item", new Float16()))),
- *  ]);
- *  const table = makeArrowTable([
- *    { a: 1, b: 2, c: [1, 2, 3] },
- *    { a: 4, b: 5, c: [4, 5, 6] },
- *    { a: 7, b: 8, c: [7, 8, 9] },
- *  ], { schema });
- * ```
- *
- * By default it assumes that the column named `vector` is a vector column
- * and it will be converted into a fixed size list array of type float32.
- * The `vectorColumns` option can be used to support other vector column
- * names and data types.
- *
- * ```ts
- *
- * const schema = new Schema([
-    new Field("a", new Float64()),
-    new Field("b", new Float64()),
-    new Field(
-      "vector",
-      new FixedSizeList(3, new Field("item", new Float32()))
-    ),
-  ]);
-  const table = makeArrowTable([
-    { a: 1, b: 2, vector: [1, 2, 3] },
-    { a: 4, b: 5, vector: [4, 5, 6] },
-    { a: 7, b: 8, vector: [7, 8, 9] },
-  ]);
-  assert.deepEqual(table.schema, schema);
- * ```
- *
- * You can specify the vector column types and names using the options as well
- *
- * ```typescript
- *
- * const schema = new Schema([
-    new Field('a', new Float64()),
-    new Field('b', new Float64()),
-    new Field('vec1', new FixedSizeList(3, new Field('item', new Float16()))),
-    new Field('vec2', new FixedSizeList(3, new Field('item', new Float16())))
-  ]);
- * const table = makeArrowTable([
-    { a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] },
-    { a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] },
-    { a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] }
-  ], {
-    vectorColumns: {
-      vec1: { type: new Float16() },
-      vec2: { type: new Float16() }
-    }
-  }
- * assert.deepEqual(table.schema, schema)
- * ```
- */
-export function makeArrowTable(
-  data: Array<Record<string, any>>,
-  options?: Partial<MakeArrowTableOptions>
-): ArrowTable {
-  if (
-    data.length === 0 &&
-    (options?.schema === undefined || options?.schema === null)
-  ) {
-    throw new Error("At least one record or a schema needs to be provided");
-  }
-
-  const opt = new MakeArrowTableOptions(options !== undefined ? options : {});
-  if (opt.schema !== undefined && opt.schema !== null) {
-    opt.schema = sanitizeSchema(opt.schema);
-    opt.schema = validateSchemaEmbeddings(opt.schema, data, opt.embeddings);
-  }
-
-  const columns: Record<string, Vector> = {};
-  // TODO: sample dataset to find missing columns
-  // Prefer the field ordering of the schema, if present
-  const columnNames =
-    opt.schema != null ? (opt.schema.names as string[]) : Object.keys(data[0]);
-  for (const colName of columnNames) {
-    if (
-      data.length !== 0 &&
-      !Object.prototype.hasOwnProperty.call(data[0], colName)
-    ) {
-      // The field is present in the schema, but not in the data, skip it
-      continue;
-    }
-    // Extract a single column from the records (transpose from row-major to col-major)
-    let values = data.map((datum) => datum[colName]);
-
-    // By default (type === undefined) arrow will infer the type from the JS type
-    let type;
-    if (opt.schema !== undefined) {
-      // If there is a schema provided, then use that for the type instead
-      type = opt.schema?.fields.filter((f) => f.name === colName)[0]?.type;
-      if (DataType.isInt(type) && type.bitWidth === 64) {
-        // wrap in BigInt to avoid bug: https://github.com/apache/arrow/issues/40051
-        values = values.map((v) => {
-          if (v === null) {
-            return v;
-          }
-          return BigInt(v);
-        });
-      }
-    } else {
-      // Otherwise, check to see if this column is one of the vector columns
-      // defined by opt.vectorColumns and, if so, use the fixed size list type
-      const vectorColumnOptions = opt.vectorColumns[colName];
-      if (vectorColumnOptions !== undefined) {
-        type = newVectorType(values[0].length, vectorColumnOptions.type);
-      }
-    }
-
-    try {
-      // Convert an Array of JS values to an arrow vector
-      columns[colName] = makeVector(values, type, opt.dictionaryEncodeStrings);
-    } catch (error: unknown) {
-      // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
-      throw Error(`Could not convert column "${colName}" to Arrow: ${error}`);
-    }
-  }
-
-  if (opt.schema != null) {
-    // `new ArrowTable(columns)` infers a schema which may sometimes have
-    // incorrect nullability (it assumes nullable=true if there are 0 rows)
-    //
-    // `new ArrowTable(schema, columns)` will also fail because it will create a
-    // batch with an inferred schema and then complain that the batch schema
-    // does not match the provided schema.
-    //
-    // To work around this we first create a table with the wrong schema and
-    // then patch the schema of the batches so we can use
-    // `new ArrowTable(schema, batches)` which does not do any schema inference
-    const firstTable = new ArrowTable(columns);
-    const batchesFixed = firstTable.batches.map(
-      // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
-      (batch) => new RecordBatch(opt.schema!, batch.data)
-    );
-    return new ArrowTable(opt.schema, batchesFixed);
-  } else {
-    return new ArrowTable(columns);
-  }
-}
-
-/**
- * Create an empty Arrow table with the provided schema
- */
-export function makeEmptyTable(schema: Schema): ArrowTable {
-  return makeArrowTable([], { schema });
-}
-
-// Helper function to convert Array<Array<any>> to a variable sized list array
-function makeListVector(lists: any[][]): Vector<any> {
-  if (lists.length === 0 || lists[0].length === 0) {
-    throw Error("Cannot infer list vector from empty array or empty list");
-  }
-  const sampleList = lists[0];
-  let inferredType;
-  try {
-    const sampleVector = makeVector(sampleList);
-    inferredType = sampleVector.type;
-  } catch (error: unknown) {
-    // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
-    throw Error(`Cannot infer list vector.  Cannot infer inner type: ${error}`);
-  }
-
-  const listBuilder = makeBuilder({
-    type: new List(new Field("item", inferredType, true))
-  });
-  for (const list of lists) {
-    listBuilder.append(list);
-  }
-  return listBuilder.finish().toVector();
-}
-
-// Helper function to convert an Array of JS values to an Arrow Vector
-function makeVector(
-  values: any[],
-  type?: DataType,
-  stringAsDictionary?: boolean
-): Vector<any> {
-  if (type !== undefined) {
-    // No need for inference, let Arrow create it
-    return vectorFromArray(values, type);
-  }
-  if (values.length === 0) {
-    throw Error(
-      "makeVector requires at least one value or the type must be specfied"
-    );
-  }
-  const sampleValue = values.find((val) => val !== null && val !== undefined);
-  if (sampleValue === undefined) {
-    throw Error(
-      "makeVector cannot infer the type if all values are null or undefined"
-    );
-  }
-  if (Array.isArray(sampleValue)) {
-    // Default Arrow inference doesn't handle list types
-    return makeListVector(values);
-  } else if (Buffer.isBuffer(sampleValue)) {
-    // Default Arrow inference doesn't handle Buffer
-    return vectorFromArray(values, new Binary());
-  } else if (
-    !(stringAsDictionary ?? false) &&
-    (typeof sampleValue === "string" || sampleValue instanceof String)
-  ) {
-    // If the type is string then don't use Arrow's default inference unless dictionaries are requested
-    // because it will always use dictionary encoding for strings
-    return vectorFromArray(values, new Utf8());
-  } else {
-    // Convert a JS array of values to an arrow vector
-    return vectorFromArray(values);
-  }
-}
-
-async function applyEmbeddings<T>(
-  table: ArrowTable,
-  embeddings?: EmbeddingFunction<T>,
-  schema?: Schema
-): Promise<ArrowTable> {
-  if (embeddings == null) {
-    return table;
-  }
-  if (schema !== undefined && schema !== null) {
-    schema = sanitizeSchema(schema);
-  }
-
-  // Convert from ArrowTable to Record<String, Vector>
-  const colEntries = [...Array(table.numCols).keys()].map((_, idx) => {
-    const name = table.schema.fields[idx].name;
-    // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
-    const vec = table.getChildAt(idx)!;
-    return [name, vec];
-  });
-  const newColumns = Object.fromEntries(colEntries);
-
-  const sourceColumn = newColumns[embeddings.sourceColumn];
-  const destColumn = embeddings.destColumn ?? "vector";
-  const innerDestType = embeddings.embeddingDataType ?? new Float32();
-  if (sourceColumn === undefined) {
-    throw new Error(
-      `Cannot apply embedding function because the source column '${embeddings.sourceColumn}' was not present in the data`
-    );
-  }
-
-  if (table.numRows === 0) {
-    if (Object.prototype.hasOwnProperty.call(newColumns, destColumn)) {
-      // We have an empty table and it already has the embedding column so no work needs to be done
-      // Note: we don't return an error like we did below because this is a common occurrence.  For example,
-      // if we call convertToTable with 0 records and a schema that includes the embedding
-      return table;
-    }
-    if (embeddings.embeddingDimension !== undefined) {
-      const destType = newVectorType(
-        embeddings.embeddingDimension,
-        innerDestType
-      );
-      newColumns[destColumn] = makeVector([], destType);
-    } else if (schema != null) {
-      const destField = schema.fields.find((f) => f.name === destColumn);
-      if (destField != null) {
-        newColumns[destColumn] = makeVector([], destField.type);
-      } else {
-        throw new Error(
-          `Attempt to apply embeddings to an empty table failed because schema was missing embedding column '${destColumn}'`
-        );
-      }
-    } else {
-      throw new Error(
-        "Attempt to apply embeddings to an empty table when the embeddings function does not specify `embeddingDimension`"
-      );
-    }
-  } else {
-    if (Object.prototype.hasOwnProperty.call(newColumns, destColumn)) {
-      throw new Error(
-        `Attempt to apply embeddings to table failed because column ${destColumn} already existed`
-      );
-    }
-    if (table.batches.length > 1) {
-      throw new Error(
-        "Internal error: `makeArrowTable` unexpectedly created a table with more than one batch"
-      );
-    }
-    const values = sourceColumn.toArray();
-    const vectors = await embeddings.embed(values as T[]);
-    if (vectors.length !== values.length) {
-      throw new Error(
-        "Embedding function did not return an embedding for each input element"
-      );
-    }
-    const destType = newVectorType(vectors[0].length, innerDestType);
-    newColumns[destColumn] = makeVector(vectors, destType);
-  }
-
-  const newTable = new ArrowTable(newColumns);
-  if (schema != null) {
-    if (schema.fields.find((f) => f.name === destColumn) === undefined) {
-      throw new Error(
-        `When using embedding functions and specifying a schema the schema should include the embedding column but the column ${destColumn} was missing`
-      );
-    }
-    return alignTable(newTable, schema);
-  }
-  return newTable;
-}
-
-/*
- * Convert an Array of records into an Arrow Table, optionally applying an
- * embeddings function to it.
- *
- * This function calls `makeArrowTable` first to create the Arrow Table.
- * Any provided `makeTableOptions` (e.g. a schema) will be passed on to
- * that call.
- *
- * The embedding function will be passed a column of values (based on the
- * `sourceColumn` of the embedding function) and expects to receive back
- * number[][] which will be converted into a fixed size list column.  By
- * default this will be a fixed size list of Float32 but that can be
- * customized by the `embeddingDataType` property of the embedding function.
- *
- * If a schema is provided in `makeTableOptions` then it should include the
- * embedding columns.  If no schema is provded then embedding columns will
- * be placed at the end of the table, after all of the input columns.
- */
-export async function convertToTable<T>(
-  data: Array<Record<string, unknown>>,
-  embeddings?: EmbeddingFunction<T>,
-  makeTableOptions?: Partial<MakeArrowTableOptions>
-): Promise<ArrowTable> {
-  const table = makeArrowTable(data, makeTableOptions);
-  return await applyEmbeddings(table, embeddings, makeTableOptions?.schema);
-}
-
-// Creates the Arrow Type for a Vector column with dimension `dim`
-function newVectorType<T extends Float>(
-  dim: number,
-  innerType: T
-): FixedSizeList<T> {
-  // Somewhere we always default to have the elements nullable, so we need to set it to true
-  // otherwise we often get schema mismatches because the stored data always has schema with nullable elements
-  const children = new Field<T>("item", innerType, true);
-  return new FixedSizeList(dim, children);
-}
-
-/**
- * Serialize an Array of records into a buffer using the Arrow IPC File serialization
- *
- * This function will call `convertToTable` and pass on `embeddings` and `schema`
- *
- * `schema` is required if data is empty
- */
-export async function fromRecordsToBuffer<T>(
-  data: Array<Record<string, unknown>>,
-  embeddings?: EmbeddingFunction<T>,
-  schema?: Schema
-): Promise<Buffer> {
-  if (schema !== undefined && schema !== null) {
-    schema = sanitizeSchema(schema);
-  }
-  const table = await convertToTable(data, embeddings, { schema, embeddings });
-  const writer = RecordBatchFileWriter.writeAll(table);
-  return Buffer.from(await writer.toUint8Array());
-}
-
-/**
- * Serialize an Array of records into a buffer using the Arrow IPC Stream serialization
- *
- * This function will call `convertToTable` and pass on `embeddings` and `schema`
- *
- * `schema` is required if data is empty
- */
-export async function fromRecordsToStreamBuffer<T>(
-  data: Array<Record<string, unknown>>,
-  embeddings?: EmbeddingFunction<T>,
-  schema?: Schema
-): Promise<Buffer> {
-  if (schema !== null && schema !== undefined) {
-    schema = sanitizeSchema(schema);
-  }
-  const table = await convertToTable(data, embeddings, { schema });
-  const writer = RecordBatchStreamWriter.writeAll(table);
-  return Buffer.from(await writer.toUint8Array());
-}
-
-/**
- * Serialize an Arrow Table into a buffer using the Arrow IPC File serialization
- *
- * This function will apply `embeddings` to the table in a manner similar to
- * `convertToTable`.
- *
- * `schema` is required if the table is empty
- */
-export async function fromTableToBuffer<T>(
-  table: ArrowTable,
-  embeddings?: EmbeddingFunction<T>,
-  schema?: Schema
-): Promise<Buffer> {
-  if (schema !== null && schema !== undefined) {
-    schema = sanitizeSchema(schema);
-  }
-  const tableWithEmbeddings = await applyEmbeddings(table, embeddings, schema);
-  const writer = RecordBatchFileWriter.writeAll(tableWithEmbeddings);
-  return Buffer.from(await writer.toUint8Array());
-}
-
-/**
- * Serialize an Arrow Table into a buffer using the Arrow IPC Stream serialization
- *
- * This function will apply `embeddings` to the table in a manner similar to
- * `convertToTable`.
- *
- * `schema` is required if the table is empty
- */
-export async function fromTableToStreamBuffer<T>(
-  table: ArrowTable,
-  embeddings?: EmbeddingFunction<T>,
-  schema?: Schema
-): Promise<Buffer> {
-  if (schema !== null && schema !== undefined) {
-    schema = sanitizeSchema(schema);
-  }
-  const tableWithEmbeddings = await applyEmbeddings(table, embeddings, schema);
-  const writer = RecordBatchStreamWriter.writeAll(tableWithEmbeddings);
-  return Buffer.from(await writer.toUint8Array());
-}
-
-function alignBatch(batch: RecordBatch, schema: Schema): RecordBatch {
-  const alignedChildren = [];
-  for (const field of schema.fields) {
-    const indexInBatch = batch.schema.fields?.findIndex(
-      (f) => f.name === field.name
-    );
-    if (indexInBatch < 0) {
-      throw new Error(
-        `The column ${field.name} was not found in the Arrow Table`
-      );
-    }
-    alignedChildren.push(batch.data.children[indexInBatch]);
-  }
-  const newData = makeData({
-    type: new Struct(schema.fields),
-    length: batch.numRows,
-    nullCount: batch.nullCount,
-    children: alignedChildren
-  });
-  return new RecordBatch(schema, newData);
-}
-
-function alignTable(table: ArrowTable, schema: Schema): ArrowTable {
-  const alignedBatches = table.batches.map((batch) =>
-    alignBatch(batch, schema)
-  );
-  return new ArrowTable(schema, alignedBatches);
-}
-
-// Creates an empty Arrow Table
-export function createEmptyTable(schema: Schema): ArrowTable {
-  return new ArrowTable(sanitizeSchema(schema));
-}
-
-function validateSchemaEmbeddings(
-  schema: Schema<any>,
-  data: Array<Record<string, unknown>>,
-  embeddings: EmbeddingFunction<any> | undefined
-) {
-  const fields = [];
-  const missingEmbeddingFields = [];
-
-  // First we check if the field is a `FixedSizeList`
-  // Then we check if the data contains the field
-  // if it does not, we add it to the list of missing embedding fields
-  // Finally, we check if those missing embedding fields are `this._embeddings`
-  // if they are not, we throw an error
-  for (const field of schema.fields) {
-    if (field.type instanceof FixedSizeList) {
-      if (data.length !== 0 && data?.[0]?.[field.name] === undefined) {
-        missingEmbeddingFields.push(field);
-      } else {
-        fields.push(field);
-      }
-    } else {
-      fields.push(field);
-    }
-  }
-
-  if (missingEmbeddingFields.length > 0 && embeddings === undefined) {
-    throw new Error(
-      `Table has embeddings: "${missingEmbeddingFields
-        .map((f) => f.name)
-        .join(",")}", but no embedding function was provided`
-    );
-  }
-
-  return new Schema(fields, schema.metadata);
-}
--- a/node/src/embedding/embedding_function.ts
+++ b/node/src/embedding/embedding_function.ts
@@ -1,68 +0,0 @@
-// Copyright 2023 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-import { type Float } from 'apache-arrow'
-
-/**
- * An embedding function that automatically creates vector representation for a given column.
- */
-export interface EmbeddingFunction<T> {
-  /**
-   * The name of the column that will be used as input for the Embedding Function.
-   */
-  sourceColumn: string
-
-  /**
-   * The data type of the embedding
-   *
-   * The embedding function should return `number`.  This will be converted into
-   * an Arrow float array.  By default this will be Float32 but this property can
-   * be used to control the conversion.
-   */
-  embeddingDataType?: Float
-
-  /**
-   * The dimension of the embedding
-   *
-   * This is optional, normally this can be determined by looking at the results of
-   * `embed`.  If this is not specified, and there is an attempt to apply the embedding
-   * to an empty table, then that process will fail.
-   */
-  embeddingDimension?: number
-
-  /**
-   * The name of the column that will contain the embedding
-   *
-   * By default this is "vector"
-   */
-  destColumn?: string
-
-  /**
-   * Should the source column be excluded from the resulting table
-   *
-   * By default the source column is included.  Set this to true and
-   * only the embedding will be stored.
-   */
-  excludeSource?: boolean
-
-  /**
-   * Creates a vector representation for the given values.
-   */
-  embed: (data: T[]) => Promise<number[][]>
-}
-
-export function isEmbeddingFunction<T> (value: any): value is EmbeddingFunction<T> {
-  return typeof value.sourceColumn === 'string' &&
-      typeof value.embed === 'function'
-}
--- a/node/src/embedding/openai.ts
+++ b/node/src/embedding/openai.ts
@@ -1,57 +0,0 @@
-// Copyright 2023 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-import { type EmbeddingFunction } from '../index'
-import type OpenAI from 'openai'
-
-/**
- * Embedding function backed by the OpenAI embeddings endpoint.
- *
- * The `openai` package is loaded lazily in the constructor so that it only has
- * to be installed by users who actually construct this class.
- */
-export class OpenAIEmbeddingFunction implements EmbeddingFunction<string> {
-  sourceColumn: string
-
-  private readonly _openai: OpenAI
-  private readonly _modelName: string
-
-  /**
-   * @param sourceColumn name of the column whose values are embedded
-   * @param openAIKey API key handed to the OpenAI client
-   * @param modelName embedding model to request
-   */
-  constructor (sourceColumn: string, openAIKey: string, modelName: string = 'text-embedding-ada-002') {
-    /**
-     * @type {import("openai").default}
-     */
-    let OpenAIClient
-    try {
-      // eslint-disable-next-line @typescript-eslint/no-var-requires
-      OpenAIClient = require('openai')
-    } catch {
-      // Any failure to load the optional dependency is reported as an install hint.
-      throw new Error('please install openai@^4.24.1 using npm install openai')
-    }
-
-    this.sourceColumn = sourceColumn
-    this._openai = new OpenAIClient({ apiKey: openAIKey })
-    this._modelName = modelName
-  }
-
-  /**
-   * Requests embeddings for `data` in a single call and returns the
-   * `embedding` of every entry of the response, in response order.
-   */
-  async embed (data: string[]): Promise<number[][]> {
-    const { data: items } = await this._openai.embeddings.create({
-      model: this._modelName,
-      input: data
-    })
-
-    return items.map((item) => item.embedding)
-  }
-}
--- a/node/src/index.ts
+++ b/node/src/index.ts
--- a/node/src/integration_test/test.ts
+++ b/node/src/integration_test/test.ts
@@ -1,155 +0,0 @@
-// Copyright 2023 LanceDB Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-import { describe } from 'mocha'
-import * as chai from 'chai'
-import { assert } from 'chai'
-import * as chaiAsPromised from 'chai-as-promised'
-import { v4 as uuidv4 } from 'uuid'
-
-import * as lancedb from '../index'
-import { tmpdir } from 'os'
-import * as fs from 'fs'
-import * as path from 'path'
-
-chai.use(chaiAsPromised)
-
-describe('LanceDB AWS Integration test', function () {
-  it('s3+ddb schema is processed correctly', async function () {
-    this.timeout(15000)
-
-    // WARNING: specifying engine is NOT a publicly supported feature in lancedb yet
-    // THE API WILL CHANGE
-    const conn = await lancedb.connect('s3://lancedb-integtest?engine=ddb&ddbTableName=lancedb-integtest')
-    const rows = [{ vector: Array(128).fill(1.0) }]
-    const tableName = uuidv4()
-
-    // The table starts with one row ...
-    const created = await conn.createTable(tableName, rows, { writeMode: lancedb.WriteMode.Overwrite })
-
-    // ... and five single-row appends are started before any of them is awaited.
-    const pendingAdds = Array.from({ length: 5 }, () => created.add(rows))
-    await Promise.allSettled(pendingAdds)
-
-    // Re-open the table and expect all 1 + 5 rows to be there.
-    const reopened = await conn.openTable(tableName)
-    assert.equal(await reopened.countRows(), 6)
-  })
-})
-
-// Connects with `mirroredStore=<local temp dir>` and, after each table operation
-// (create, index, delete), inspects `<dir>/<table>.lance` on the local filesystem
-// to check which directories and files are present there.
-describe('LanceDB Mirrored Store Integration test', function () {
-  it('s3://...?mirroredStore=... param is processed correctly', async function () {
-    this.timeout(600000)
-
-    const dir = await fs.promises.mkdtemp(path.join(tmpdir(), 'lancedb-mirror-'))
-    console.log(dir)
-    const conn = await lancedb.connect({ uri: `s3://lancedb-integtest?mirroredStore=${dir}`, storageOptions: { allowHttp: 'true' } })
-    // 800 rows in total: 200 each for id 0, 1, 2 and 3
-    const data = Array(200).fill({ vector: Array(128).fill(1.0), id: 0 })
-    data.push(...Array(200).fill({ vector: Array(128).fill(1.0), id: 1 }))
-    data.push(...Array(200).fill({ vector: Array(128).fill(1.0), id: 2 }))
-    data.push(...Array(200).fill({ vector: Array(128).fill(1.0), id: 3 }))
-
-    const tableName = uuidv4()
-
-    // try create table and check if it's mirrored
-    const t = await conn.createTable(tableName, data, { writeMode: lancedb.WriteMode.Overwrite })
-
-    const mirroredPath = path.join(dir, `${tableName}.lance`)
-
-    const files = await fs.promises.readdir(mirroredPath, { withFileTypes: true })
-    // there should be three dirs
-    // NOTE(review): only the first two entries are checked with isDirectory()
-    assert.equal(files.length, 3, 'files after table creation')
-    assert.isTrue(files[0].isDirectory())
-    assert.isTrue(files[1].isDirectory())
-
-    const transactionFiles = await fs.promises.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true })
-    assert.equal(transactionFiles.length, 1, 'transactionFiles after table creation')
-    assert.isTrue(transactionFiles[0].name.endsWith('.txn'))
-
-    const versionFiles = await fs.promises.readdir(path.join(mirroredPath, '_versions'), { withFileTypes: true })
-    assert.equal(versionFiles.length, 1, 'versionFiles after table creation')
-    assert.isTrue(versionFiles[0].name.endsWith('.manifest'))
-
-    const dataFiles = await fs.promises.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true })
-    assert.equal(dataFiles.length, 1, 'dataFiles after table creation')
-    assert.isTrue(dataFiles[0].name.endsWith('.lance'))
-
-    // try create index and check if it's mirrored
-    await t.createIndex({ column: 'vector', type: 'ivf_pq' })
-
-    const filesAfterIndex = await fs.promises.readdir(mirroredPath, { withFileTypes: true })
-    // there should be four dirs
-    // NOTE(review): only the first three entries are checked with isDirectory()
-    assert.equal(filesAfterIndex.length, 4, 'filesAfterIndex')
-    assert.isTrue(filesAfterIndex[0].isDirectory())
-    assert.isTrue(filesAfterIndex[1].isDirectory())
-    assert.isTrue(filesAfterIndex[2].isDirectory())
-
-    // Two TXs now
-    const transactionFilesAfterIndex = await fs.promises.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true })
-    assert.equal(transactionFilesAfterIndex.length, 2, 'transactionFilesAfterIndex')
-    assert.isTrue(transactionFilesAfterIndex[0].name.endsWith('.txn'))
-    assert.isTrue(transactionFilesAfterIndex[1].name.endsWith('.txn'))
-
-    // The index build is expected to leave the single data file untouched
-    const dataFilesAfterIndex = await fs.promises.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true })
-    assert.equal(dataFilesAfterIndex.length, 1, 'dataFilesAfterIndex')
-    assert.isTrue(dataFilesAfterIndex[0].name.endsWith('.lance'))
-
-    // `_indices` holds one sub-directory, which in turn holds the index files
-    const indicesFiles = await fs.promises.readdir(path.join(mirroredPath, '_indices'), { withFileTypes: true })
-    assert.equal(indicesFiles.length, 1, 'indicesFiles')
-    assert.isTrue(indicesFiles[0].isDirectory())
-
-    const indexFiles = await fs.promises.readdir(path.join(mirroredPath, '_indices', indicesFiles[0].name), { withFileTypes: true })
-    console.log(`DEBUG indexFiles in ${indicesFiles[0].name}:`, indexFiles.map(f => `${f.name} (${f.isFile() ? 'file' : 'dir'})`))
-    assert.equal(indexFiles.length, 2, 'indexFiles')
-    const fileNames = indexFiles.map(f => f.name).sort()
-    assert.isTrue(fileNames.includes('auxiliary.idx'), 'auxiliary.idx should be present')
-    assert.isTrue(fileNames.includes('index.idx'), 'index.idx should be present')
-    assert.isTrue(indexFiles.every(f => f.isFile()), 'all index files should be files')
-
-    // try delete and check if it's mirrored
-    await t.delete('id = 0')
-
-    const filesAfterDelete = await fs.promises.readdir(mirroredPath, { withFileTypes: true })
-    // there should be five dirs
-    assert.equal(filesAfterDelete.length, 5, 'filesAfterDelete')
-    assert.isTrue(filesAfterDelete[0].isDirectory())
-    assert.isTrue(filesAfterDelete[1].isDirectory())
-    assert.isTrue(filesAfterDelete[2].isDirectory())
-    assert.isTrue(filesAfterDelete[3].isDirectory())
-    assert.isTrue(filesAfterDelete[4].isDirectory())
-
-    // Three TXs now
-    // NOTE(review): only the first two of the three entries have their suffix checked
-    const transactionFilesAfterDelete = await fs.promises.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true })
-    assert.equal(transactionFilesAfterDelete.length, 3, 'transactionFilesAfterDelete')
-    assert.isTrue(transactionFilesAfterDelete[0].name.endsWith('.txn'))
-    assert.isTrue(transactionFilesAfterDelete[1].name.endsWith('.txn'))
-
-    const dataFilesAfterDelete = await fs.promises.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true })
-    assert.equal(dataFilesAfterDelete.length, 1, 'dataFilesAfterDelete')
-    assert.isTrue(dataFilesAfterDelete[0].name.endsWith('.lance'))
-
-    const indicesFilesAfterDelete = await fs.promises.readdir(path.join(mirroredPath, '_indices'), { withFileTypes: true })
-    assert.equal(indicesFilesAfterDelete.length, 1, 'indicesFilesAfterDelete')
-    assert.isTrue(indicesFilesAfterDelete[0].isDirectory())
-
-    const indexFilesAfterDelete = await fs.promises.readdir(path.join(mirroredPath, '_indices', indicesFilesAfterDelete[0].name), { withFileTypes: true })
-    console.log(`DEBUG indexFilesAfterDelete in ${indicesFilesAfterDelete[0].name}:`, indexFilesAfterDelete.map(f => `${f.name} (${f.isFile() ? 'file' : 'dir'})`))
-    assert.equal(indexFilesAfterDelete.length, 2, 'indexFilesAfterDelete')
-    const fileNamesAfterDelete = indexFilesAfterDelete.map(f => f.name).sort()
-    assert.isTrue(fileNamesAfterDelete.includes('auxiliary.idx'), 'auxiliary.idx should be present after delete')
-    assert.isTrue(fileNamesAfterDelete.includes('index.idx'), 'index.idx should be present after delete')
-    assert.isTrue(indexFilesAfterDelete.every(f => f.isFile()), 'all index files should be files after delete')
-
-    // The delete is expected to add a `_deletions` directory with one `.arrow` file
-    const deletionFiles = await fs.promises.readdir(path.join(mirroredPath, '_deletions'), { withFileTypes: true })
-    assert.equal(deletionFiles.length, 1, 'deletionFiles')
-    assert.isTrue(deletionFiles[0].name.endsWith('.arrow'))
-  })
-})
--- a/node/src/middleware.ts
+++ b/node/src/middleware.ts
@@ -1,58 +0,0 @@
-// Copyright 2024 LanceDB Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-/**
- * Middleware for Remote LanceDB Connection or Table
- */
-export interface HttpMiddleware {
-  /**
-   * A callback that can be used to instrument the behavior of http requests to remote
-   * tables. It can be used to add headers, modify the request, or even short-circuit
-   * the request and return a response without making the request to the remote endpoint.
-   * It can also be used to modify the response from the remote endpoint.
-   *
-   * @param {RemoteRequest} req - Request to the remote endpoint
-   * @param next - Callback to advance the middleware chain; it takes the (possibly
-   *   modified) request and resolves to the response produced further down the chain
-   * @returns the response to hand back to the caller
-   */
-  onRemoteRequest(
-    req: RemoteRequest,
-    next: (req: RemoteRequest) => Promise<RemoteResponse>,
-  ): Promise<RemoteResponse>
-};
-
-/**
- * HTTP methods that a {@link RemoteRequest} can use.
- */
-export enum Method {
-  GET = 0,
-  POST = 1
-}
-
-/**
- * A LanceDB Remote HTTP Request
- */
-export interface RemoteRequest {
-  /** Full URI the request is sent to */
-  uri: string
-  /** HTTP method of the request */
-  method: Method
-  /** Request headers, keyed by header name */
-  headers: Map<string, string>
-  /** Optional query parameters, keyed by parameter name */
-  params?: Map<string, string>
-  /** Optional request body */
-  body?: any
-}
-
-/**
- * A LanceDB Remote HTTP Response
- */
-export interface RemoteResponse {
-  /** HTTP status code */
-  status: number
-  /** HTTP status text accompanying the status code */
-  statusText: string
-  /** Response headers, keyed by header name */
-  headers: Map<string, string>
-  /** Asynchronous accessor for the response payload */
-  body: () => Promise<any>
-}
--- a/node/src/query.ts
+++ b/node/src/query.ts
@@ -1,163 +0,0 @@
-// Copyright 2023 LanceDB Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-import { Vector, tableFromIPC } from 'apache-arrow'
-import { type EmbeddingFunction } from './embedding/embedding_function'
-import { type MetricType } from '.'
-
-// eslint-disable-next-line @typescript-eslint/no-var-requires
-const { tableSearch } = require('../native.js')
-
-/**
- * A builder for nearest neighbor queries for LanceDB.
- *
- * Setter methods store their argument on the builder and return `this`, so
- * calls can be chained; `execute` runs the search.
- */
-export class Query<T = number[]> {
-  private readonly _query?: T
-  private readonly _tbl?: any
-  private _queryVector?: number[]
-  private _limit?: number
-  private _refineFactor?: number
-  private _nprobes: number
-  private _select?: string[]
-  private _filter?: string
-  private _metricType?: MetricType
-  private _prefilter: boolean
-  private _fastSearch: boolean
-  protected readonly _embeddings?: EmbeddingFunction<T>
-
-  /**
-   * @param query the value to search for; embedded first when `embeddings` is given,
-   *              otherwise used directly as the query vector
-   * @param tbl the table handle that the native search call is invoked on
-   * @param embeddings optional embedding function applied to `query` in `execute`
-   */
-  constructor (query?: T, tbl?: any, embeddings?: EmbeddingFunction<T>) {
-    this._tbl = tbl
-    this._query = query
-    this._limit = 10
-    this._nprobes = 20
-    this._refineFactor = undefined
-    this._select = undefined
-    this._filter = undefined
-    this._metricType = undefined
-    this._embeddings = embeddings
-    this._prefilter = false
-    this._fastSearch = false
-  }
-
-  /**
-     * Sets the number of results that will be returned
-     * default value is 10
-     * @param value number of results
-     */
-  limit (value: number): Query<T> {
-    this._limit = value
-    return this
-  }
-
-  /**
-     * Refine the results by reading extra elements and re-ranking them in memory.
-     * @param value refine factor to use in this query.
-     */
-  refineFactor (value: number): Query<T> {
-    this._refineFactor = value
-    return this
-  }
-
-  /**
-     * The number of probes used. A higher number makes search more accurate but also slower.
-     * The default value is 20.
-     * @param value The number of probes used.
-     */
-  nprobes (value: number): Query<T> {
-    this._nprobes = value
-    return this
-  }
-
-  /**
-     * A filter statement to be applied to this query.
-     * @param value A filter in the same format used by a sql WHERE clause.
-     */
-  filter (value: string): Query<T> {
-    this._filter = value
-    return this
-  }
-
-  // Alias for `filter`: the instance field is assigned the `filter` method itself.
-  where = this.filter
-
-  /** Return only the specified columns.
-     *
-     * @param value Only select the specified columns. If not specified, all columns will be returned.
-     */
-  select (value: string[]): Query<T> {
-    this._select = value
-    return this
-  }
-
-  /**
-     * The MetricType used for this Query.
-     * @param value The metric to use. @see MetricType for the different options
-     */
-  metricType (value: MetricType): Query<T> {
-    this._metricType = value
-    return this
-  }
-
-  /**
-     * Sets the prefilter flag of this query. The default value is false.
-     * NOTE(review): presumably controls whether the filter is applied before the
-     * vector search - confirm against the native implementation.
-     * @param value the new prefilter flag
-     */
-  prefilter (value: boolean): Query<T> {
-    this._prefilter = value
-    return this
-  }
-
-  /**
-   * Skip searching un-indexed data. This can make search faster, but will miss
-   * any data that is not yet indexed.
-   */
-  fastSearch (value: boolean): Query<T> {
-    this._fastSearch = value
-    return this
-  }
-
-  /**
-     * Execute the query and return the results as an Array of Objects
-     */
-  async execute<T = Record<string, unknown>> (): Promise<T[]> {
-    // Resolve the query vector: embed the query when an embedding function is
-    // configured, otherwise treat the query itself as the vector.
-    if (this._query !== undefined) {
-      if (this._embeddings !== undefined) {
-        this._queryVector = (await this._embeddings.embed([this._query]))[0]
-      } else {
-        this._queryVector = this._query as number[]
-      }
-    }
-
-    // The native search is called with the table as `this` and receives this
-    // builder as an argument (presumably it reads the private fields - verify
-    // against the native code before renaming any of them).
-    const isElectron = this.isElectron()
-    const buffer = await tableSearch.call(this._tbl, this, isElectron)
-    const data = tableFromIPC(buffer)
-
-    // Copy every row into a plain object, converting Arrow vectors on the way.
-    return data.toArray().map((entry: Record<string, unknown>) => {
-      const newObject: Record<string, unknown> = {}
-      Object.keys(entry).forEach((key: string) => {
-        if (entry[key] instanceof Vector) {
-          // toJSON() returns f16 array correctly
-          newObject[key] = (entry[key] as any).toJSON()
-        } else {
-          newObject[key] = entry[key] as any
-        }
-      })
-      return newObject as unknown as T
-    })
-  }
-
-  // See https://github.com/electron/electron/issues/2288
-  // Any error raised while probing the globals is treated as "not Electron".
-  private isElectron (): boolean {
-    try {
-      // eslint-disable-next-line no-prototype-builtins
-      return (process?.versions?.hasOwnProperty('electron') || navigator?.userAgent?.toLowerCase()?.includes(' electron'))
-    } catch (e) {
-      return false
-    }
-  }
-}
--- a/node/src/remote/client.ts
+++ b/node/src/remote/client.ts
@@ -1,302 +0,0 @@
-// Copyright 2023 LanceDB Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-import axios, { type AxiosError, type AxiosResponse, type ResponseType } from 'axios'
-
-import { tableFromIPC, type Table as ArrowTable } from 'apache-arrow'
-
-import { type RemoteResponse, type RemoteRequest, Method } from '../middleware'
-import type { MetricType } from '..'
-
-/**
- * Shape of a middleware accepted by the HTTP client: it receives the request
- * plus a `next` callback that continues the chain, and resolves to a response.
- */
-interface HttpLancedbClientMiddleware {
-  onRemoteRequest(
-    req: RemoteRequest,
-    next: (req: RemoteRequest) => Promise<RemoteResponse>,
-  ): Promise<RemoteResponse>
-}
-
-/**
- * Run `req` through `middlewares` in order and, once the chain is exhausted,
- * send it to the remote endpoint with axios.
- *
- * @param req the request to send
- * @param middlewares middlewares to invoke, first element first
- * @param opts optional timeout / response type for the final HTTP call
- * @returns the response produced by the chain
- */
-async function callWithMiddlewares (
-  req: RemoteRequest,
-  middlewares: HttpLancedbClientMiddleware[],
-  opts?: MiddlewareInvocationOptions
-): Promise<RemoteResponse> {
-  // Terminal step of the chain: perform the actual HTTP call.
-  async function send (finalReq: RemoteRequest): Promise<RemoteResponse> {
-    const headers = Object.fromEntries(finalReq.headers.entries())
-    const params = Object.fromEntries(finalReq.params?.entries() ?? [])
-    const timeout = opts?.timeout
-
-    // Only POST forwards the requested response type; everything else is sent as GET.
-    const res = finalReq.method === Method.POST
-      ? await axios.post(
-        finalReq.uri,
-        finalReq.body,
-        { headers, params, timeout, responseType: opts?.responseType }
-      )
-      : await axios.get(finalReq.uri, { headers, params, timeout })
-
-    return toLanceRes(res)
-  }
-
-  // Invoke the middleware at `position`, handing it a `next` that continues
-  // with the following one.
-  async function dispatch (
-    position: number,
-    current: RemoteRequest
-  ): Promise<RemoteResponse> {
-    if (position >= middlewares.length) {
-      return await send(current)
-    }
-
-    return await middlewares[position].onRemoteRequest(
-      current,
-      async (forwarded) => await dispatch(position + 1, forwarded)
-    )
-  }
-
-  return await dispatch(0, req)
-}
-
-/**
- * Options applied to the final HTTP call made by `callWithMiddlewares`.
- */
-interface MiddlewareInvocationOptions {
-  // Passed to axios as `responseType` (POST requests only)
-  responseType?: ResponseType
-  // Passed to axios as `timeout`
-  timeout?: number
-}
-
-/**
- * Convert an axios response into the library-neutral {@link RemoteResponse}.
- *
- * Headers are copied into a `Map`; the payload is exposed through an async
- * `body` accessor that resolves to `res.data`.
- */
-function toLanceRes (res: AxiosResponse): RemoteResponse {
-  const headerMap = new Map()
-  for (const name in res.headers) {
-    headerMap.set(name, res.headers[name])
-  }
-
-  const { status, statusText } = res
-  return {
-    status,
-    statusText,
-    headers: headerMap,
-    body: async () => res.data
-  }
-}
-
-/**
- * Turn the body of an error response into text for an error message.
- *
- * @param res the error response
- * @param responseType the response type the request was made with; for
- *   `'arraybuffer'` the body is decoded with `TextDecoder`
- * @returns the decoded text, the JSON form of an object body, or the body as-is
- */
-async function decodeErrorData(
-  res: RemoteResponse,
-  responseType?: ResponseType
-): Promise<string> {
-  const payload = await res.body()
-
-  if (responseType === 'arraybuffer') {
-    return new TextDecoder().decode(payload)
-  }
-
-  return typeof payload === 'object' ? JSON.stringify(payload) : payload
-}
-
-/**
- * HTTP client for a remote LanceDB endpoint.
- *
- * Every request carries the API key in the `x-api-key` header and, when a
- * database name was supplied, the `x-lancedb-database` header. Requests pass
- * through the registered middlewares before being sent.
- */
-export class HttpLancedbClient {
-  private readonly _url: string
-  private readonly _apiKey: () => string
-  private readonly _middlewares: HttpLancedbClientMiddleware[]
-  private readonly _timeout: number | undefined
-
-  /**
-   * @param url base URL that request paths are appended to
-   * @param apiKey value sent in the `x-api-key` header
-   * @param timeout optional timeout forwarded to the HTTP call
-   * @param _dbName optional database name sent in the `x-lancedb-database` header
-   */
-  public constructor (
-    url: string,
-    apiKey: string,
-    timeout?: number,
-    private readonly _dbName?: string
-
-  ) {
-    this._url = url
-    this._apiKey = () => apiKey
-    this._middlewares = []
-    this._timeout = timeout
-  }
-
-  /** The base URL this client sends requests to. */
-  get uri (): string {
-    return this._url
-  }
-
-  /**
-   * Run a vector search against `tableName` and decode the Arrow IPC response.
-   *
-   * The table name is URI-encoded before it is placed in the request path, in
-   * line with the other table endpoints in this package.
-   *
-   * @returns the search result as an Arrow table
-   */
-  public async search (
-    tableName: string,
-    vector: number[],
-    k: number,
-    nprobes: number,
-    prefilter: boolean,
-    refineFactor?: number,
-    columns?: string[],
-    filter?: string,
-    metricType?: MetricType,
-    fastSearch?: boolean
-  ): Promise<ArrowTable<any>> {
-    const result = await this.post(
-      `/v1/table/${encodeURIComponent(tableName)}/query/`,
-      {
-        vector,
-        k,
-        nprobes,
-        refine_factor: refineFactor,
-        columns,
-        filter,
-        prefilter,
-        metric: metricType,
-        fast_search: fastSearch
-      },
-      undefined,
-      undefined,
-      // the response is binary Arrow IPC data
-      'arraybuffer'
-    )
-    const table = tableFromIPC(await result.body())
-    return table
-  }
-
-  /**
-   * Send a GET request.
-   *
-   * @param path path appended to the base URL
-   * @param params optional query parameters
-   * @returns the response when the request succeeds
-   * @throws Error "Network Error" when no response was received, or
-   *   "Server Error" when the failed request's response status is not 200
-   */
-  public async get (path: string, params?: Record<string, string>): Promise<RemoteResponse> {
-    const req = {
-      uri: `${this._url}${path}`,
-      method: Method.GET,
-      headers: new Map(Object.entries({
-        'Content-Type': 'application/json',
-        'x-api-key': this._apiKey(),
-        ...(this._dbName !== undefined ? { 'x-lancedb-database': this._dbName } : {})
-      })),
-      params: new Map(Object.entries(params ?? {}))
-    }
-
-    let response
-    try {
-      // Honor the configured timeout for GET requests as well, as `post` does.
-      response = await callWithMiddlewares(req, this._middlewares, {
-        timeout: this._timeout
-      })
-      return response
-    } catch (err: any) {
-      console.error(serializeErrorAsJson(err))
-      if (err.response === undefined) {
-        throw new Error(`Network Error: ${err.message as string}`)
-      }
-
-      response = toLanceRes(err.response)
-    }
-
-    if (response.status !== 200) {
-      const errorData = await decodeErrorData(response)
-      throw new Error(
-        `Server Error, status: ${response.status}, ` +
-        `message: ${response.statusText}: ${errorData}`
-      )
-    }
-
-    return response
-  }
-
-  /**
-   * Send a POST request.
-   *
-   * @param path path appended to the base URL
-   * @param data optional request body
-   * @param params optional query parameters
-   * @param content value of the `Content-Type` header, `application/json` by default
-   * @param responseType response type requested from the HTTP library
-   * @returns the response when its status is 200
-   * @throws Error "Network Error" when no response was received, or
-   *   "Server Error" when the response status is not 200
-   */
-  public async post (
-    path: string,
-    data?: any,
-    params?: Record<string, string>,
-    content?: string | undefined,
-    responseType?: ResponseType | undefined
-  ): Promise<RemoteResponse> {
-    const req = {
-      uri: `${this._url}${path}`,
-      method: Method.POST,
-      headers: new Map(Object.entries({
-        'Content-Type': content ?? 'application/json',
-        'x-api-key': this._apiKey(),
-        ...(this._dbName !== undefined ? { 'x-lancedb-database': this._dbName } : {})
-      })),
-      params: new Map(Object.entries(params ?? {})),
-      body: data
-    }
-
-    let response
-    try {
-      response = await callWithMiddlewares(req, this._middlewares, {
-        responseType,
-        timeout: this._timeout
-      })
-    } catch (err: any) {
-      console.error(serializeErrorAsJson(err))
-
-      if (err.response === undefined) {
-        throw new Error(`Network Error: ${err.message as string}`)
-      }
-      response = toLanceRes(err.response)
-    }
-
-    // Unlike `get`, a successful call also falls through to this status check.
-    if (response.status !== 200) {
-      const errorData = await decodeErrorData(response, responseType)
-      throw new Error(
-        `Server Error, status: ${response.status}, ` +
-        `message: ${response.statusText}: ${errorData}`
-      )
-    }
-
-    return response
-  }
-
-  /**
-   * Instrument this client with middleware
-   * @param mw - The middleware that instruments the client
-   * @returns - an instance of this client instrumented with the middleware
-   */
-  public withMiddleware (mw: HttpLancedbClientMiddleware): HttpLancedbClient {
-    const wrapped = this.clone()
-    wrapped._middlewares.push(mw)
-    return wrapped
-  }
-
-  /**
-   * Make a clone of this client
-   *
-   * The clone gets its own middleware array holding the same middlewares, so
-   * adding a middleware to the clone leaves this client unchanged.
-   */
-  private clone (): HttpLancedbClient {
-    const clone = new HttpLancedbClient(this._url, this._apiKey(), this._timeout, this._dbName)
-    for (const mw of this._middlewares) {
-      clone._middlewares.push(mw)
-    }
-    return clone
-  }
-}
-
-/**
- * Serialize an axios error to a JSON string of the form `{ "error": ... }` for logging.
- *
- * The error's own property names are passed to `JSON.stringify` as the
- * replacer so that its own properties are written out. The
- * response, when present, is serialized separately without its `config`.
- */
-function serializeErrorAsJson(err: AxiosError) {
-  const ownProps = Object.getOwnPropertyNames(err)
-  const error = JSON.parse(JSON.stringify(err, ownProps))
-
-  if (err.response != null) {
-    // config contains the request data, too noisy
-    const responseProps = Object.getOwnPropertyNames(err.response).filter(prop => prop !== 'config')
-    error.response = JSON.parse(JSON.stringify(err.response, responseProps))
-  } else {
-    error.response = null
-  }
-
-  return JSON.stringify({ error })
-}
--- a/node/src/remote/index.ts
+++ b/node/src/remote/index.ts
@@ -1,567 +0,0 @@
-// Copyright 2023 LanceDB Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-import {
-  type EmbeddingFunction,
-  type Table,
-  type VectorIndexParams,
-  type Connection,
-  type ConnectionOptions,
-  type CreateTableOptions,
-  type VectorIndex,
-  type WriteOptions,
-  type IndexStats,
-  type UpdateArgs,
-  type UpdateSqlArgs,
-  makeArrowTable,
-  type MergeInsertArgs,
-  type ColumnAlteration
-} from '../index'
-import { Query } from '../query'
-
-import { Vector, Table as ArrowTable } from 'apache-arrow'
-import { HttpLancedbClient } from './client'
-import { isEmbeddingFunction } from '../embedding/embedding_function'
-import {
-  createEmptyTable,
-  fromRecordsToStreamBuffer,
-  fromTableToStreamBuffer
-} from '../arrow'
-import { toSQL, TTLCache } from '../util'
-import { type HttpMiddleware } from '../middleware'
-
-/**
- * Remote connection.
- *
- * Talks to a remote LanceDB server through an {@link HttpLancedbClient} and
- * remembers the names of tables it has seen in a TTL cache.
- */
-export class RemoteConnection implements Connection {
-  private _client: HttpLancedbClient
-  private readonly _dbName: string
-  // Names of tables known to exist (presumably a 300 s TTL in ms - confirm against TTLCache)
-  private readonly _tableCache = new TTLCache(300_000)
-
-  /**
-   * @param opts connection options; `uri` must start with `db://`, and both an
-   *   API key (given directly or through the LANCEDB_API_KEY env variable) and a
-   *   region must be present
-   * @throws Error when the URI is not a `db://` URI or the API key / region is missing
-   */
-  constructor (opts: ConnectionOptions) {
-    if (!opts.uri.startsWith('db://')) {
-      throw new Error(`Invalid remote DB URI: ${opts.uri}`)
-    }
-    // Fall back to the environment when no API key was passed (copying opts
-    // rather than mutating the caller's object).
-    if (opts.apiKey == null || opts.apiKey === '') {
-      opts = Object.assign({}, opts, { apiKey: process.env.LANCEDB_API_KEY })
-    }
-    if (opts.apiKey === undefined || opts.region === undefined) {
-      throw new Error(
-        'API key and region are must be passed for remote connections. ' +
-        'API key can also be set through LANCEDB_API_KEY env variable.')
-    }
-
-    // The database name is whatever follows the `db://` scheme
-    this._dbName = opts.uri.slice('db://'.length)
-    let server: string
-    if (opts.hostOverride === undefined) {
-      server = `https://${this._dbName}.${opts.region}.api.lancedb.com`
-    } else {
-      server = opts.hostOverride
-    }
-    // The database name is only handed to the client when a host override is
-    // used; otherwise it is already part of the server host name.
-    this._client = new HttpLancedbClient(
-      server,
-      opts.apiKey,
-      opts.timeout,
-      opts.hostOverride === undefined ? undefined : this._dbName
-    )
-  }
-
-  get uri (): string {
-    // add the db:// prefix back
-    // NOTE(review): `_client.uri` is the full server URL built in the constructor,
-    // so this yields `db://https://...` rather than the original `db://<name>` - confirm intent
-    return 'db://' + this._client.uri
-  }
-
-  /**
-   * List table names, one page at a time.
-   *
-   * @param pageToken token sent as `page_token`; empty string by default
-   * @param limit maximum number of names requested; 10 by default
-   * @returns the `tables` array of the response body
-   */
-  async tableNames (
-    pageToken: string = '',
-    limit: number = 10
-  ): Promise<string[]> {
-    const response = await this._client.get('/v1/table/', {
-      limit: `${limit}`,
-      page_token: pageToken
-    })
-    const body = await response.body()
-    // Remember every listed table so that openTable can skip its existence check
-    for (const table of body.tables) {
-      this._tableCache.set(table, true)
-    }
-    return body.tables
-  }
-
-  /**
-   * Open an existing table, optionally attaching an embedding function.
-   *
-   * Unless the name is already in the table cache, the table's `describe`
-   * endpoint is called first; that call throws when the server answers with an error.
-   */
-  async openTable (name: string): Promise<Table>
-  async openTable<T>(
-    name: string,
-    embeddings: EmbeddingFunction<T>
-  ): Promise<Table<T>>
-  async openTable<T>(
-    name: string,
-    embeddings?: EmbeddingFunction<T>
-  ): Promise<Table<T>> {
-      // check if the table exists
-      if (this._tableCache.get(name) === undefined) {
-        await this._client.post(`/v1/table/${encodeURIComponent(name)}/describe/`)
-        this._tableCache.set(name, true)
-      }
-
-    if (embeddings !== undefined) {
-      return new RemoteTable(this._client, name, embeddings)
-    } else {
-      return new RemoteTable(this._client, name)
-    }
-  }
-
-  /**
-   * Create a table on the server from data or, when no data is given, from a schema.
-   *
-   * @param nameOrOpts the table name, or an options object carrying name, schema,
-   *   embedding function and data
-   * @param data rows or an Arrow table; takes precedence over `nameOrOpts.data`
-   * @param optsOrEmbedding only used when it is an embedding function
-   * @param opt NOTE(review): not read by this implementation
-   * @throws Error when there is neither data nor a schema, or the server reports an error
-   */
-  async createTable<T>(
-    nameOrOpts: string | CreateTableOptions<T>,
-    data?: Array<Record<string, unknown>> | ArrowTable,
-    optsOrEmbedding?: WriteOptions | EmbeddingFunction<T>,
-    opt?: WriteOptions
-  ): Promise<Table<T>> {
-    // Logic copied from LocalConnection, refactor these to a base class + connectionImpl pattern
-    let schema
-    let embeddings: undefined | EmbeddingFunction<T>
-    let tableName: string
-    if (typeof nameOrOpts === 'string') {
-      if (
-        optsOrEmbedding !== undefined &&
-        isEmbeddingFunction(optsOrEmbedding)
-      ) {
-        embeddings = optsOrEmbedding
-      }
-      tableName = nameOrOpts
-    } else {
-      schema = nameOrOpts.schema
-      embeddings = nameOrOpts.embeddingFunction
-      tableName = nameOrOpts.name
-      if (data === undefined) {
-        data = nameOrOpts.data
-      }
-    }
-
-    let buffer: Buffer
-
-    function isEmpty (
-      data: Array<Record<string, unknown>> | ArrowTable<any>
-    ): boolean {
-      if (data instanceof ArrowTable) {
-        return data.numRows === 0
-      }
-      return data.length === 0
-    }
-
-    // Without rows an empty table is built from the schema; otherwise the data
-    // is serialized together with the embedding function.
-    if (data === undefined || isEmpty(data)) {
-      if (schema === undefined) {
-        throw new Error('Either data or schema needs to defined')
-      }
-      buffer = await fromTableToStreamBuffer(createEmptyTable(schema))
-    } else if (data instanceof ArrowTable) {
-      buffer = await fromTableToStreamBuffer(data, embeddings)
-    } else {
-      // data is Array<Record<...>>
-      buffer = await fromRecordsToStreamBuffer(data, embeddings)
-    }
-
-    const res = await this._client.post(
-      `/v1/table/${encodeURIComponent(tableName)}/create/`,
-      buffer,
-      undefined,
-      'application/vnd.apache.arrow.stream'
-    )
-    // NOTE(review): `post` already throws on a non-200 status, so this check looks redundant
-    if (res.status !== 200) {
-      throw new Error(
-        `Server Error, status: ${res.status}, ` +
-          // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
-          `message: ${res.statusText}: ${await res.body()}`
-      )
-    }
-
-    this._tableCache.set(tableName, true)
-    if (embeddings === undefined) {
-      return new RemoteTable(this._client, tableName)
-    } else {
-      return new RemoteTable(this._client, tableName, embeddings)
-    }
-  }
-
-  /**
-   * Drop a table on the server and forget it in the table cache.
-   */
-  async dropTable (name: string): Promise<void> {
-    await this._client.post(`/v1/table/${encodeURIComponent(name)}/drop/`)
-    this._tableCache.delete(name)
-  }
-
-  /**
-   * Return a copy of this connection whose client is instrumented with `middleware`;
-   * this connection itself keeps its client.
-   */
-  withMiddleware (middleware: HttpMiddleware): Connection {
-    const wrapped = this.clone()
-    wrapped._client = wrapped._client.withMiddleware(middleware)
-    return wrapped
-  }
-
-  // Shallow copy that bypasses the constructor; the clone shares this
-  // connection's table cache object.
-  private clone (): RemoteConnection {
-    const clone: RemoteConnection = Object.create(RemoteConnection.prototype)
-    return Object.assign(clone, this)
-  }
-}
-
-export class RemoteQuery<T = number[]> extends Query<T> {
-  constructor (
-    query: T,
-    private readonly _client: HttpLancedbClient,
-    private readonly _name: string,
-    embeddings?: EmbeddingFunction<T>
-  ) {
-    super(query, undefined, embeddings)
-  }
-
-  // TODO: refactor this to a base class + queryImpl pattern
-  async execute<T = Record<string, unknown>>(): Promise<T[]> {
-    const embeddings = this._embeddings
-    const query = (this as any)._query
-    let queryVector: number[]
-
-    if (embeddings !== undefined) {
-      queryVector = (await embeddings.embed([query]))[0]
-    } else {
-      queryVector = query as number[]
-    }
-
-    const data = await this._client.search(
-      this._name,
-      queryVector,
-      (this as any)._limit,
-      (this as any)._nprobes,
-      (this as any)._prefilter,
-      (this as any)._refineFactor,
-      (this as any)._select,
-      (this as any)._filter,
-      (this as any)._metricType,
-      (this as any)._fastSearch
-    )
-
-    return data.toArray().map((entry: Record<string, unknown>) => {
-      const newObject: Record<string, unknown> = {}
-      Object.keys(entry).forEach((key: string) => {
-        if (entry[key] instanceof Vector) {
-          newObject[key] = (entry[key] as any).toArray()
-        } else {
-          newObject[key] = entry[key] as any
-        }
-      })
-      return newObject as unknown as T
-    })
-  }
-}
-
-// we are using extend until we have next next version release
-// Table and Connection has both been refactored to interfaces
-export class RemoteTable<T = number[]> implements Table<T> {
-  private _client: HttpLancedbClient
-  private readonly _embeddings?: EmbeddingFunction<T>
-  private readonly _name: string
-
-  constructor (client: HttpLancedbClient, name: string)
-  constructor (
-    client: HttpLancedbClient,
-    name: string,
-    embeddings: EmbeddingFunction<T>
-  )
-  constructor (
-    client: HttpLancedbClient,
-    name: string,
-    embeddings?: EmbeddingFunction<T>
-  ) {
-    this._client = client
-    this._name = name
-    this._embeddings = embeddings
-  }
-
-  get name (): string {
-    return this._name
-  }
-
-  get schema (): Promise<any> {
-    return this._client
-      .post(`/v1/table/${encodeURIComponent(this._name)}/describe/`)
-      .then(async (res) => {
-        if (res.status !== 200) {
-          throw new Error(
-            `Server Error, status: ${res.status}, ` +
-              // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
-              `message: ${res.statusText}: ${await res.body()}`
-          )
-        }
-        return (await res.body())?.schema
-      })
-  }
-
-  search (query: T): Query<T> {
-    return new RemoteQuery(query, this._client, encodeURIComponent(this._name)) //, this._embeddings_new)
-  }
-
-  filter (where: string): Query<T> {
-    throw new Error('Not implemented')
-  }
-
-  async mergeInsert (on: string, data: Array<Record<string, unknown>> | ArrowTable, args: MergeInsertArgs): Promise<void> {
-    let tbl: ArrowTable
-    if (data instanceof ArrowTable) {
-      tbl = data
-    } else {
-      tbl = makeArrowTable(data, await this.schema)
-    }
-
-    const queryParams: any = {
-      on
-    }
-    if (args.whenMatchedUpdateAll !== false && args.whenMatchedUpdateAll !== null && args.whenMatchedUpdateAll !== undefined) {
-      queryParams.when_matched_update_all = 'true'
-      if (typeof args.whenMatchedUpdateAll === 'string') {
-        queryParams.when_matched_update_all_filt = args.whenMatchedUpdateAll
-      }
-    } else {
-      queryParams.when_matched_update_all = 'false'
-    }
-    if (args.whenNotMatchedInsertAll ?? false) {
-      queryParams.when_not_matched_insert_all = 'true'
-    } else {
-      queryParams.when_not_matched_insert_all = 'false'
-    }
-    if (args.whenNotMatchedBySourceDelete !== false && args.whenNotMatchedBySourceDelete !== null && args.whenNotMatchedBySourceDelete !== undefined) {
-      queryParams.when_not_matched_by_source_delete = 'true'
-      if (typeof args.whenNotMatchedBySourceDelete === 'string') {
-        queryParams.when_not_matched_by_source_delete_filt = args.whenNotMatchedBySourceDelete
-      }
-    } else {
-      queryParams.when_not_matched_by_source_delete = 'false'
-    }
-
-    const buffer = await fromTableToStreamBuffer(tbl, this._embeddings)
-    const res = await this._client.post(
-      `/v1/table/${encodeURIComponent(this._name)}/merge_insert/`,
-      buffer,
-      queryParams,
-      'application/vnd.apache.arrow.stream'
-    )
-    if (res.status !== 200) {
-      throw new Error(
-        `Server Error, status: ${res.status}, ` +
-          // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
-          `message: ${res.statusText}: ${await res.body()}`
-      )
-    }
-  }
-
-  async add (data: Array<Record<string, unknown>> | ArrowTable): Promise<number> {
-    let tbl: ArrowTable
-    if (data instanceof ArrowTable) {
-      tbl = data
-    } else {
-      tbl = makeArrowTable(data, await this.schema)
-    }
-
-    const buffer = await fromTableToStreamBuffer(tbl, this._embeddings)
-    const res = await this._client.post(
-      `/v1/table/${encodeURIComponent(this._name)}/insert/`,
-      buffer,
-      {
-        mode: 'append'
-      },
-      'application/vnd.apache.arrow.stream'
-    )
-    if (res.status !== 200) {
-      throw new Error(
-        `Server Error, status: ${res.status}, ` +
-          // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
-          `message: ${res.statusText}: ${await res.body()}`
-      )
-    }
-    return tbl.numRows
-  }
-
-  async overwrite (data: Array<Record<string, unknown>> | ArrowTable): Promise<number> {
-    let tbl: ArrowTable
-    if (data instanceof ArrowTable) {
-      tbl = data
-    } else {
-      tbl = makeArrowTable(data)
-    }
-    const buffer = await fromTableToStreamBuffer(tbl, this._embeddings)
-    const res = await this._client.post(
-      `/v1/table/${encodeURIComponent(this._name)}/insert/`,
-      buffer,
-      {
-        mode: 'overwrite'
-      },
-      'application/vnd.apache.arrow.stream'
-    )
-    if (res.status !== 200) {
-      throw new Error(
-        `Server Error, status: ${res.status}, ` +
-          // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
-          `message: ${res.statusText}: ${await res.body()}`
-      )
-    }
-    return tbl.numRows
-  }
-
-  async createIndex (indexParams: VectorIndexParams): Promise<void> {
-    const unsupportedParams = [
-      'index_name',
-      'num_partitions',
-      'max_iters',
-      'use_opq',
-      'num_sub_vectors',
-      'num_bits',
-      'max_opq_iters',
-      'replace'
-    ]
-    for (const param of unsupportedParams) {
-      // eslint-disable-next-line @typescript-eslint/strict-boolean-expressions
-      if (indexParams[param as keyof VectorIndexParams]) {
-        throw new Error(`${param} is not supported for remote connections`)
-      }
-    }
-
-    const column = indexParams.column ?? 'vector'
-    const indexType = 'vector'
-    const metricType = indexParams.metric_type ?? 'L2'
-    const indexCacheSize = indexParams.index_cache_size ?? null
-
-    const data = {
-      column,
-      index_type: indexType,
-      metric_type: metricType,
-      index_cache_size: indexCacheSize
-    }
-    const res = await this._client.post(
-      `/v1/table/${encodeURIComponent(this._name)}/create_index/`,
-      data
-    )
-    if (res.status !== 200) {
-      throw new Error(
-        `Server Error, status: ${res.status}, ` +
-          // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
-          `message: ${res.statusText}: ${await res.body()}`
-      )
-    }
-  }
-
-  async createScalarIndex (column: string): Promise<void> {
-    const indexType = 'scalar'
-
-    const data = {
-      column,
-      index_type: indexType,
-      replace: true
-    }
-    const res = await this._client.post(
-      `/v1/table/${encodeURIComponent(this._name)}/create_scalar_index/`,
-      data
-    )
-    if (res.status !== 200) {
-      throw new Error(
-        `Server Error, status: ${res.status}, ` +
-          // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
-          `message: ${res.statusText}: ${await res.body()}`
-      )
-    }
-  }
-  async dropIndex (index_name: string): Promise<void> {
-    const res = await this._client.post(
-        `/v1/table/${encodeURIComponent(this._name)}/index/${encodeURIComponent(index_name)}/drop/`
-    )
-    if (res.status !== 200) {
-      throw new Error(
-          `Server Error, status: ${res.status}, ` +
-          // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
-          `message: ${res.statusText}: ${await res.body()}`
-      )
-    }
-  }
-
-  async countRows (filter?: string): Promise<number> {
-    const result = await this._client.post(`/v1/table/${encodeURIComponent(this._name)}/count_rows/`, {
-      predicate: filter
-    })
-    return (await result.body())
-  }
-
-  async delete (filter: string): Promise<void> {
-    await this._client.post(`/v1/table/${encodeURIComponent(this._name)}/delete/`, {
-      predicate: filter
-    })
-  }
-
-  async update (args: UpdateArgs | UpdateSqlArgs): Promise<void> {
-    let filter: string | null
-    let updates: Record<string, string>
-
-    if ('valuesSql' in args) {
-      filter = args.where ?? null
-      updates = args.valuesSql
-    } else {
-      filter = args.where ?? null
-      updates = {}
-      for (const [key, value] of Object.entries(args.values)) {
-        updates[key] = toSQL(value)
-      }
-    }
-    await this._client.post(`/v1/table/${encodeURIComponent(this._name)}/update/`, {
-      predicate: filter,
-      updates: Object.entries(updates).map(([key, value]) => [key, value])
-    })
-  }
-
-  async listIndices (): Promise<VectorIndex[]> {
-    const results = await this._client.post(
-      `/v1/table/${encodeURIComponent(this._name)}/index/list/`
-    )
-    return (await results.body()).indexes?.map((index: any) => ({
-      columns: index.columns,
-      name: index.index_name,
-      uuid: index.index_uuid,
-      status: index.status
-    }))
-  }
-
-  async indexStats (indexName: string): Promise<IndexStats> {
-    const results = await this._client.post(
-      `/v1/table/${encodeURIComponent(this._name)}/index/${indexName}/stats/`
-    )
-    const body = await results.body()
-    return {
-      numIndexedRows: body?.num_indexed_rows,
-      numUnindexedRows: body?.num_unindexed_rows,
-      indexType: body?.index_type,
-      distanceType: body?.distance_type
-    }
-  }
-
-  async addColumns (newColumnTransforms: Array<{ name: string, valueSql: string }>): Promise<void> {
-    throw new Error('Add columns is not yet supported in LanceDB Cloud.')
-  }
-
-  async alterColumns (columnAlterations: ColumnAlteration[]): Promise<void> {
-    throw new Error('Alter columns is not yet supported in LanceDB Cloud.')
-  }
-
-  async dropColumns (columnNames: string[]): Promise<void> {
-    throw new Error('Drop columns is not yet supported in LanceDB Cloud.')
-  }
-
-  withMiddleware(middleware: HttpMiddleware): Table<T> {
-    const wrapped = this.clone()
-    wrapped._client = wrapped._client.withMiddleware(middleware)
-    return wrapped
-  }
-
-  private clone (): RemoteTable<T> {
-    const clone: RemoteTable<T> = Object.create(RemoteTable.prototype)
-    return Object.assign(clone, this)
-  }
-}
--- a/node/src/sanitize.ts
+++ b/node/src/sanitize.ts
@@ -1,508 +0,0 @@
-// Copyright 2023 LanceDB Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// The utilities in this file help sanitize data from the user's arrow
-// library into the types expected by vectordb's arrow library.  Node
-// generally allows for mulitple versions of the same library (and sometimes
-// even multiple copies of the same version) to be installed at the same
-// time.  However, arrow-js uses instanceof which expected that the input
-// comes from the exact same library instance.  This is not always the case
-// and so we must sanitize the input to ensure that it is compatible.
-
-import {
-  Field,
-  Utf8,
-  FixedSizeBinary,
-  FixedSizeList,
-  Schema,
-  List,
-  Struct,
-  Float,
-  Bool,
-  Date_,
-  Decimal,
-  type DataType,
-  Dictionary,
-  Binary,
-  Float32,
-  Interval,
-  Map_,
-  Duration,
-  Union,
-  Time,
-  Timestamp,
-  Type,
-  Null,
-  Int,
-  type Precision,
-  type DateUnit,
-  Int8,
-  Int16,
-  Int32,
-  Int64,
-  Uint8,
-  Uint16,
-  Uint32,
-  Uint64,
-  Float16,
-  Float64,
-  DateDay,
-  DateMillisecond,
-  DenseUnion,
-  SparseUnion,
-  TimeNanosecond,
-  TimeMicrosecond,
-  TimeMillisecond,
-  TimeSecond,
-  TimestampNanosecond,
-  TimestampMicrosecond,
-  TimestampMillisecond,
-  TimestampSecond,
-  IntervalDayTime,
-  IntervalYearMonth,
-  DurationNanosecond,
-  DurationMicrosecond,
-  DurationMillisecond,
-  DurationSecond
-} from "apache-arrow";
-import type { IntBitWidth, TimeBitWidth } from "apache-arrow/type";
-
-function sanitizeMetadata(
-  metadataLike?: unknown
-): Map<string, string> | undefined {
-  if (metadataLike === undefined || metadataLike === null) {
-    return undefined;
-  }
-  if (!(metadataLike instanceof Map)) {
-    throw Error("Expected metadata, if present, to be a Map<string, string>");
-  }
-  for (const item of metadataLike) {
-    if (!(typeof item[0] === "string" || !(typeof item[1] === "string"))) {
-      throw Error(
-        "Expected metadata, if present, to be a Map<string, string> but it had non-string keys or values"
-      );
-    }
-  }
-  return metadataLike as Map<string, string>;
-}
-
-function sanitizeInt(typeLike: object) {
-  if (
-    !("bitWidth" in typeLike) ||
-    typeof typeLike.bitWidth !== "number" ||
-    !("isSigned" in typeLike) ||
-    typeof typeLike.isSigned !== "boolean"
-  ) {
-    throw Error(
-      "Expected an Int Type to have a `bitWidth` and `isSigned` property"
-    );
-  }
-  return new Int(typeLike.isSigned, typeLike.bitWidth as IntBitWidth);
-}
-
-function sanitizeFloat(typeLike: object) {
-  if (!("precision" in typeLike) || typeof typeLike.precision !== "number") {
-    throw Error("Expected a Float Type to have a `precision` property");
-  }
-  return new Float(typeLike.precision as Precision);
-}
-
-function sanitizeDecimal(typeLike: object) {
-  if (
-    !("scale" in typeLike) ||
-    typeof typeLike.scale !== "number" ||
-    !("precision" in typeLike) ||
-    typeof typeLike.precision !== "number" ||
-    !("bitWidth" in typeLike) ||
-    typeof typeLike.bitWidth !== "number"
-  ) {
-    throw Error(
-      "Expected a Decimal Type to have `scale`, `precision`, and `bitWidth` properties"
-    );
-  }
-  return new Decimal(typeLike.scale, typeLike.precision, typeLike.bitWidth);
-}
-
-function sanitizeDate(typeLike: object) {
-  if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
-    throw Error("Expected a Date type to have a `unit` property");
-  }
-  return new Date_(typeLike.unit as DateUnit);
-}
-
-function sanitizeTime(typeLike: object) {
-  if (
-    !("unit" in typeLike) ||
-    typeof typeLike.unit !== "number" ||
-    !("bitWidth" in typeLike) ||
-    typeof typeLike.bitWidth !== "number"
-  ) {
-    throw Error(
-      "Expected a Time type to have `unit` and `bitWidth` properties"
-    );
-  }
-  return new Time(typeLike.unit, typeLike.bitWidth as TimeBitWidth);
-}
-
-function sanitizeTimestamp(typeLike: object) {
-  if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
-    throw Error("Expected a Timestamp type to have a `unit` property");
-  }
-  let timezone = null;
-  if ("timezone" in typeLike && typeof typeLike.timezone === "string") {
-    timezone = typeLike.timezone;
-  }
-  return new Timestamp(typeLike.unit, timezone);
-}
-
-function sanitizeTypedTimestamp(
-  typeLike: object,
-  Datatype:
-    | typeof TimestampNanosecond
-    | typeof TimestampMicrosecond
-    | typeof TimestampMillisecond
-    | typeof TimestampSecond
-) {
-  let timezone = null;
-  if ("timezone" in typeLike && typeof typeLike.timezone === "string") {
-    timezone = typeLike.timezone;
-  }
-  return new Datatype(timezone);
-}
-
-function sanitizeInterval(typeLike: object) {
-  if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
-    throw Error("Expected an Interval type to have a `unit` property");
-  }
-  return new Interval(typeLike.unit);
-}
-
-function sanitizeList(typeLike: object) {
-  if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
-    throw Error(
-      "Expected a List type to have an array-like `children` property"
-    );
-  }
-  if (typeLike.children.length !== 1) {
-    throw Error("Expected a List type to have exactly one child");
-  }
-  return new List(sanitizeField(typeLike.children[0]));
-}
-
-function sanitizeStruct(typeLike: object) {
-  if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
-    throw Error(
-      "Expected a Struct type to have an array-like `children` property"
-    );
-  }
-  return new Struct(typeLike.children.map((child) => sanitizeField(child)));
-}
-
-function sanitizeUnion(typeLike: object) {
-  if (
-    !("typeIds" in typeLike) ||
-    !("mode" in typeLike) ||
-    typeof typeLike.mode !== "number"
-  ) {
-    throw Error(
-      "Expected a Union type to have `typeIds` and `mode` properties"
-    );
-  }
-  if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
-    throw Error(
-      "Expected a Union type to have an array-like `children` property"
-    );
-  }
-
-  return new Union(
-    typeLike.mode,
-    typeLike.typeIds as any,
-    typeLike.children.map((child) => sanitizeField(child))
-  );
-}
-
-function sanitizeTypedUnion(
-  typeLike: object,
-  UnionType: typeof DenseUnion | typeof SparseUnion
-) {
-  if (!("typeIds" in typeLike)) {
-    throw Error(
-      "Expected a DenseUnion/SparseUnion type to have a `typeIds` property"
-    );
-  }
-  if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
-    throw Error(
-      "Expected a DenseUnion/SparseUnion type to have an array-like `children` property"
-    );
-  }
-
-  return new UnionType(
-    typeLike.typeIds as any,
-    typeLike.children.map((child) => sanitizeField(child))
-  );
-}
-
-function sanitizeFixedSizeBinary(typeLike: object) {
-  if (!("byteWidth" in typeLike) || typeof typeLike.byteWidth !== "number") {
-    throw Error(
-      "Expected a FixedSizeBinary type to have a `byteWidth` property"
-    );
-  }
-  return new FixedSizeBinary(typeLike.byteWidth);
-}
-
-function sanitizeFixedSizeList(typeLike: object) {
-  if (!("listSize" in typeLike) || typeof typeLike.listSize !== "number") {
-    throw Error("Expected a FixedSizeList type to have a `listSize` property");
-  }
-  if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
-    throw Error(
-      "Expected a FixedSizeList type to have an array-like `children` property"
-    );
-  }
-  if (typeLike.children.length !== 1) {
-    throw Error("Expected a FixedSizeList type to have exactly one child");
-  }
-  return new FixedSizeList(
-    typeLike.listSize,
-    sanitizeField(typeLike.children[0])
-  );
-}
-
-function sanitizeMap(typeLike: object) {
-  if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
-    throw Error(
-      "Expected a Map type to have an array-like `children` property"
-    );
-  }
-  if (!("keysSorted" in typeLike) || typeof typeLike.keysSorted !== "boolean") {
-    throw Error("Expected a Map type to have a `keysSorted` property");
-  }
-  return new Map_(
-    typeLike.children.map((field) => sanitizeField(field)) as any,
-    typeLike.keysSorted
-  );
-}
-
-function sanitizeDuration(typeLike: object) {
-  if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
-    throw Error("Expected a Duration type to have a `unit` property");
-  }
-  return new Duration(typeLike.unit);
-}
-
-function sanitizeDictionary(typeLike: object) {
-  if (!("id" in typeLike) || typeof typeLike.id !== "number") {
-    throw Error("Expected a Dictionary type to have an `id` property");
-  }
-  if (!("indices" in typeLike) || typeof typeLike.indices !== "object") {
-    throw Error("Expected a Dictionary type to have an `indices` property");
-  }
-  if (!("dictionary" in typeLike) || typeof typeLike.dictionary !== "object") {
-    throw Error("Expected a Dictionary type to have an `dictionary` property");
-  }
-  if (!("isOrdered" in typeLike) || typeof typeLike.isOrdered !== "boolean") {
-    throw Error("Expected a Dictionary type to have an `isOrdered` property");
-  }
-  return new Dictionary(
-    sanitizeType(typeLike.dictionary),
-    sanitizeType(typeLike.indices) as any,
-    typeLike.id,
-    typeLike.isOrdered
-  );
-}
-
-function sanitizeType(typeLike: unknown): DataType<any> {
-  if (typeof typeLike !== "object" || typeLike === null) {
-    throw Error("Expected a Type but object was null/undefined");
-  }
-  if (!("typeId" in typeLike) || !(typeof typeLike.typeId !== "function")) {
-    throw Error("Expected a Type to have a typeId function");
-  }
-  let typeId: Type;
-  if (typeof typeLike.typeId === "function") {
-    typeId = (typeLike.typeId as () => unknown)() as Type;
-  } else if (typeof typeLike.typeId === "number") {
-    typeId = typeLike.typeId as Type;
-  } else {
-    throw Error("Type's typeId property was not a function or number");
-  }
-
-  switch (typeId) {
-    case Type.NONE:
-      throw Error("Received a Type with a typeId of NONE");
-    case Type.Null:
-      return new Null();
-    case Type.Int:
-      return sanitizeInt(typeLike);
-    case Type.Float:
-      return sanitizeFloat(typeLike);
-    case Type.Binary:
-      return new Binary();
-    case Type.Utf8:
-      return new Utf8();
-    case Type.Bool:
-      return new Bool();
-    case Type.Decimal:
-      return sanitizeDecimal(typeLike);
-    case Type.Date:
-      return sanitizeDate(typeLike);
-    case Type.Time:
-      return sanitizeTime(typeLike);
-    case Type.Timestamp:
-      return sanitizeTimestamp(typeLike);
-    case Type.Interval:
-      return sanitizeInterval(typeLike);
-    case Type.List:
-      return sanitizeList(typeLike);
-    case Type.Struct:
-      return sanitizeStruct(typeLike);
-    case Type.Union:
-      return sanitizeUnion(typeLike);
-    case Type.FixedSizeBinary:
-      return sanitizeFixedSizeBinary(typeLike);
-    case Type.FixedSizeList:
-      return sanitizeFixedSizeList(typeLike);
-    case Type.Map:
-      return sanitizeMap(typeLike);
-    case Type.Duration:
-      return sanitizeDuration(typeLike);
-    case Type.Dictionary:
-      return sanitizeDictionary(typeLike);
-    case Type.Int8:
-      return new Int8();
-    case Type.Int16:
-      return new Int16();
-    case Type.Int32:
-      return new Int32();
-    case Type.Int64:
-      return new Int64();
-    case Type.Uint8:
-      return new Uint8();
-    case Type.Uint16:
-      return new Uint16();
-    case Type.Uint32:
-      return new Uint32();
-    case Type.Uint64:
-      return new Uint64();
-    case Type.Float16:
-      return new Float16();
-    case Type.Float32:
-      return new Float32();
-    case Type.Float64:
-      return new Float64();
-    case Type.DateMillisecond:
-      return new DateMillisecond();
-    case Type.DateDay:
-      return new DateDay();
-    case Type.TimeNanosecond:
-      return new TimeNanosecond();
-    case Type.TimeMicrosecond:
-      return new TimeMicrosecond();
-    case Type.TimeMillisecond:
-      return new TimeMillisecond();
-    case Type.TimeSecond:
-      return new TimeSecond();
-    case Type.TimestampNanosecond:
-      return sanitizeTypedTimestamp(typeLike, TimestampNanosecond);
-    case Type.TimestampMicrosecond:
-      return sanitizeTypedTimestamp(typeLike, TimestampMicrosecond);
-    case Type.TimestampMillisecond:
-      return sanitizeTypedTimestamp(typeLike, TimestampMillisecond);
-    case Type.TimestampSecond:
-      return sanitizeTypedTimestamp(typeLike, TimestampSecond);
-    case Type.DenseUnion:
-      return sanitizeTypedUnion(typeLike, DenseUnion);
-    case Type.SparseUnion:
-      return sanitizeTypedUnion(typeLike, SparseUnion);
-    case Type.IntervalDayTime:
-      return new IntervalDayTime();
-    case Type.IntervalYearMonth:
-      return new IntervalYearMonth();
-    case Type.DurationNanosecond:
-      return new DurationNanosecond();
-    case Type.DurationMicrosecond:
-      return new DurationMicrosecond();
-    case Type.DurationMillisecond:
-      return new DurationMillisecond();
-    case Type.DurationSecond:
-      return new DurationSecond();
-  }
-}
-
-function sanitizeField(fieldLike: unknown): Field {
-  if (fieldLike instanceof Field) {
-    return fieldLike;
-  }
-  if (typeof fieldLike !== "object" || fieldLike === null) {
-    throw Error("Expected a Field but object was null/undefined");
-  }
-  if (
-    !("type" in fieldLike) ||
-    !("name" in fieldLike) ||
-    !("nullable" in fieldLike)
-  ) {
-    throw Error(
-      "The field passed in is missing a `type`/`name`/`nullable` property"
-    );
-  }
-  const type = sanitizeType(fieldLike.type);
-  const name = fieldLike.name;
-  if (!(typeof name === "string")) {
-    throw Error("The field passed in had a non-string `name` property");
-  }
-  const nullable = fieldLike.nullable;
-  if (!(typeof nullable === "boolean")) {
-    throw Error("The field passed in had a non-boolean `nullable` property");
-  }
-  let metadata;
-  if ("metadata" in fieldLike) {
-    metadata = sanitizeMetadata(fieldLike.metadata);
-  }
-  return new Field(name, type, nullable, metadata);
-}
-
-/**
- * Convert something schemaLike into a Schema instance
- *
- * This method is often needed even when the caller is using a Schema
- * instance because they might be using a different instance of apache-arrow
- * than lancedb is using.
- */
-export function sanitizeSchema(schemaLike: unknown): Schema {
-  if (schemaLike instanceof Schema) {
-    return schemaLike;
-  }
-  if (typeof schemaLike !== "object" || schemaLike === null) {
-    throw Error("Expected a Schema but object was null/undefined");
-  }
-  if (!("fields" in schemaLike)) {
-    throw Error(
-      "The schema passed in does not appear to be a schema (no 'fields' property)"
-    );
-  }
-  let metadata;
-  if ("metadata" in schemaLike) {
-    metadata = sanitizeMetadata(schemaLike.metadata);
-  }
-  if (!Array.isArray(schemaLike.fields)) {
-    throw Error(
-      "The schema passed in had a 'fields' property but it was not an array"
-    );
-  }
-  const sanitizedFields = schemaLike.fields.map((field) =>
-    sanitizeField(field)
-  );
-  return new Schema(sanitizedFields, metadata);
-}
--- a/node/src/test/arrow.test.ts
+++ b/node/src/test/arrow.test.ts
@@ -1,360 +0,0 @@
-// Copyright 2024 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-import { describe } from 'mocha'
-import { assert, expect, use as chaiUse } from 'chai'
-import * as chaiAsPromised from 'chai-as-promised'
-
-import { convertToTable, fromTableToBuffer, makeArrowTable, makeEmptyTable } from '../arrow'
-import {
-  Field,
-  FixedSizeList,
-  Float16,
-  Float32,
-  Int32,
-  tableFromIPC,
-  Schema,
-  Float64,
-  type Table,
-  Binary,
-  Bool,
-  Utf8,
-  Struct,
-  List,
-  DataType,
-  Dictionary,
-  Int64,
-  MetadataVersion
-} from 'apache-arrow'
-import {
-  Dictionary as OldDictionary,
-  Field as OldField,
-  FixedSizeList as OldFixedSizeList,
-  Float32 as OldFloat32,
-  Int32 as OldInt32,
-  Struct as OldStruct,
-  Schema as OldSchema,
-  TimestampNanosecond as OldTimestampNanosecond,
-  Utf8 as OldUtf8
-} from 'apache-arrow-old'
-import { type EmbeddingFunction } from '../embedding/embedding_function'
-
-chaiUse(chaiAsPromised)
-
-function sampleRecords (): Array<Record<string, any>> {
-  return [
-    {
-      binary: Buffer.alloc(5),
-      boolean: false,
-      number: 7,
-      string: 'hello',
-      struct: { x: 0, y: 0 },
-      list: ['anime', 'action', 'comedy']
-    }
-  ]
-}
-
-// Helper method to verify various ways to create a table
-async function checkTableCreation (tableCreationMethod: (records: any, recordsReversed: any, schema: Schema) => Promise<Table>): Promise<void> {
-  const records = sampleRecords()
-  const recordsReversed = [{
-    list: ['anime', 'action', 'comedy'],
-    struct: { x: 0, y: 0 },
-    string: 'hello',
-    number: 7,
-    boolean: false,
-    binary: Buffer.alloc(5)
-  }]
-  const schema = new Schema([
-    new Field('binary', new Binary(), false),
-    new Field('boolean', new Bool(), false),
-    new Field('number', new Float64(), false),
-    new Field('string', new Utf8(), false),
-    new Field('struct', new Struct([
-      new Field('x', new Float64(), false),
-      new Field('y', new Float64(), false)
-    ])),
-    new Field('list', new List(new Field('item', new Utf8(), false)), false)
-  ])
-
-  const table = await tableCreationMethod(records, recordsReversed, schema)
-  schema.fields.forEach((field, idx) => {
-    const actualField = table.schema.fields[idx]
-    assert.isFalse(actualField.nullable)
-    assert.equal(table.getChild(field.name)?.type.toString(), field.type.toString())
-    assert.equal(table.getChildAt(idx)?.type.toString(), field.type.toString())
-  })
-}
-
-describe('The function makeArrowTable', function () {
-  it('will use data types from a provided schema instead of inference', async function () {
-    const schema = new Schema([
-      new Field('a', new Int32()),
-      new Field('b', new Float32()),
-      new Field('c', new FixedSizeList(3, new Field('item', new Float16()))),
-      new Field('d', new Int64())
-    ])
-    const table = makeArrowTable(
-      [
-        { a: 1, b: 2, c: [1, 2, 3], d: 9 },
-        { a: 4, b: 5, c: [4, 5, 6], d: 10 },
-        { a: 7, b: 8, c: [7, 8, 9], d: null }
-      ],
-      { schema }
-    )
-
-    const buf = await fromTableToBuffer(table)
-    assert.isAbove(buf.byteLength, 0)
-
-    const actual = tableFromIPC(buf)
-    assert.equal(actual.numRows, 3)
-    const actualSchema = actual.schema
-    assert.deepEqual(actualSchema, schema)
-  })
-
-  it('will assume the column `vector` is FixedSizeList<Float32> by default', async function () {
-    const schema = new Schema([
-      new Field('a', new Float64()),
-      new Field('b', new Float64()),
-      new Field(
-        'vector',
-        new FixedSizeList(3, new Field('item', new Float32(), true))
-      )
-    ])
-    const table = makeArrowTable([
-      { a: 1, b: 2, vector: [1, 2, 3] },
-      { a: 4, b: 5, vector: [4, 5, 6] },
-      { a: 7, b: 8, vector: [7, 8, 9] }
-    ])
-
-    const buf = await fromTableToBuffer(table)
-    assert.isAbove(buf.byteLength, 0)
-
-    const actual = tableFromIPC(buf)
-    assert.equal(actual.numRows, 3)
-    const actualSchema = actual.schema
-    assert.deepEqual(actualSchema, schema)
-  })
-
-  it('can support multiple vector columns', async function () {
-    const schema = new Schema([
-      new Field('a', new Float64()),
-      new Field('b', new Float64()),
-      new Field('vec1', new FixedSizeList(3, new Field('item', new Float16(), true))),
-      new Field('vec2', new FixedSizeList(3, new Field('item', new Float16(), true)))
-    ])
-    const table = makeArrowTable(
-      [
-        { a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] },
-        { a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] },
-        { a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] }
-      ],
-      {
-        vectorColumns: {
-          vec1: { type: new Float16() },
-          vec2: { type: new Float16() }
-        }
-      }
-    )
-
-    const buf = await fromTableToBuffer(table)
-    assert.isAbove(buf.byteLength, 0)
-
-    const actual = tableFromIPC(buf)
-    assert.equal(actual.numRows, 3)
-    const actualSchema = actual.schema
-    assert.deepEqual(actualSchema, schema)
-  })
-
-  it('will allow different vector column types', async function () {
-    const table = makeArrowTable(
-      [
-        { fp16: [1], fp32: [1], fp64: [1] }
-      ],
-      {
-        vectorColumns: {
-          fp16: { type: new Float16() },
-          fp32: { type: new Float32() },
-          fp64: { type: new Float64() }
-        }
-      }
-    )
-
-    assert.equal(table.getChild('fp16')?.type.children[0].type.toString(), new Float16().toString())
-    assert.equal(table.getChild('fp32')?.type.children[0].type.toString(), new Float32().toString())
-    assert.equal(table.getChild('fp64')?.type.children[0].type.toString(), new Float64().toString())
-  })
-
-  it('will use dictionary encoded strings if asked', async function () {
-    const table = makeArrowTable([{ str: 'hello' }])
-    assert.isTrue(DataType.isUtf8(table.getChild('str')?.type))
-
-    const tableWithDict = makeArrowTable([{ str: 'hello' }], { dictionaryEncodeStrings: true })
-    assert.isTrue(DataType.isDictionary(tableWithDict.getChild('str')?.type))
-
-    const schema = new Schema([
-      new Field('str', new Dictionary(new Utf8(), new Int32()))
-    ])
-
-    const tableWithDict2 = makeArrowTable([{ str: 'hello' }], { schema })
-    assert.isTrue(DataType.isDictionary(tableWithDict2.getChild('str')?.type))
-  })
-
-  it('will infer data types correctly', async function () {
-    await checkTableCreation(async (records) => makeArrowTable(records))
-  })
-
-  it('will allow a schema to be provided', async function () {
-    await checkTableCreation(async (records, _, schema) => makeArrowTable(records, { schema }))
-  })
-
-  it('will use the field order of any provided schema', async function () {
-    await checkTableCreation(async (_, recordsReversed, schema) => makeArrowTable(recordsReversed, { schema }))
-  })
-
-  it('will make an empty table', async function () {
-    await checkTableCreation(async (_, __, schema) => makeArrowTable([], { schema }))
-  })
-})
-
-class DummyEmbedding implements EmbeddingFunction<string> {
-  public readonly sourceColumn = 'string'
-  public readonly embeddingDimension = 2
-  public readonly embeddingDataType = new Float16()
-
-  async embed (data: string[]): Promise<number[][]> {
-    return data.map(
-      () => [0.0, 0.0]
-    )
-  }
-}
-
-class DummyEmbeddingWithNoDimension implements EmbeddingFunction<string> {
-  public readonly sourceColumn = 'string'
-
-  async embed (data: string[]): Promise<number[][]> {
-    return data.map(
-      () => [0.0, 0.0]
-    )
-  }
-}
-
-describe('convertToTable', function () {
-  it('will infer data types correctly', async function () {
-    await checkTableCreation(async (records) => await convertToTable(records))
-  })
-
-  it('will allow a schema to be provided', async function () {
-    await checkTableCreation(async (records, _, schema) => await convertToTable(records, undefined, { schema }))
-  })
-
-  it('will use the field order of any provided schema', async function () {
-    await checkTableCreation(async (_, recordsReversed, schema) => await convertToTable(recordsReversed, undefined, { schema }))
-  })
-
-  it('will make an empty table', async function () {
-    await checkTableCreation(async (_, __, schema) => await convertToTable([], undefined, { schema }))
-  })
-
-  it('will apply embeddings', async function () {
-    const records = sampleRecords()
-    const table = await convertToTable(records, new DummyEmbedding())
-    assert.isTrue(DataType.isFixedSizeList(table.getChild('vector')?.type))
-    assert.equal(table.getChild('vector')?.type.children[0].type.toString(), new Float16().toString())
-  })
-
-  it('will fail if missing the embedding source column', async function () {
-    return await expect(convertToTable([{ id: 1 }], new DummyEmbedding())).to.be.rejectedWith("'string' was not present")
-  })
-
-  it('use embeddingDimension if embedding missing from table', async function () {
-    const schema = new Schema([
-      new Field('string', new Utf8(), false)
-    ])
-    // Simulate getting an empty Arrow table (minus embedding) from some other source
-    // In other words, we aren't starting with records
-    const table = makeEmptyTable(schema)
-
-    // If the embedding specifies the dimension we are fine
-    await fromTableToBuffer(table, new DummyEmbedding())
-
-    // We can also supply a schema and should be ok
-    const schemaWithEmbedding = new Schema([
-      new Field('string', new Utf8(), false),
-      new Field('vector', new FixedSizeList(2, new Field('item', new Float16(), false)), false)
-    ])
-    await fromTableToBuffer(table, new DummyEmbeddingWithNoDimension(), schemaWithEmbedding)
-
-    // Otherwise we will get an error
-    return await expect(fromTableToBuffer(table, new DummyEmbeddingWithNoDimension())).to.be.rejectedWith('does not specify `embeddingDimension`')
-  })
-
-  it('will apply embeddings to an empty table', async function () {
-    const schema = new Schema([
-      new Field('string', new Utf8(), false),
-      new Field('vector', new FixedSizeList(2, new Field('item', new Float16(), false)), false)
-    ])
-    const table = await convertToTable([], new DummyEmbedding(), { schema })
-    assert.isTrue(DataType.isFixedSizeList(table.getChild('vector')?.type))
-    assert.equal(table.getChild('vector')?.type.children[0].type.toString(), new Float16().toString())
-  })
-
-  it('will complain if embeddings present but schema missing embedding column', async function () {
-    const schema = new Schema([
-      new Field('string', new Utf8(), false)
-    ])
-    return await expect(convertToTable([], new DummyEmbedding(), { schema })).to.be.rejectedWith('column vector was missing')
-  })
-
-  it('will provide a nice error if run twice', async function () {
-    const records = sampleRecords()
-    const table = await convertToTable(records, new DummyEmbedding())
-    // fromTableToBuffer will try and apply the embeddings again
-    return await expect(fromTableToBuffer(table, new DummyEmbedding())).to.be.rejectedWith('already existed')
-  })
-})
-
-describe('makeEmptyTable', function () {
-  it('will make an empty table', async function () {
-    await checkTableCreation(async (_, __, schema) => makeEmptyTable(schema))
-  })
-})
-
-describe('when using two versions of arrow', function () {
-  it('can still import data', async function() {
-    const schema = new OldSchema([
-      new OldField('id', new OldInt32()),
-      new OldField('vector', new OldFixedSizeList(1024, new OldField("item", new OldFloat32(), true))),
-      new OldField('struct', new OldStruct([
-        new OldField('nested', new OldDictionary(new OldUtf8(), new OldInt32(), 1, true)),
-        new OldField('ts_with_tz', new OldTimestampNanosecond("some_tz")),
-        new OldField('ts_no_tz', new OldTimestampNanosecond(null))
-      ]))
-    ]) as any
-    // We use arrow version 13 to emulate a "foreign arrow" and this version doesn't have metadataVersion
-    // In theory, this wouldn't matter.  We don't rely on that property.  However, it causes deepEqual to
-    // fail so we patch it back in
-    schema.metadataVersion = MetadataVersion.V5
-    const table = makeArrowTable(
-      [],
-      { schema }
-    )
-
-    const buf = await fromTableToBuffer(table)
-    assert.isAbove(buf.byteLength, 0)
-    const actual = tableFromIPC(buf)
-    const actualSchema = actual.schema
-    assert.deepEqual(actualSchema, schema)
-  })
-})
--- a/node/src/test/embedding/openai.ts
+++ b/node/src/test/embedding/openai.ts
@@ -1,55 +0,0 @@
-// Copyright 2023 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-import { describe } from 'mocha'
-import { assert } from 'chai'
-
-import { OpenAIEmbeddingFunction } from '../../embedding/openai'
-import { isEmbeddingFunction } from '../../embedding/embedding_function'
-
-// eslint-disable-next-line @typescript-eslint/no-var-requires
-const OpenAIApi = require('openai')
-// eslint-disable-next-line @typescript-eslint/no-var-requires
-const { stub } = require('sinon')
-
-describe('OpenAPIEmbeddings', function () {
-  const stubValue = {
-    data: [
-      {
-        embedding: Array(1536).fill(1.0)
-      },
-      {
-        embedding: Array(1536).fill(2.0)
-      }
-    ]
-  }
-
-  describe('#embed', function () {
-    it('should create vector embeddings', async function () {
-      const openAIStub = stub(OpenAIApi.Embeddings.prototype, 'create').returns(stubValue)
-      const f = new OpenAIEmbeddingFunction('text', 'sk-key')
-      const vectors = await f.embed(['abc', 'def'])
-      assert.isTrue(openAIStub.calledOnce)
-      assert.equal(vectors.length, 2)
-      assert.deepEqual(vectors[0], stubValue.data[0].embedding)
-      assert.deepEqual(vectors[1], stubValue.data[1].embedding)
-    })
-  })
-
-  describe('isEmbeddingFunction', function () {
-    it('should match the isEmbeddingFunction guard', function () {
-      assert.isTrue(isEmbeddingFunction(new OpenAIEmbeddingFunction('text', 'sk-key')))
-    })
-  })
-})
--- a/node/src/test/io.ts
+++ b/node/src/test/io.ts
@@ -1,76 +0,0 @@
-// Copyright 2023 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// IO tests
-
-import { describe } from 'mocha'
-import { assert } from 'chai'
-
-import * as lancedb from '../index'
-import { type ConnectionOptions } from '../index'
-
-describe('LanceDB S3 client', function () {
-  if (process.env.TEST_S3_BASE_URL != null) {
-    const baseUri = process.env.TEST_S3_BASE_URL
-    it('should have a valid url', async function () {
-      const opts = { uri: `${baseUri}/valid_url` }
-      const table = await createTestDB(opts, 2, 20)
-      const con = await lancedb.connect(opts)
-      assert.equal(con.uri, opts.uri)
-
-      const results = await table.search([0.1, 0.3]).limit(5).execute()
-      assert.equal(results.length, 5)
-    }).timeout(10_000)
-  } else {
-    describe.skip('Skip S3 test', function () {})
-  }
-
-  if (process.env.TEST_S3_BASE_URL != null && process.env.TEST_AWS_ACCESS_KEY_ID != null && process.env.TEST_AWS_SECRET_ACCESS_KEY != null) {
-    const baseUri = process.env.TEST_S3_BASE_URL
-    it('use custom credentials', async function () {
-      const opts: ConnectionOptions = {
-        uri: `${baseUri}/custom_credentials`,
-        awsCredentials: {
-          accessKeyId: process.env.TEST_AWS_ACCESS_KEY_ID as string,
-          secretKey: process.env.TEST_AWS_SECRET_ACCESS_KEY as string
-        }
-      }
-      const table = await createTestDB(opts, 2, 20)
-      console.log(table)
-      const con = await lancedb.connect(opts)
-      console.log(con)
-      assert.equal(con.uri, opts.uri)
-
-      const results = await table.search([0.1, 0.3]).limit(5).execute()
-      assert.equal(results.length, 5)
-    }).timeout(10_000)
-  } else {
-    describe.skip('Skip S3 test', function () {})
-  }
-})
-
-async function createTestDB (opts: ConnectionOptions, numDimensions: number = 2, numRows: number = 2): Promise<lancedb.Table> {
-  const con = await lancedb.connect(opts)
-
-  const data = []
-  for (let i = 0; i < numRows; i++) {
-    const vector = []
-    for (let j = 0; j < numDimensions; j++) {
-      vector.push(i + (j * 0.1))
-    }
-    data.push({ id: i + 1, name: `name_${i}`, price: i + 10, is_active: (i % 2 === 0), vector })
-  }
-
-  return await con.createTable('vectors_2', data)
-}
--- a/node/src/test/test.ts
+++ b/node/src/test/test.ts
--- a/node/src/test/util.ts
+++ b/node/src/test/util.ts
@@ -1,45 +0,0 @@
-// Copyright 2023 LanceDB Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-import { toSQL } from '../util'
-import * as chai from 'chai'
-
-const expect = chai.expect
-
-describe('toSQL', function () {
-  it('should turn string to SQL expression', function () {
-    expect(toSQL('foo')).to.equal("'foo'")
-  })
-
-  it('should turn number to SQL expression', function () {
-    expect(toSQL(123)).to.equal('123')
-  })
-
-  it('should turn boolean to SQL expression', function () {
-    expect(toSQL(true)).to.equal('TRUE')
-  })
-
-  it('should turn null to SQL expression', function () {
-    expect(toSQL(null)).to.equal('NULL')
-  })
-
-  it('should turn Date to SQL expression', function () {
-    const date = new Date('05 October 2011 14:48 UTC')
-    expect(toSQL(date)).to.equal("'2011-10-05T14:48:00.000Z'")
-  })
-
-  it('should turn array to SQL expression', function () {
-    expect(toSQL(['foo', 'bar', true, 1])).to.equal("['foo', 'bar', TRUE, 1]")
-  })
-})
--- a/node/src/util.ts
+++ b/node/src/util.ts
@@ -1,77 +0,0 @@
-// Copyright 2023 LanceDB Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-export type Literal = string | number | boolean | null | Date | Literal[]
-
-export function toSQL (value: Literal): string {
-  if (typeof value === 'string') {
-    return `'${value}'`
-  }
-
-  if (typeof value === 'number') {
-    return value.toString()
-  }
-
-  if (typeof value === 'boolean') {
-    return value ? 'TRUE' : 'FALSE'
-  }
-
-  if (value === null) {
-    return 'NULL'
-  }
-
-  if (value instanceof Date) {
-    return `'${value.toISOString()}'`
-  }
-
-  if (Array.isArray(value)) {
-    return `[${value.map(toSQL).join(', ')}]`
-  }
-
-  // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
-  throw new Error(`Unsupported value type: ${typeof value} value: (${value})`)
-}
-
-export class TTLCache {
-  private readonly cache: Map<string, { value: any, expires: number }>
-
-  /**
-   * @param ttl Time to live in milliseconds
-   */
-  constructor (private readonly ttl: number) {
-    this.cache = new Map()
-  }
-
-  get (key: string): any | undefined {
-    const entry = this.cache.get(key)
-    if (entry === undefined) {
-      return undefined
-    }
-
-    if (entry.expires < Date.now()) {
-      this.cache.delete(key)
-      return undefined
-    }
-
-    return entry.value
-  }
-
-  set (key: string, value: any): void {
-    this.cache.set(key, { value, expires: Date.now() + this.ttl })
-  }
-
-  delete (key: string): void {
-    this.cache.delete(key)
-  }
-}
--- a/node/tsconfig.json
+++ b/node/tsconfig.json
@@ -1,14 +0,0 @@
-{
-  "include": [
-    "src/**/*.ts",
-    "src/*.ts"
-  ],
-  "compilerOptions": {
-    "target": "ES2020",
-    "module": "commonjs",
-    "declaration": true,
-    "outDir": "./dist",
-    "strict": true,
-    "sourceMap": true,
-  }
-}
--- a/nodejs/Cargo.toml
+++ b/nodejs/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "lancedb-nodejs"
 edition.workspace = true
-version = "0.21.2-beta.1"
+version = "0.21.2"
 license.workspace = true
 description.workspace = true
 repository.workspace = true
--- a/nodejs/test/arrow.test.ts
+++ b/nodejs/test/arrow.test.ts
@@ -1,7 +1,16 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors

-import { Bool, Field, Int32, List, Schema, Struct, Utf8 } from "apache-arrow";
+import {
+  Bool,
+  Field,
+  Int32,
+  List,
+  Schema,
+  Struct,
+  Uint8,
+  Utf8,
+} from "apache-arrow";

 import * as arrow15 from "apache-arrow-15";
 import * as arrow16 from "apache-arrow-16";
@@ -255,6 +264,98 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
        expect(actualSchema).toEqual(schema);
      });

+      it("will detect vector columns when name contains 'vector' or 'embedding'", async function () {
+        // Column names with 'vector'/'embedding' as prefix, suffix or infix, in lower, upper and mixed case
+        const floatVectorTable = makeArrowTable([
+          {
+            // Non-integral sample values, so every element is unambiguously a float
+            // biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
+            user_vector: [1.1, 2.2],
+            // biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
+            text_embedding: [3.3, 4.4],
+            // biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
+            doc_embeddings: [5.5, 6.6],
+            // biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
+            my_vector_field: [7.7, 8.8],
+            // biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
+            embedding_model: [9.9, 10.1],
+            // biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
+            VECTOR_COL: [11.1, 12.2], // uppercase
+            // biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
+            Vector_Mixed: [13.3, 14.4], // mixed case
+          },
+        ]);
+
+        // Check that columns with 'vector' or 'embedding' in name are converted to FixedSizeList
+        const floatVectorColumns = [
+          "user_vector",
+          "text_embedding",
+          "doc_embeddings",
+          "my_vector_field",
+          "embedding_model",
+          "VECTOR_COL",
+          "Vector_Mixed",
+        ];
+
+        for (const columnName of floatVectorColumns) {
+          expect(
+            DataType.isFixedSizeList(
+              floatVectorTable.getChild(columnName)?.type,
+            ),
+          ).toBe(true);
+          // Check that float vectors use Float32 by default
+          expect(
+            floatVectorTable
+              .getChild(columnName)
+              ?.type.children[0].type.toString(),
+          ).toEqual(new Float32().toString());
+        }
+
+        // Test that regular integer arrays still get treated as float vectors
+        // (since JavaScript doesn't distinguish integers from floats at runtime)
+        const integerArrayTable = makeArrowTable([
+          {
+            // biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
+            vector_int: [1, 2], // Regular array with integers - should be Float32
+            // biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
+            embedding_int: [3, 4], // Regular array with integers - should be Float32
+          },
+        ]);
+
+        const integerArrayColumns = ["vector_int", "embedding_int"];
+
+        for (const columnName of integerArrayColumns) {
+          expect(
+            DataType.isFixedSizeList(
+              integerArrayTable.getChild(columnName)?.type,
+            ),
+          ).toBe(true);
+          // A plain number[] holding whole numbers must still get Float32 items, not an integer item type
+          expect(
+            integerArrayTable
+              .getChild(columnName)
+              ?.type.children[0].type.toString(),
+          ).toEqual(new Float32().toString());
+        }
+
+        // A name containing neither 'vector' nor 'embedding' must stay a List, not a FixedSizeList
+        const normalListTable = makeArrowTable([
+          {
+            // biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
+            normal_list: [15.5, 16.6], // should NOT be detected as vector
+          },
+        ]);
+
+        expect(
+          DataType.isFixedSizeList(
+            normalListTable.getChild("normal_list")?.type,
+          ),
+        ).toBe(false);
+        expect(
+          DataType.isList(normalListTable.getChild("normal_list")?.type),
+        ).toBe(true);
+      });
+
      it("will allow different vector column types", async function () {
        const table = makeArrowTable([{ fp16: [1], fp32: [1], fp64: [1] }], {
          vectorColumns: {
--- a/nodejs/test/remote.test.ts
+++ b/nodejs/test/remote.test.ts
@@ -42,6 +42,28 @@ describe("remote connection", () => {
    });
  });

+  it("should accept overall timeout configuration", async () => {
+    await connect("db://test", {
+      apiKey: "fake",
+      clientConfig: {
+        timeoutConfig: { timeout: 30 },
+      },
+    });
+
+    // Repeat with timeout, connectTimeout, readTimeout and poolIdleTimeout all set together
+    await connect("db://test", {
+      apiKey: "fake",
+      clientConfig: {
+        timeoutConfig: {
+          timeout: 60,
+          connectTimeout: 10,
+          readTimeout: 20,
+          poolIdleTimeout: 300,
+        },
+      },
+    });
+  });
+
  it("should pass down apiKey and userAgent", async () => {
    await withMockDatabase(
      (req, res) => {
--- a/nodejs/test/table.test.ts
+++ b/nodejs/test/table.test.ts
@@ -582,7 +582,7 @@ describe("When creating an index", () => {
      "Invalid input, minimum_nprobes must be greater than 0",
    );
    expect(() => tbl.query().nearestTo(queryVec).maximumNprobes(5)).toThrow(
-      "Invalid input, maximum_nprobes must be greater than minimum_nprobes",
+      "Invalid input, maximum_nprobes must be greater than or equal to minimum_nprobes",
    );

    await tbl.dropIndex("vec_idx");
--- a/nodejs/examples/package-lock.json
+++ b/nodejs/examples/package-lock.json
@@ -30,7 +30,7 @@
        "x64",
        "arm64"
      ],
-      "license": "Apache 2.0",
+      "license": "Apache-2.0",
      "os": [
        "darwin",
        "linux",
--- a/nodejs/lancedb/arrow.ts
+++ b/nodejs/lancedb/arrow.ts
@@ -34,6 +34,7 @@ import {
  Struct,
  Timestamp,
  Type,
+  Uint8,
  Utf8,
  Vector,
  makeVector as arrowMakeVector,
@@ -51,6 +52,15 @@ import {
  sanitizeTable,
  sanitizeType,
 } from "./sanitize";
+
+/**
+ * True when `fieldName` contains "vector" or "embedding", ignoring case.
+ */
+function nameSuggestsVectorColumn(fieldName: string): boolean {
+  const lowered = fieldName.toLowerCase();
+  return ["vector", "embedding"].some((hint) => lowered.includes(hint));
+}
+
 export * from "apache-arrow";
 export type SchemaLike =
  | Schema
@@ -591,10 +601,17 @@ function inferType(
      return undefined;
    }
    // Try to automatically detect embedding columns.
-    if (valueType instanceof Float && path[path.length - 1] === "vector") {
-      // We default to Float32 for vectors.
-      const child = new Field("item", new Float32(), true);
-      return new FixedSizeList(value.length, child);
+    if (nameSuggestsVectorColumn(path[path.length - 1])) {
+      // Check if value is a Uint8Array for integer vector type determination
+      if (value instanceof Uint8Array) {
+        // For integer vectors, we default to Uint8 (matching Python implementation)
+        const child = new Field("item", new Uint8(), true);
+        return new FixedSizeList(value.length, child);
+      } else {
+        // For float vectors, we default to Float32
+        const child = new Field("item", new Float32(), true);
+        return new FixedSizeList(value.length, child);
+      }
    } else {
      const child = new Field("item", valueType, true);
      return new List(child);
--- a/nodejs/npm/darwin-arm64/package.json
+++ b/nodejs/npm/darwin-arm64/package.json
@@ -1,11 +1,11 @@
 {
 	"name": "@lancedb/lancedb-darwin-arm64",
-	"version": "0.21.2-beta.1",
+	"version": "0.21.2",
 	"os": ["darwin"],
 	"cpu": ["arm64"],
 	"main": "lancedb.darwin-arm64.node",
 	"files": ["lancedb.darwin-arm64.node"],
-	"license": "Apache 2.0",
+	"license": "Apache-2.0",
 	"engines": {
 		"node": ">= 18"
 	}
--- a/nodejs/npm/darwin-x64/package.json
+++ b/nodejs/npm/darwin-x64/package.json
@@ -1,11 +1,11 @@
 {
 	"name": "@lancedb/lancedb-darwin-x64",
-	"version": "0.21.2-beta.1",
+	"version": "0.21.2",
 	"os": ["darwin"],
 	"cpu": ["x64"],
 	"main": "lancedb.darwin-x64.node",
 	"files": ["lancedb.darwin-x64.node"],
-	"license": "Apache 2.0",
+	"license": "Apache-2.0",
 	"engines": {
 		"node": ">= 18"
 	}
--- a/nodejs/npm/linux-arm64-gnu/package.json
+++ b/nodejs/npm/linux-arm64-gnu/package.json
@@ -1,11 +1,11 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-gnu",
-	"version": "0.21.2-beta.1",
+	"version": "0.21.2",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-gnu.node",
 	"files": ["lancedb.linux-arm64-gnu.node"],
-	"license": "Apache 2.0",
+	"license": "Apache-2.0",
 	"engines": {
 		"node": ">= 18"
 	},
--- a/nodejs/npm/linux-arm64-musl/package.json
+++ b/nodejs/npm/linux-arm64-musl/package.json
@@ -1,13 +1,13 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-musl",
-	"version": "0.21.2-beta.1",
+	"version": "0.21.2",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-musl.node",
 	"files": ["lancedb.linux-arm64-musl.node"],
-	"license": "Apache 2.0",
+	"license": "Apache-2.0",
 	"engines": {
 		"node": ">= 18"
 	},
 	"libc": ["musl"]
-}
+}
--- a/nodejs/npm/linux-x64-gnu/package.json
+++ b/nodejs/npm/linux-x64-gnu/package.json
@@ -1,11 +1,11 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-gnu",
-	"version": "0.21.2-beta.1",
+	"version": "0.21.2",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-gnu.node",
 	"files": ["lancedb.linux-x64-gnu.node"],
-	"license": "Apache 2.0",
+	"license": "Apache-2.0",
 	"engines": {
 		"node": ">= 18"
 	},
--- a/nodejs/npm/linux-x64-musl/package.json
+++ b/nodejs/npm/linux-x64-musl/package.json
@@ -1,13 +1,13 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-musl",
-	"version": "0.21.2-beta.1",
+	"version": "0.21.2",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-musl.node",
 	"files": ["lancedb.linux-x64-musl.node"],
-	"license": "Apache 2.0",
+	"license": "Apache-2.0",
 	"engines": {
 		"node": ">= 18"
 	},
 	"libc": ["musl"]
-}
+}
--- a/nodejs/npm/win32-arm64-msvc/package.json
+++ b/nodejs/npm/win32-arm64-msvc/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@lancedb/lancedb-win32-arm64-msvc",
-  "version": "0.21.2-beta.1",
+  "version": "0.21.2",
  "os": [
    "win32"
  ],
@@ -11,7 +11,7 @@
  "files": [
    "lancedb.win32-arm64-msvc.node"
  ],
-  "license": "Apache 2.0",
+  "license": "Apache-2.0",
  "engines": {
    "node": ">= 18"
  }
--- a/nodejs/npm/win32-x64-msvc/package.json
+++ b/nodejs/npm/win32-x64-msvc/package.json
@@ -1,11 +1,11 @@
 {
 	"name": "@lancedb/lancedb-win32-x64-msvc",
-	"version": "0.21.2-beta.1",
+	"version": "0.21.2",
 	"os": ["win32"],
 	"cpu": ["x64"],
 	"main": "lancedb.win32-x64-msvc.node",
 	"files": ["lancedb.win32-x64-msvc.node"],
-	"license": "Apache 2.0",
+	"license": "Apache-2.0",
 	"engines": {
 		"node": ">= 18"
 	}
--- a/nodejs/package-lock.json
+++ b/nodejs/package-lock.json
@@ -1,17 +1,17 @@
 {
  "name": "@lancedb/lancedb",
-  "version": "0.21.2-beta.1",
+  "version": "0.21.2",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "@lancedb/lancedb",
-      "version": "0.21.2-beta.1",
+      "version": "0.21.2",
      "cpu": [
        "x64",
        "arm64"
      ],
-      "license": "Apache 2.0",
+      "license": "Apache-2.0",
      "os": [
        "darwin",
        "linux",
--- a/nodejs/package.json
+++ b/nodejs/package.json
@@ -11,7 +11,7 @@
    "ann"
  ],
  "private": false,
-  "version": "0.21.2-beta.1",
+  "version": "0.21.2",
  "main": "dist/index.js",
  "exports": {
    ".": "./dist/index.js",
@@ -36,7 +36,7 @@
      ]
    }
  },
-  "license": "Apache 2.0",
+  "license": "Apache-2.0",
  "devDependencies": {
    "@aws-sdk/client-dynamodb": "^3.33.0",
    "@aws-sdk/client-kms": "^3.33.0",
--- a/nodejs/src/remote.rs
+++ b/nodejs/src/remote.rs
@@ -9,6 +9,12 @@ use napi_derive::*;
 #[napi(object)]
 #[derive(Debug)]
 pub struct TimeoutConfig {
+    /// The overall timeout for the entire request in seconds. This includes
+    /// connection, send, and read time. If the entire request doesn't complete
+    /// within this time, it will fail. Default is None (no overall timeout).
+    /// This can also be set via the environment variable `LANCE_CLIENT_TIMEOUT`,
+    /// as an integer number of seconds.
+    pub timeout: Option<f64>,
    /// The timeout for establishing a connection in seconds. Default is 120
    /// seconds (2 minutes). This can also be set via the environment variable
    /// `LANCE_CLIENT_CONNECT_TIMEOUT`, as an integer number of seconds.
@@ -75,6 +81,7 @@ pub struct ClientConfig {
 impl From<TimeoutConfig> for lancedb::remote::TimeoutConfig {
    fn from(config: TimeoutConfig) -> Self {
        Self {
+            timeout: config.timeout.map(std::time::Duration::from_secs_f64),
            connect_timeout: config
                .connect_timeout
                .map(std::time::Duration::from_secs_f64),
--- a/python/.gitignore
+++ b/python/.gitignore
@@ -1,2 +1,3 @@
 # Test data created by some example tests
-data/
+data/
+_lancedb.pyd
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -68,8 +68,9 @@ dev = [
    "pyright",
    'typing-extensions>=4.0.0; python_version < "3.11"',
 ]
-docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings[python]"]
+docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings-python"]
 clip = ["torch", "pillow", "open-clip-torch"]
+siglip = ["torch", "pillow", "transformers>=4.41.0","sentencepiece"]
 embeddings = [
    "requests>=2.31.0",
    "openai>=1.6.1",
@@ -87,6 +88,7 @@ embeddings = [
    "botocore>=1.31.57",
    'ibm-watsonx-ai>=1.1.2; python_version >= "3.10"',
    "ollama>=0.3.0",
+    "sentencepiece"
 ]
 azure = ["adlfs>=2024.2.0"]

--- a/python/python/lancedb/init.py
+++ b/python/python/lancedb/init.py
@@ -241,4 +241,4 @@ def __warn_on_fork():


 if hasattr(os, "register_at_fork"):
-    os.register_at_fork(before=__warn_on_fork)
+    os.register_at_fork(before=__warn_on_fork)  # type: ignore[attr-defined]
--- a/python/python/lancedb/embeddings/init.py
+++ b/python/python/lancedb/embeddings/init.py
@@ -20,3 +20,4 @@ from .jinaai import JinaEmbeddings
 from .watsonx import WatsonxEmbeddings
 from .voyageai import VoyageAIEmbeddingFunction
 from .colpali import ColPaliEmbeddings
+from .siglip import SigLipEmbeddings
--- a/python/python/lancedb/embeddings/siglip.py
+++ b/python/python/lancedb/embeddings/siglip.py
@@ -0,0 +1,200 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright The LanceDB Authors
+
+import concurrent.futures
+import io
+import os
+from typing import TYPE_CHECKING, List, Union
+import urllib.parse as urlparse
+
+import numpy as np
+import pyarrow as pa
+from tqdm import tqdm
+from pydantic import PrivateAttr
+
+from ..util import attempt_import_or_raise
+from .base import EmbeddingFunction
+from .registry import register
+from .utils import IMAGES, url_retrieve
+
+if TYPE_CHECKING:
+    import PIL
+    import torch
+
+
+@register("siglip")
+class SigLipEmbeddings(EmbeddingFunction):
+    """Embedding function backed by a Hugging Face SigLIP checkpoint.
+
+    Text is embedded through ``get_text_features``; images (local paths,
+    ``file://`` or ``http(s)`` URIs, raw bytes or PIL images) through
+    ``get_image_features``.
+
+    Attributes
+    ----------
+    model_name: str
+        Checkpoint identifier passed to ``from_pretrained``.
+    device: str
+        Device the model and its inputs are moved to.
+    batch_size: int
+        Number of source images handed to the thread pool at a time.
+    normalize: bool
+        If True, each embedding is divided by its norm.
+    """
+
+    model_name: str = "google/siglip-base-patch16-224"
+    device: str = "cpu"
+    batch_size: int = 64
+    normalize: bool = True
+
+    _model = PrivateAttr()
+    _processor = PrivateAttr()
+    # NOTE(review): never assigned or read in this file; confirm it is needed.
+    _tokenizer = PrivateAttr()
+    _torch = PrivateAttr()
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # Resolve the optional dependencies only when an instance is created.
+        transformers = attempt_import_or_raise("transformers")
+        self._torch = attempt_import_or_raise("torch")
+
+        self._processor = transformers.AutoProcessor.from_pretrained(self.model_name)
+        self._model = transformers.SiglipModel.from_pretrained(self.model_name)
+        self._model.to(self.device)
+        self._model.eval()
+        # Filled in lazily by ndims().
+        self._ndims = None
+
+    def ndims(self):
+        """Return the embedding size, found once by embedding a dummy text."""
+        if self._ndims is None:
+            self._ndims = self.generate_text_embeddings("foo").shape[0]
+        return self._ndims
+
+    def compute_query_embeddings(
+        self, query: Union[str, "PIL.Image.Image"], *args, **kwargs
+    ) -> List[np.ndarray]:
+        """Embed one query, given either as text or as a PIL image.
+
+        Raises
+        ------
+        TypeError
+            If ``query`` is neither a ``str`` nor a ``PIL.Image.Image``.
+        """
+        if isinstance(query, str):
+            return [self.generate_text_embeddings(query)]
+        PIL = attempt_import_or_raise("PIL", "pillow")
+        if isinstance(query, PIL.Image.Image):
+            return [self.generate_image_embedding(query)]
+        raise TypeError("SigLIP supports str or PIL Image as query")
+
+    def generate_text_embeddings(self, text: str) -> np.ndarray:
+        """Embed ``text`` and return the squeezed feature array."""
+        torch = self._torch
+        # Every input is padded or truncated to exactly 64 tokens.
+        text_inputs = self._processor(
+            text=text,
+            return_tensors="pt",
+            padding="max_length",
+            truncation=True,
+            max_length=64,
+        ).to(self.device)
+
+        with torch.no_grad():
+            text_features = self._model.get_text_features(**text_inputs)
+            if self.normalize:
+                text_features = text_features / text_features.norm(dim=-1, keepdim=True)
+            return text_features.cpu().detach().numpy().squeeze()
+
+    def sanitize_input(self, images: IMAGES) -> Union[List[bytes], np.ndarray]:
+        """Wrap a single str/bytes in a list and turn pyarrow arrays into lists.
+
+        Any other input is returned unchanged.
+        """
+        if isinstance(images, (str, bytes)):
+            images = [images]
+        elif isinstance(images, pa.Array):
+            images = images.to_pylist()
+        elif isinstance(images, pa.ChunkedArray):
+            images = images.combine_chunks().to_pylist()
+        return images
+
+    def compute_source_embeddings(
+        self, images: IMAGES, *args, **kwargs
+    ) -> List[np.ndarray]:
+        """Embed every image in ``images``, ``batch_size`` items at a time."""
+        images = self.sanitize_input(images)
+        embeddings = []
+
+        for i in range(0, len(images), self.batch_size):
+            j = min(i + self.batch_size, len(images))
+            batch = images[i:j]
+            embeddings.extend(self._parallel_get(batch))
+        return embeddings
+
+    def _parallel_get(self, images: Union[List[str], List[bytes]]) -> List[np.ndarray]:
+        """Embed ``images`` on a thread pool; results keep the input order."""
+        with concurrent.futures.ThreadPoolExecutor() as executor:
+            futures = [
+                executor.submit(self.generate_image_embedding, image)
+                for image in images
+            ]
+            return [f.result() for f in tqdm(futures, desc="SigLIP Embedding")]
+
+    def generate_image_embedding(
+        self, image: Union[str, bytes, "PIL.Image.Image"]
+    ) -> np.ndarray:
+        """Load one image, preprocess it and return its embedding."""
+        image = self._to_pil(image)
+        image = self._processor(images=image, return_tensors="pt")["pixel_values"]
+        return self._encode_and_normalize_image(image)
+
+    def _encode_and_normalize_image(self, image_tensor: "torch.Tensor") -> np.ndarray:
+        """Run the image encoder without gradients and return a numpy array."""
+        torch = self._torch
+        with torch.no_grad():
+            image_features = self._model.get_image_features(
+                image_tensor.to(self.device)
+            )
+            if self.normalize:
+                image_features = image_features / image_features.norm(
+                    dim=-1, keepdim=True
+                )
+            return image_features.cpu().detach().numpy().squeeze()
+
+    def _to_pil(self, image: Union[str, bytes, "PIL.Image.Image"]):
+        """Load ``image`` as an RGB PIL image.
+
+        Strings may be local paths, ``file://`` URIs or ``http(s)`` URLs.
+
+        Raises
+        ------
+        NotImplementedError
+            If a string uses any other URI scheme.
+        ValueError
+            If ``image`` is not a str, bytes or PIL image.
+        """
+        PIL = attempt_import_or_raise("PIL", "pillow")
+        if isinstance(image, PIL.Image.Image):
+            return image.convert("RGB") if image.mode != "RGB" else image
+        elif isinstance(image, bytes):
+            return PIL.Image.open(io.BytesIO(image)).convert("RGB")
+        elif isinstance(image, str):
+            parsed = urlparse.urlparse(image)
+            if os.name == "nt" and len(parsed.scheme) == 1:
+                # urlparse reads the drive letter of "C:\dir\img.jpg" as a
+                # one-character scheme; such a string is a plain local path.
+                return PIL.Image.open(image).convert("RGB")
+            elif parsed.scheme == "file":
+                return PIL.Image.open(parsed.path).convert("RGB")
+            elif parsed.scheme == "":
+                path = image if os.name == "nt" else parsed.path
+                return PIL.Image.open(path).convert("RGB")
+            elif parsed.scheme.startswith("http"):
+                image_bytes = url_retrieve(image)
+                return PIL.Image.open(io.BytesIO(image_bytes)).convert("RGB")
+            else:
+                raise NotImplementedError("Only local and http(s) urls are supported")
+        else:
+            raise ValueError(f"Unsupported image type: {type(image)}")
--- a/python/python/lancedb/remote/init.py
+++ b/python/python/lancedb/remote/init.py
@@ -17,6 +17,12 @@ class TimeoutConfig:

    Attributes
    ----------
+    timeout: Optional[timedelta]
+        The overall timeout for the entire request. This includes connection,
+        send, and read time. If the entire request doesn't complete within
+        this time, it will fail. Default is None (no overall timeout).
+        This can also be set via the environment variable
+        `LANCE_CLIENT_TIMEOUT`, as an integer number of seconds.
    connect_timeout: Optional[timedelta]
        The timeout for establishing a connection. Default is 120 seconds (2 minutes).
        This can also be set via the environment variable
@@ -31,6 +37,7 @@ class TimeoutConfig:
        `LANCE_CLIENT_CONNECTION_TIMEOUT`, as an integer number of seconds.
    """

+    timeout: Optional[timedelta] = None
    connect_timeout: Optional[timedelta] = None
    read_timeout: Optional[timedelta] = None
    pool_idle_timeout: Optional[timedelta] = None
@@ -50,6 +57,7 @@ class TimeoutConfig:
            )

    def __post_init__(self):
+        self.timeout = self.__to_timedelta(self.timeout)
        self.connect_timeout = self.__to_timedelta(self.connect_timeout)
        self.read_timeout = self.__to_timedelta(self.read_timeout)
        self.pool_idle_timeout = self.__to_timedelta(self.pool_idle_timeout)
--- a/python/python/lancedb/table.py
+++ b/python/python/lancedb/table.py
@@ -2926,6 +2926,12 @@ def has_nan_values(arr: Union[pa.ListArray, pa.ChunkedArray]) -> pa.BooleanArray
    return pc.is_in(indices, has_nan_indices)


+def _name_suggests_vector_column(field_name: str) -> bool:
+    """Return True when the lowercased name contains "vector" or "embedding"."""
+    lowered = field_name.lower()
+    return any(token in lowered for token in ("vector", "embedding"))
+
+
 def _infer_target_schema(
    reader: pa.RecordBatchReader,
 ) -> Tuple[pa.Schema, pa.RecordBatchReader]:
@@ -2933,35 +2939,27 @@ def _infer_target_schema(
    peeked = None

    for i, field in enumerate(schema):
-        if (
-            field.name == VECTOR_COLUMN_NAME
-            and (pa.types.is_list(field.type) or pa.types.is_large_list(field.type))
-            and pa.types.is_floating(field.type.value_type)
-        ):
+        is_list_type = pa.types.is_list(field.type) or pa.types.is_large_list(
+            field.type
+        )
+
+        if _name_suggests_vector_column(field.name) and is_list_type:
            if peeked is None:
                peeked, reader = peek_reader(reader)
            # Use the most common length of the list as the dimensions
            dim = _modal_list_size(peeked.column(i))

-            new_field = pa.field(
-                VECTOR_COLUMN_NAME,
-                pa.list_(pa.float32(), dim),
-                nullable=field.nullable,
-            )
+            # Determine target type based on value type
+            if pa.types.is_floating(field.type.value_type):
+                target_type = pa.list_(pa.float32(), dim)
+            elif pa.types.is_integer(field.type.value_type):
+                target_type = pa.list_(pa.uint8(), dim)
+            else:
+                continue  # Skip non-numeric types

-            schema = schema.set(i, new_field)
-        elif (
-            field.name == VECTOR_COLUMN_NAME
-            and (pa.types.is_list(field.type) or pa.types.is_large_list(field.type))
-            and pa.types.is_integer(field.type.value_type)
-        ):
-            if peeked is None:
-                peeked, reader = peek_reader(reader)
-            # Use the most common length of the list as the dimensions
-            dim = _modal_list_size(peeked.column(i))
            new_field = pa.field(
-                VECTOR_COLUMN_NAME,
-                pa.list_(pa.uint8(), dim),
+                field.name,  # preserve original field name
+                target_type,
                nullable=field.nullable,
            )

@@ -3673,9 +3671,14 @@ class AsyncTable:
            )
            if query.distance_type is not None:
                async_query = async_query.distance_type(query.distance_type)
-            if query.minimum_nprobes is not None:
+            if query.minimum_nprobes is not None and query.maximum_nprobes is not None:
+                # Set both to the minimum first to avoid min > max error.
+                async_query = async_query.nprobes(
+                    query.minimum_nprobes
+                ).maximum_nprobes(query.maximum_nprobes)
+            elif query.minimum_nprobes is not None:
                async_query = async_query.minimum_nprobes(query.minimum_nprobes)
-            if query.maximum_nprobes is not None:
+            elif query.maximum_nprobes is not None:
                async_query = async_query.maximum_nprobes(query.maximum_nprobes)
            if query.refine_factor is not None:
                async_query = async_query.refine_factor(query.refine_factor)
--- a/python/python/tests/test_embeddings_slow.py
+++ b/python/python/tests/test_embeddings_slow.py
@@ -4,7 +4,6 @@
 import importlib
 import io
 import os
-
 import lancedb
 import numpy as np
 import pandas as pd
@@ -12,7 +11,6 @@ import pyarrow as pa
 import pytest
 from lancedb.embeddings import get_registry
 from lancedb.pydantic import LanceModel, Vector, MultiVector
-import requests

 # These are integration tests for embedding functions.
 # They are slow because they require downloading models
@@ -98,9 +96,34 @@ def test_basic_text_embeddings(alias, tmp_path):
    assert not np.allclose(actual.vector, actual.vector2)


-@pytest.mark.slow
-def test_openclip(tmp_path):
+@pytest.fixture(scope="module")
+def test_images():
     import requests
+    # Downloaded once per module; timeout=30 avoids hanging on a dead host.
+    labels = ["cat", "cat", "dog", "dog", "horse", "horse"]
+    uris = [
+        "http://farm1.staticflickr.com/53/167798175_7c7845bbbd_z.jpg",
+        "http://farm1.staticflickr.com/134/332220238_da527d8140_z.jpg",
+        "http://farm9.staticflickr.com/8387/8602747737_2e5c2a45d4_z.jpg",
+        "http://farm5.staticflickr.com/4092/5017326486_1f46057f5f_z.jpg",
+        "http://farm9.staticflickr.com/8216/8434969557_d37882c42d_z.jpg",
+        "http://farm6.staticflickr.com/5142/5835678453_4f3a4edb45_z.jpg",
+    ]
+    image_bytes = [requests.get(uri, timeout=30).content for uri in uris]
+    return labels, uris, image_bytes
+
+
+@pytest.fixture(scope="module")
+def query_image_bytes():
+    """Download the query image once per module; time out instead of hanging."""
+    import requests
+
+    query_image_uri = "http://farm1.staticflickr.com/200/467715466_ed4a31801f_z.jpg"
+    return requests.get(query_image_uri, timeout=30).content
+
+
+@pytest.mark.slow
+def test_openclip(tmp_path, test_images, query_image_bytes):
    from PIL import Image

    db = lancedb.connect(tmp_path)
@@ -114,20 +137,12 @@ def test_openclip(tmp_path):
        vector: Vector(func.ndims()) = func.VectorField()
        vec_from_bytes: Vector(func.ndims()) = func.VectorField()

+    labels, uris, image_bytes_list = test_images
    table = db.create_table("images", schema=Images)
-    labels = ["cat", "cat", "dog", "dog", "horse", "horse"]
-    uris = [
-        "http://farm1.staticflickr.com/53/167798175_7c7845bbbd_z.jpg",
-        "http://farm1.staticflickr.com/134/332220238_da527d8140_z.jpg",
-        "http://farm9.staticflickr.com/8387/8602747737_2e5c2a45d4_z.jpg",
-        "http://farm5.staticflickr.com/4092/5017326486_1f46057f5f_z.jpg",
-        "http://farm9.staticflickr.com/8216/8434969557_d37882c42d_z.jpg",
-        "http://farm6.staticflickr.com/5142/5835678453_4f3a4edb45_z.jpg",
-    ]
-    # get each uri as bytes
-    image_bytes = [requests.get(uri).content for uri in uris]
    table.add(
-        pd.DataFrame({"label": labels, "image_uri": uris, "image_bytes": image_bytes})
+        pd.DataFrame(
+            {"label": labels, "image_uri": uris, "image_bytes": image_bytes_list}
+        )
    )

    # text search
@@ -146,9 +161,7 @@ def test_openclip(tmp_path):
    assert np.allclose(actual.vector, frombytes.vector)

    # image search
-    query_image_uri = "http://farm1.staticflickr.com/200/467715466_ed4a31801f_z.jpg"
-    image_bytes = requests.get(query_image_uri).content
-    query_image = Image.open(io.BytesIO(image_bytes))
+    query_image = Image.open(io.BytesIO(query_image_bytes))
    actual = (
        table.search(query_image, vector_column_name="vector")
        .limit(1)
@@ -524,6 +537,8 @@ def test_voyageai_embedding_function():
    os.environ.get("VOYAGE_API_KEY") is None, reason="VOYAGE_API_KEY not set"
 )
 def test_voyageai_multimodal_embedding_function():
+    import requests
+
    voyageai = (
        get_registry().get("voyageai").create(name="voyage-multimodal-3", max_retries=0)
    )
@@ -639,3 +654,71 @@ def test_colpali(tmp_path):
    assert len(first_row["image_vectors"][0]) == func.ndims(), (
        "Vector dimension mismatch"
    )
+
+
+@pytest.mark.slow
+def test_siglip(tmp_path, test_images, query_image_bytes):
+    from PIL import Image
+
+    labels, uris, image_bytes = test_images
+
+    db = lancedb.connect(tmp_path)
+    registry = get_registry()
+    func = registry.get("siglip").create(max_retries=0)
+
+    class Images(LanceModel):
+        label: str
+        image_uri: str = func.SourceField()
+        image_bytes: bytes = func.SourceField()
+        vector: Vector(func.ndims()) = func.VectorField()
+        vec_from_bytes: Vector(func.ndims()) = func.VectorField()
+
+    table = db.create_table("images", schema=Images)
+
+    table.add(
+        pd.DataFrame(
+            {
+                "label": labels,
+                "image_uri": uris,
+                "image_bytes": image_bytes,
+            }
+        )
+    )
+
+    # Text search: the top hit for a dog-related phrase should be a dog image.
+    actual = (
+        table.search("man's best friend", vector_column_name="vector")
+        .limit(1)
+        .to_pydantic(Images)[0]
+    )
+    assert actual.label == "dog"
+
+    frombytes = (
+        table.search("man's best friend", vector_column_name="vec_from_bytes")
+        .limit(1)
+        .to_pydantic(Images)[0]
+    )
+    assert actual.label == frombytes.label
+    assert np.allclose(actual.vector, frombytes.vector)
+
+    # Image search: the top hit for the query photo should be a dog image.
+    query_image = Image.open(io.BytesIO(query_image_bytes))
+    actual = (
+        table.search(query_image, vector_column_name="vector")
+        .limit(1)
+        .to_pydantic(Images)[0]
+    )
+    assert actual.label == "dog"
+
+    other = (
+        table.search(query_image, vector_column_name="vec_from_bytes")
+        .limit(1)
+        .to_pydantic(Images)[0]
+    )
+    assert actual.label == other.label
+    # Both vector columns should hold (numerically) the same embeddings.
+    arrow_table = table.search().select(["vector", "vec_from_bytes"]).to_arrow()
+    assert np.allclose(
+        arrow_table["vector"].combine_chunks().values.to_numpy(),
+        arrow_table["vec_from_bytes"].combine_chunks().values.to_numpy(),
+    )
--- a/python/python/tests/test_query.py
+++ b/python/python/tests/test_query.py
@@ -445,25 +445,45 @@ def test_invalid_nprobes_sync(table):
    with pytest.raises(ValueError, match="minimum_nprobes must be greater than 0"):
        LanceVectorQueryBuilder(table, [0, 0], "vector").minimum_nprobes(0).to_list()
    with pytest.raises(
-        ValueError, match="maximum_nprobes must be greater than minimum_nprobes"
+        ValueError,
+        match="maximum_nprobes must be greater than or equal to minimum_nprobes",
    ):
        LanceVectorQueryBuilder(table, [0, 0], "vector").maximum_nprobes(5).to_list()
    with pytest.raises(
-        ValueError, match="minimum_nprobes must be less or equal to maximum_nprobes"
+        ValueError,
+        match="minimum_nprobes must be less than or equal to maximum_nprobes",
    ):
        LanceVectorQueryBuilder(table, [0, 0], "vector").minimum_nprobes(100).to_list()


+def test_nprobes_works_sync(table):
+    LanceVectorQueryBuilder(table, [0, 0], "vector").nprobes(30).to_list()
+
+
+def test_nprobes_min_max_works_sync(table):
+    LanceVectorQueryBuilder(table, [0, 0], "vector").minimum_nprobes(2).maximum_nprobes(
+        4
+    ).to_list()
+
+
+def test_multiple_nprobes_calls_works_sync(table):
+    LanceVectorQueryBuilder(table, [0, 0], "vector").nprobes(30).maximum_nprobes(
+        20
+    ).minimum_nprobes(20).to_list()
+
+
@pytest.mark.asyncio
 async def test_invalid_nprobes_async(table_async: AsyncTable):
    with pytest.raises(ValueError, match="minimum_nprobes must be greater than 0"):
        await table_async.vector_search([0, 0]).minimum_nprobes(0).to_list()
    with pytest.raises(
-        ValueError, match="maximum_nprobes must be greater than minimum_nprobes"
+        ValueError,
+        match="maximum_nprobes must be greater than or equal to minimum_nprobes",
    ):
        await table_async.vector_search([0, 0]).maximum_nprobes(5).to_list()
    with pytest.raises(
-        ValueError, match="minimum_nprobes must be less or equal to maximum_nprobes"
+        ValueError,
+        match="minimum_nprobes must be less than or equal to maximum_nprobes",
    ):
        await table_async.vector_search([0, 0]).minimum_nprobes(100).to_list()

--- a/python/python/tests/test_remote_db.py
+++ b/python/python/tests/test_remote_db.py
@@ -798,6 +798,21 @@ def test_create_client():
    assert isinstance(db.client_config, ClientConfig)
    assert db.client_config.timeout_config.connect_timeout == timedelta(seconds=42)

+    # Test overall timeout parameter
+    db = lancedb.connect(
+        **mandatory_args,
+        client_config=ClientConfig(timeout_config={"timeout": 60}),
+    )
+    assert isinstance(db.client_config, ClientConfig)
+    assert db.client_config.timeout_config.timeout == timedelta(seconds=60)
+
+    db = lancedb.connect(
+        **mandatory_args,
+        client_config={"timeout_config": {"timeout": timedelta(seconds=60)}},
+    )
+    assert isinstance(db.client_config, ClientConfig)
+    assert db.client_config.timeout_config.timeout == timedelta(seconds=60)
+
    db = lancedb.connect(
        **mandatory_args, client_config=ClientConfig(retry_config={"retries": 42})
    )
--- a/python/python/tests/test_util.py
+++ b/python/python/tests/test_util.py
@@ -390,6 +390,87 @@ def test_infer_target_schema():
    assert output == expected


+def test_infer_target_schema_with_vector_embedding_names():
+    """Test that _infer_target_schema detects vector columns with 'vector'/'embedding'.
+
+    This tests the enhanced column name detection for vector inference.
+    """
+
+    # Test float vectors with various naming patterns
+    example = pa.schema(
+        {
+            "user_vector": pa.list_(pa.float64()),
+            "text_embedding": pa.list_(pa.float64()),
+            "doc_embeddings": pa.list_(pa.float64()),
+            "my_vector_field": pa.list_(pa.float64()),
+            "embedding_model": pa.list_(pa.float64()),
+            "VECTOR_COL": pa.list_(pa.float64()),  # uppercase
+            "Vector_Mixed": pa.list_(pa.float64()),  # mixed case
+            "normal_list": pa.list_(pa.float64()),  # should not be converted
+        }
+    )
+    data = pa.table(
+        {
+            "user_vector": [[1.0, 2.0]],
+            "text_embedding": [[3.0, 4.0]],
+            "doc_embeddings": [[5.0, 6.0]],
+            "my_vector_field": [[7.0, 8.0]],
+            "embedding_model": [[9.0, 10.0]],
+            "VECTOR_COL": [[11.0, 12.0]],
+            "Vector_Mixed": [[13.0, 14.0]],
+            "normal_list": [[15.0, 16.0]],
+        },
+        schema=example,
+    )
+
+    expected = pa.schema(
+        {
+            "user_vector": pa.list_(pa.float32(), 2),  # converted
+            "text_embedding": pa.list_(pa.float32(), 2),  # converted
+            "doc_embeddings": pa.list_(pa.float32(), 2),  # converted
+            "my_vector_field": pa.list_(pa.float32(), 2),  # converted
+            "embedding_model": pa.list_(pa.float32(), 2),  # converted
+            "VECTOR_COL": pa.list_(pa.float32(), 2),  # converted
+            "Vector_Mixed": pa.list_(pa.float32(), 2),  # converted
+            "normal_list": pa.list_(pa.float64()),  # not converted
+        }
+    )
+
+    output, _ = _infer_target_schema(data.to_reader())
+    assert output == expected
+
+    # Test integer vectors with various naming patterns
+    example_int = pa.schema(
+        {
+            "user_vector": pa.list_(pa.int32()),
+            "text_embedding": pa.list_(pa.int64()),
+            "doc_embeddings": pa.list_(pa.int16()),
+            "normal_list": pa.list_(pa.int32()),  # should not be converted
+        }
+    )
+    data_int = pa.table(
+        {
+            "user_vector": [[1, 2]],
+            "text_embedding": [[3, 4]],
+            "doc_embeddings": [[5, 6]],
+            "normal_list": [[7, 8]],
+        },
+        schema=example_int,
+    )
+
+    expected_int = pa.schema(
+        {
+            "user_vector": pa.list_(pa.uint8(), 2),  # converted
+            "text_embedding": pa.list_(pa.uint8(), 2),  # converted
+            "doc_embeddings": pa.list_(pa.uint8(), 2),  # converted
+            "normal_list": pa.list_(pa.int32()),  # not converted
+        }
+    )
+
+    output_int, _ = _infer_target_schema(data_int.to_reader())
+    assert output_int == expected_int
+
+
@pytest.mark.parametrize(
    "data",
    [
--- a/python/src/connection.rs
+++ b/python/src/connection.rs
@@ -241,6 +241,7 @@ pub struct PyClientRetryConfig {

 #[derive(FromPyObject)]
 pub struct PyClientTimeoutConfig {
+    timeout: Option<Duration>,
    connect_timeout: Option<Duration>,
    read_timeout: Option<Duration>,
    pool_idle_timeout: Option<Duration>,
@@ -264,6 +265,7 @@ impl From<PyClientRetryConfig> for lancedb::remote::RetryConfig {
 impl From<PyClientTimeoutConfig> for lancedb::remote::TimeoutConfig {
    fn from(value: PyClientTimeoutConfig) -> Self {
        Self {
+            timeout: value.timeout,
            connect_timeout: value.connect_timeout,
            read_timeout: value.read_timeout,
            pool_idle_timeout: value.pool_idle_timeout,
--- a/rust/ffi/node/Cargo.toml
+++ b/rust/ffi/node/Cargo.toml
@@ -1,42 +0,0 @@
-[package]
-name = "lancedb-node"
-version = "0.21.2-beta.1"
-description = "Serverless, low-latency vector database for AI applications"
-license.workspace = true
-edition.workspace = true
-repository.workspace = true
-keywords.workspace = true
-categories.workspace = true
-exclude = ["index.node"]
-rust-version = "1.75"
-
-[lib]
-crate-type = ["cdylib"]
-
-[dependencies]
-arrow-array = { workspace = true }
-arrow-ipc = { workspace = true }
-arrow-schema = { workspace = true }
-chrono = { workspace = true }
-conv = "0.3.3"
-once_cell = "1"
-futures = "0.3"
-half = { workspace = true }
-lance = { workspace = true }
-lance-index = { workspace = true }
-lance-linalg = { workspace = true }
-lancedb = { path = "../../lancedb" }
-tokio = { version = "1.23", features = ["rt-multi-thread"] }
-neon = { version = "0.10.1", default-features = false, features = [
-    "channel-api",
-    "napi-6",
-    "promise-api",
-    "task-api",
-] }
-object_store = { workspace = true, features = ["aws"] }
-snafu = { workspace = true }
-async-trait = "0"
-env_logger = "0"
-
-# Prevent dynamic linking of lzma, which comes from datafusion
-lzma-sys = { version = "*", features = ["static"] }
--- a/rust/ffi/node/README.md
+++ b/rust/ffi/node/README.md
@@ -1,3 +0,0 @@
-The LanceDB node bridge (lancedb-node) allows javascript applications to access LanceDB datasets.
-
-It is build using [Neon](https://neon-bindings.com). See the node project for an example of how it is used / tests
--- a/rust/ffi/node/src/arrow.rs
+++ b/rust/ffi/node/src/arrow.rs
@@ -1,37 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The LanceDB Authors
-
-use std::io::Cursor;
-use std::ops::Deref;
-
-use arrow_array::RecordBatch;
-use arrow_ipc::reader::FileReader;
-use arrow_ipc::writer::FileWriter;
-use arrow_schema::SchemaRef;
-
-use crate::error::Result;
-
-pub fn arrow_buffer_to_record_batch(slice: &[u8]) -> Result<(Vec<RecordBatch>, SchemaRef)> {
-    let mut batches: Vec<RecordBatch> = Vec::new();
-    let file_reader = FileReader::try_new(Cursor::new(slice), None)?;
-    let schema = file_reader.schema();
-    for b in file_reader {
-        let record_batch = b?;
-        batches.push(record_batch);
-    }
-    Ok((batches, schema))
-}
-
-pub fn record_batch_to_buffer(batches: Vec<RecordBatch>) -> Result<Vec<u8>> {
-    if batches.is_empty() {
-        return Ok(Vec::new());
-    }
-
-    let schema = batches.first().unwrap().schema();
-    let mut fr = FileWriter::try_new(Vec::new(), schema.deref())?;
-    for batch in batches.iter() {
-        fr.write(batch)?
-    }
-    fr.finish()?;
-    Ok(fr.into_inner()?)
-}
--- a/rust/ffi/node/src/convert.rs
+++ b/rust/ffi/node/src/convert.rs
@@ -1,42 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The LanceDB Authors
-
-use neon::prelude::*;
-use neon::types::buffer::TypedArray;
-
-use crate::error::ResultExt;
-
-pub fn vec_str_to_array<'a, C: Context<'a>>(vec: &[String], cx: &mut C) -> JsResult<'a, JsArray> {
-    let a = JsArray::new(cx, vec.len() as u32);
-    for (i, s) in vec.iter().enumerate() {
-        let v = cx.string(s);
-        a.set(cx, i as u32, v)?;
-    }
-    Ok(a)
-}
-
-pub fn js_array_to_vec(array: &JsArray, cx: &mut FunctionContext) -> Vec<f32> {
-    let mut query_vec: Vec<f32> = Vec::new();
-    for i in 0..array.len(cx) {
-        let entry: Handle<JsNumber> = array.get(cx, i).unwrap();
-        query_vec.push(entry.value(cx) as f32);
-    }
-    query_vec
-}
-
-// Creates a new JsBuffer from a rust buffer with a special logic for electron
-pub fn new_js_buffer<'a>(
-    buffer: Vec<u8>,
-    cx: &mut TaskContext<'a>,
-    is_electron: bool,
-) -> NeonResult<Handle<'a, JsBuffer>> {
-    if is_electron {
-        // Electron does not support `external`: https://github.com/neon-bindings/neon/pull/937
-        let mut js_buffer = JsBuffer::new(cx, buffer.len()).or_throw(cx)?;
-        let buffer_data = js_buffer.as_mut_slice(cx);
-        buffer_data.copy_from_slice(buffer.as_slice());
-        Ok(js_buffer)
-    } else {
-        Ok(JsBuffer::external(cx, buffer))
-    }
-}
--- a/rust/ffi/node/src/error.rs
+++ b/rust/ffi/node/src/error.rs
@@ -1,86 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The LanceDB Authors
-
-use arrow_schema::ArrowError;
-use neon::context::Context;
-use neon::prelude::NeonResult;
-use snafu::Snafu;
-
-#[derive(Debug, Snafu)]
-pub enum Error {
-    #[allow(dead_code)]
-    #[snafu(display("column '{name}' is missing"))]
-    MissingColumn { name: String },
-    #[snafu(display("{name}: {message}"))]
-    OutOfRange { name: String, message: String },
-    #[allow(dead_code)]
-    #[snafu(display("{index_type} is not a valid index type"))]
-    InvalidIndexType { index_type: String },
-
-    #[snafu(display("{message}"))]
-    LanceDB { message: String },
-    #[snafu(display("{message}"))]
-    Neon { message: String },
-}
-
-pub type Result<T> = std::result::Result<T, Error>;
-
-impl From<lancedb::error::Error> for Error {
-    fn from(e: lancedb::error::Error) -> Self {
-        Self::LanceDB {
-            message: e.to_string(),
-        }
-    }
-}
-
-impl From<lance::Error> for Error {
-    fn from(e: lance::Error) -> Self {
-        Self::LanceDB {
-            message: e.to_string(),
-        }
-    }
-}
-
-impl From<ArrowError> for Error {
-    fn from(value: ArrowError) -> Self {
-        Self::LanceDB {
-            message: value.to_string(),
-        }
-    }
-}
-
-impl From<neon::result::Throw> for Error {
-    fn from(value: neon::result::Throw) -> Self {
-        Self::Neon {
-            message: value.to_string(),
-        }
-    }
-}
-
-impl<T> From<std::sync::mpsc::SendError<T>> for Error {
-    fn from(value: std::sync::mpsc::SendError<T>) -> Self {
-        Self::Neon {
-            message: value.to_string(),
-        }
-    }
-}
-
-/// ResultExt is used to transform a [`Result`] into a [`NeonResult`],
-/// so it can be returned as a JavaScript error
-/// Copied from [Neon](https://github.com/neon-bindings/neon/blob/4c2e455a9e6814f1ba0178616d63caec7f4df317/crates/neon/src/result/mod.rs#L88)
-pub trait ResultExt<T> {
-    fn or_throw<'a, C: Context<'a>>(self, cx: &mut C) -> NeonResult<T>;
-}
-
-/// Implement ResultExt for the std Result so it can be used any Result type
-impl<T, E> ResultExt<T> for std::result::Result<T, E>
-where
-    E: std::fmt::Display,
-{
-    fn or_throw<'a, C: Context<'a>>(self, cx: &mut C) -> NeonResult<T> {
-        match self {
-            Ok(value) => Ok(value),
-            Err(error) => cx.throw_error(error.to_string()),
-        }
-    }
-}
--- a/rust/ffi/node/src/index.rs
+++ b/rust/ffi/node/src/index.rs
@@ -1,5 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The LanceDB Authors
-
-pub mod scalar;
-pub mod vector;
--- a/rust/ffi/node/src/index/scalar.rs
+++ b/rust/ffi/node/src/index/scalar.rs
@@ -1,37 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The LanceDB Authors
-
-use lancedb::index::{scalar::BTreeIndexBuilder, Index};
-use neon::{
-    context::{Context, FunctionContext},
-    result::JsResult,
-    types::{JsBoolean, JsBox, JsPromise, JsString},
-};
-
-use crate::{error::ResultExt, runtime, table::JsTable};
-
-pub fn table_create_scalar_index(mut cx: FunctionContext) -> JsResult<JsPromise> {
-    let js_table = cx.this().downcast_or_throw::<JsBox<JsTable>, _>(&mut cx)?;
-    let column = cx.argument::<JsString>(0)?.value(&mut cx);
-    let replace = cx.argument::<JsBoolean>(1)?.value(&mut cx);
-
-    let rt = runtime(&mut cx)?;
-
-    let (deferred, promise) = cx.promise();
-    let channel = cx.channel();
-    let table = js_table.table.clone();
-
-    rt.spawn(async move {
-        let idx_result = table
-            .create_index(&[column], Index::BTree(BTreeIndexBuilder::default()))
-            .replace(replace)
-            .execute()
-            .await;
-
-        deferred.settle_with(&channel, move |mut cx| {
-            idx_result.or_throw(&mut cx)?;
-            Ok(cx.undefined())
-        });
-    });
-    Ok(promise)
-}
--- a/rust/ffi/node/src/index/vector.rs
+++ b/rust/ffi/node/src/index/vector.rs
@@ -1,77 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The LanceDB Authors
-
-use lancedb::index::vector::IvfPqIndexBuilder;
-use lancedb::index::Index;
-use lancedb::DistanceType;
-use neon::context::FunctionContext;
-use neon::prelude::*;
-use std::convert::TryFrom;
-
-use crate::error::ResultExt;
-use crate::neon_ext::js_object_ext::JsObjectExt;
-use crate::runtime;
-use crate::table::JsTable;
-
-pub fn table_create_vector_index(mut cx: FunctionContext) -> JsResult<JsPromise> {
-    let js_table = cx.this().downcast_or_throw::<JsBox<JsTable>, _>(&mut cx)?;
-    let index_params = cx.argument::<JsObject>(0)?;
-
-    let rt = runtime(&mut cx)?;
-
-    let (deferred, promise) = cx.promise();
-    let channel = cx.channel();
-    let table = js_table.table.clone();
-
-    let column_name = index_params
-        .get_opt::<JsString, _, _>(&mut cx, "column")?
-        .map(|s| s.value(&mut cx))
-        .unwrap_or("vector".to_string()); // Backward compatibility
-
-    let replace = index_params
-        .get_opt::<JsBoolean, _, _>(&mut cx, "replace")?
-        .map(|r| r.value(&mut cx));
-
-    let tbl = table.clone();
-    let ivf_pq_builder = get_index_params_builder(&mut cx, index_params).or_throw(&mut cx)?;
-
-    let mut index_builder = tbl.create_index(&[column_name], Index::IvfPq(ivf_pq_builder));
-    if let Some(replace) = replace {
-        index_builder = index_builder.replace(replace);
-    }
-
-    rt.spawn(async move {
-        let idx_result = index_builder.execute().await;
-        deferred.settle_with(&channel, move |mut cx| {
-            idx_result.or_throw(&mut cx)?;
-            Ok(cx.boxed(JsTable::from(table)))
-        });
-    });
-    Ok(promise)
-}
-
-fn get_index_params_builder(
-    cx: &mut FunctionContext,
-    obj: Handle<JsObject>,
-) -> crate::error::Result<IvfPqIndexBuilder> {
-    if obj.get_opt::<JsString, _, _>(cx, "index_name")?.is_some() {
-        return Err(crate::error::Error::LanceDB {
-            message: "Setting the index_name is no longer supported".to_string(),
-        });
-    }
-    let mut builder = IvfPqIndexBuilder::default();
-    if let Some(metric_type) = obj.get_opt::<JsString, _, _>(cx, "metric_type")? {
-        let distance_type = DistanceType::try_from(metric_type.value(cx).as_str())?;
-        builder = builder.distance_type(distance_type);
-    }
-    if let Some(np) = obj.get_opt_u32(cx, "num_partitions")? {
-        builder = builder.num_partitions(np);
-    }
-    if let Some(ns) = obj.get_opt_u32(cx, "num_sub_vectors")? {
-        builder = builder.num_sub_vectors(ns);
-    }
-    if let Some(max_iters) = obj.get_opt_u32(cx, "max_iters")? {
-        builder = builder.max_iterations(max_iters);
-    }
-    Ok(builder)
-}
--- a/rust/ffi/node/src/lib.rs
+++ b/rust/ffi/node/src/lib.rs
@@ -1,174 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The LanceDB Authors
-
-use neon::prelude::*;
-use once_cell::sync::OnceCell;
-use tokio::runtime::Runtime;
-
-use lancedb::connect;
-use lancedb::connection::Connection;
-
-use crate::error::ResultExt;
-use crate::query::JsQuery;
-use crate::table::JsTable;
-
-mod arrow;
-mod convert;
-mod error;
-mod index;
-mod neon_ext;
-mod query;
-mod table;
-
-struct JsDatabase {
-    database: Connection,
-}
-
-impl Finalize for JsDatabase {}
-
-fn runtime<'a, C: Context<'a>>(cx: &mut C) -> NeonResult<&'static Runtime> {
-    static RUNTIME: OnceCell<Runtime> = OnceCell::new();
-    static LOG: OnceCell<()> = OnceCell::new();
-
-    LOG.get_or_init(env_logger::init);
-
-    RUNTIME.get_or_try_init(|| Runtime::new().or_throw(cx))
-}
-
-fn database_new(mut cx: FunctionContext) -> JsResult<JsPromise> {
-    let path = cx.argument::<JsString>(0)?.value(&mut cx);
-    let read_consistency_interval = cx
-        .argument_opt(2)
-        .and_then(|arg| arg.downcast::<JsNumber, _>(&mut cx).ok())
-        .map(|v| v.value(&mut cx))
-        .map(std::time::Duration::from_secs_f64);
-
-    let storage_options_js = cx.argument::<JsArray>(1)?.to_vec(&mut cx)?;
-    let mut storage_options: Vec<(String, String)> = Vec::with_capacity(storage_options_js.len());
-    for handle in storage_options_js {
-        let obj = handle.downcast::<JsArray, _>(&mut cx).unwrap();
-        let key = obj.get::<JsString, _, _>(&mut cx, 0)?.value(&mut cx);
-        let value = obj.get::<JsString, _, _>(&mut cx, 1)?.value(&mut cx);
-
-        storage_options.push((key, value));
-    }
-
-    let rt = runtime(&mut cx)?;
-    let channel = cx.channel();
-    let (deferred, promise) = cx.promise();
-
-    let mut conn_builder = connect(&path).storage_options(storage_options);
-
-    if let Some(interval) = read_consistency_interval {
-        conn_builder = conn_builder.read_consistency_interval(interval);
-    }
-    rt.spawn(async move {
-        let database = conn_builder.execute().await;
-
-        deferred.settle_with(&channel, move |mut cx| {
-            let db = JsDatabase {
-                database: database.or_throw(&mut cx)?,
-            };
-            Ok(cx.boxed(db))
-        });
-    });
-    Ok(promise)
-}
-
-fn database_table_names(mut cx: FunctionContext) -> JsResult<JsPromise> {
-    let db = cx
-        .this()
-        .downcast_or_throw::<JsBox<JsDatabase>, _>(&mut cx)?;
-
-    let rt = runtime(&mut cx)?;
-    let (deferred, promise) = cx.promise();
-    let channel = cx.channel();
-    let database = db.database.clone();
-
-    rt.spawn(async move {
-        let tables_rst = database.table_names().execute().await;
-
-        deferred.settle_with(&channel, move |mut cx| {
-            let tables = tables_rst.or_throw(&mut cx)?;
-            let table_names = convert::vec_str_to_array(&tables, &mut cx);
-            table_names
-        });
-    });
-    Ok(promise)
-}
-
-fn database_open_table(mut cx: FunctionContext) -> JsResult<JsPromise> {
-    let db = cx
-        .this()
-        .downcast_or_throw::<JsBox<JsDatabase>, _>(&mut cx)?;
-    let table_name = cx.argument::<JsString>(0)?.value(&mut cx);
-
-    let rt = runtime(&mut cx)?;
-    let channel = cx.channel();
-    let database = db.database.clone();
-
-    let (deferred, promise) = cx.promise();
-    rt.spawn(async move {
-        let table_rst = database.open_table(&table_name).execute().await;
-
-        deferred.settle_with(&channel, move |mut cx| {
-            let js_table = JsTable::from(table_rst.or_throw(&mut cx)?);
-            Ok(cx.boxed(js_table))
-        });
-    });
-    Ok(promise)
-}
-
-fn database_drop_table(mut cx: FunctionContext) -> JsResult<JsPromise> {
-    let db = cx
-        .this()
-        .downcast_or_throw::<JsBox<JsDatabase>, _>(&mut cx)?;
-    let table_name = cx.argument::<JsString>(0)?.value(&mut cx);
-
-    let rt = runtime(&mut cx)?;
-    let channel = cx.channel();
-    let database = db.database.clone();
-
-    let (deferred, promise) = cx.promise();
-    rt.spawn(async move {
-        let result = database.drop_table(&table_name).await;
-        deferred.settle_with(&channel, move |mut cx| {
-            result.or_throw(&mut cx)?;
-            Ok(cx.null())
-        });
-    });
-    Ok(promise)
-}
-
-#[neon::main]
-fn main(mut cx: ModuleContext) -> NeonResult<()> {
-    cx.export_function("databaseNew", database_new)?;
-    cx.export_function("databaseTableNames", database_table_names)?;
-    cx.export_function("databaseOpenTable", database_open_table)?;
-    cx.export_function("databaseDropTable", database_drop_table)?;
-    cx.export_function("tableSearch", JsQuery::js_search)?;
-    cx.export_function("tableCreate", JsTable::js_create)?;
-    cx.export_function("tableAdd", JsTable::js_add)?;
-    cx.export_function("tableCountRows", JsTable::js_count_rows)?;
-    cx.export_function("tableDelete", JsTable::js_delete)?;
-    cx.export_function("tableUpdate", JsTable::js_update)?;
-    cx.export_function("tableMergeInsert", JsTable::js_merge_insert)?;
-    cx.export_function("tableCleanupOldVersions", JsTable::js_cleanup)?;
-    cx.export_function("tableCompactFiles", JsTable::js_compact)?;
-    cx.export_function("tableListIndices", JsTable::js_list_indices)?;
-    cx.export_function("tableIndexStats", JsTable::js_index_stats)?;
-    cx.export_function(
-        "tableCreateScalarIndex",
-        index::scalar::table_create_scalar_index,
-    )?;
-    cx.export_function(
-        "tableCreateVectorIndex",
-        index::vector::table_create_vector_index,
-    )?;
-    cx.export_function("tableSchema", JsTable::js_schema)?;
-    cx.export_function("tableAddColumns", JsTable::js_add_columns)?;
-    cx.export_function("tableAlterColumns", JsTable::js_alter_columns)?;
-    cx.export_function("tableDropColumns", JsTable::js_drop_columns)?;
-    cx.export_function("tableDropIndex", JsTable::js_drop_index)?;
-    Ok(())
-}
--- a/rust/ffi/node/src/neon_ext.rs
+++ b/rust/ffi/node/src/neon_ext.rs
@@ -1,4 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The LanceDB Authors
-
-pub mod js_object_ext;
--- a/rust/ffi/node/src/neon_ext/js_object_ext.rs
+++ b/rust/ffi/node/src/neon_ext/js_object_ext.rs
@@ -1,72 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The LanceDB Authors
-
-use crate::error::{Error, Result};
-use neon::prelude::*;
-
-// extends neon's [JsObject] with helper functions to extract properties
-pub trait JsObjectExt {
-    fn get_opt_u32(&self, cx: &mut FunctionContext, key: &str) -> Result<Option<u32>>;
-    fn get_usize(&self, cx: &mut FunctionContext, key: &str) -> Result<usize>;
-    #[allow(dead_code)]
-    fn get_opt_usize(&self, cx: &mut FunctionContext, key: &str) -> Result<Option<usize>>;
-}
-
-impl JsObjectExt for JsObject {
-    fn get_opt_u32(&self, cx: &mut FunctionContext, key: &str) -> Result<Option<u32>> {
-        let val_opt = self
-            .get_opt::<JsNumber, _, _>(cx, key)?
-            .map(|s| f64_to_u32_safe(s.value(cx), key));
-        val_opt.transpose()
-    }
-
-    fn get_usize(&self, cx: &mut FunctionContext, key: &str) -> Result<usize> {
-        let val = self.get::<JsNumber, _, _>(cx, key)?.value(cx);
-        f64_to_usize_safe(val, key)
-    }
-
-    fn get_opt_usize(&self, cx: &mut FunctionContext, key: &str) -> Result<Option<usize>> {
-        let val_opt = self
-            .get_opt::<JsNumber, _, _>(cx, key)?
-            .map(|s| f64_to_usize_safe(s.value(cx), key));
-        val_opt.transpose()
-    }
-}
-
-fn f64_to_u32_safe(n: f64, key: &str) -> Result<u32> {
-    use conv::*;
-
-    n.approx_as::<u32>().map_err(|e| match e {
-        FloatError::NegOverflow(_) => Error::OutOfRange {
-            name: key.into(),
-            message: "must be > 0".to_string(),
-        },
-        FloatError::PosOverflow(_) => Error::OutOfRange {
-            name: key.into(),
-            message: format!("must be < {}", u32::MAX),
-        },
-        FloatError::NotANumber(_) => Error::OutOfRange {
-            name: key.into(),
-            message: "not a valid number".to_string(),
-        },
-    })
-}
-
-fn f64_to_usize_safe(n: f64, key: &str) -> Result<usize> {
-    use conv::*;
-
-    n.approx_as::<usize>().map_err(|e| match e {
-        FloatError::NegOverflow(_) => Error::OutOfRange {
-            name: key.into(),
-            message: "must be > 0".to_string(),
-        },
-        FloatError::PosOverflow(_) => Error::OutOfRange {
-            name: key.into(),
-            message: format!("must be < {}", usize::MAX),
-        },
-        FloatError::NotANumber(_) => Error::OutOfRange {
-            name: key.into(),
-            message: "not a valid number".to_string(),
-        },
-    })
-}
--- a/rust/ffi/node/src/query.rs
+++ b/rust/ffi/node/src/query.rs
@@ -1,138 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The LanceDB Authors
-
-use std::convert::TryFrom;
-use std::ops::Deref;
-
-use futures::{TryFutureExt, TryStreamExt};
-use lancedb::query::{ExecutableQuery, QueryBase, Select};
-use lancedb::DistanceType;
-use neon::context::FunctionContext;
-use neon::handle::Handle;
-use neon::prelude::*;
-
-use crate::arrow::record_batch_to_buffer;
-use crate::error::ResultExt;
-use crate::neon_ext::js_object_ext::JsObjectExt;
-use crate::table::JsTable;
-use crate::{convert, runtime};
-
-pub struct JsQuery {}
-
-impl JsQuery {
-    pub(crate) fn js_search(mut cx: FunctionContext) -> JsResult<JsPromise> {
-        let js_table = cx.this().downcast_or_throw::<JsBox<JsTable>, _>(&mut cx)?;
-        let query_obj = cx.argument::<JsObject>(0)?;
-
-        let limit = query_obj
-            .get_opt::<JsNumber, _, _>(&mut cx, "_limit")?
-            .map(|value| {
-                let limit = value.value(&mut cx);
-                if limit <= 0.0 {
-                    panic!("Limit must be a positive integer");
-                }
-                limit as u64
-            });
-        let select = query_obj
-            .get_opt::<JsArray, _, _>(&mut cx, "_select")?
-            .map(|arr| {
-                let js_array = arr.deref();
-                let mut projection_vec: Vec<String> = Vec::new();
-                for i in 0..js_array.len(&mut cx) {
-                    let entry: Handle<JsString> = js_array.get(&mut cx, i).unwrap();
-                    projection_vec.push(entry.value(&mut cx));
-                }
-                projection_vec
-            });
-
-        let prefilter = query_obj
-            .get::<JsBoolean, _, _>(&mut cx, "_prefilter")?
-            .value(&mut cx);
-
-        let fast_search = query_obj
-            .get_opt::<JsBoolean, _, _>(&mut cx, "_fastSearch")?
-            .map(|val| val.value(&mut cx));
-
-        let is_electron = cx
-            .argument::<JsBoolean>(1)
-            .or_throw(&mut cx)?
-            .value(&mut cx);
-
-        let rt = runtime(&mut cx)?;
-
-        let (deferred, promise) = cx.promise();
-        let channel = cx.channel();
-        let table = js_table.table.clone();
-
-        let mut builder = table.query();
-        if let Some(filter) = query_obj
-            .get_opt::<JsString, _, _>(&mut cx, "_filter")?
-            .map(|s| s.value(&mut cx))
-        {
-            builder = builder.only_if(filter);
-        }
-        if let Some(select) = select {
-            builder = builder.select(Select::columns(select.as_slice()));
-        }
-        if let Some(limit) = limit {
-            builder = builder.limit(limit as usize);
-        };
-        if let Some(true) = fast_search {
-            builder = builder.fast_search();
-        }
-
-        let query_vector = query_obj.get_opt::<JsArray, _, _>(&mut cx, "_queryVector")?;
-        if let Some(query) = query_vector.map(|q| convert::js_array_to_vec(q.deref(), &mut cx)) {
-            let mut vector_builder = builder.nearest_to(query).unwrap();
-            if let Some(distance_type) = query_obj
-                .get_opt::<JsString, _, _>(&mut cx, "_metricType")?
-                .map(|s| s.value(&mut cx))
-                .map(|s| DistanceType::try_from(s.as_str()).unwrap())
-            {
-                vector_builder = vector_builder.distance_type(distance_type);
-            }
-
-            let nprobes = query_obj.get_usize(&mut cx, "_nprobes").or_throw(&mut cx)?;
-            vector_builder = vector_builder.nprobes(nprobes);
-
-            if !prefilter {
-                vector_builder = vector_builder.postfilter();
-            }
-            rt.spawn(async move {
-                let results = vector_builder
-                    .execute()
-                    .and_then(|stream| {
-                        stream
-                            .try_collect::<Vec<_>>()
-                            .map_err(lancedb::error::Error::from)
-                    })
-                    .await;
-
-                deferred.settle_with(&channel, move |mut cx| {
-                    let results = results.or_throw(&mut cx)?;
-                    let buffer = record_batch_to_buffer(results).or_throw(&mut cx)?;
-                    convert::new_js_buffer(buffer, &mut cx, is_electron)
-                });
-            });
-        } else {
-            rt.spawn(async move {
-                let results = builder
-                    .execute()
-                    .and_then(|stream| {
-                        stream
-                            .try_collect::<Vec<_>>()
-                            .map_err(lancedb::error::Error::from)
-                    })
-                    .await;
-
-                deferred.settle_with(&channel, move |mut cx| {
-                    let results = results.or_throw(&mut cx)?;
-                    let buffer = record_batch_to_buffer(results).or_throw(&mut cx)?;
-                    convert::new_js_buffer(buffer, &mut cx, is_electron)
-                });
-            });
-        };
-
-        Ok(promise)
-    }
-}
--- a/rust/ffi/node/src/table.rs
+++ b/rust/ffi/node/src/table.rs
@@ -1,645 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The LanceDB Authors
-
-use std::ops::Deref;
-
-use arrow_array::{RecordBatch, RecordBatchIterator};
-use lance::dataset::optimize::CompactionOptions;
-use lance::dataset::{ColumnAlteration, NewColumnTransform, WriteMode, WriteParams};
-use lancedb::table::{OptimizeAction, WriteOptions};
-
-use crate::arrow::{arrow_buffer_to_record_batch, record_batch_to_buffer};
-use lancedb::table::Table as LanceDbTable;
-use neon::prelude::*;
-use neon::types::buffer::TypedArray;
-
-use crate::error::ResultExt;
-use crate::{convert, runtime, JsDatabase};
-
-pub struct JsTable {
-    pub table: LanceDbTable,
-}
-
-impl Finalize for JsTable {}
-
-impl From<LanceDbTable> for JsTable {
-    fn from(table: LanceDbTable) -> Self {
-        Self { table }
-    }
-}
-
-impl JsTable {
-    pub(crate) fn js_create(mut cx: FunctionContext) -> JsResult<JsPromise> {
-        let db = cx
-            .this()
-            .downcast_or_throw::<JsBox<JsDatabase>, _>(&mut cx)?;
-        let table_name = cx.argument::<JsString>(0)?.value(&mut cx);
-        let buffer = cx.argument::<JsBuffer>(1)?;
-        let (batches, schema) =
-            arrow_buffer_to_record_batch(buffer.as_slice(&cx)).or_throw(&mut cx)?;
-
-        // Write mode
-        let mode = match cx.argument::<JsString>(2)?.value(&mut cx).as_str() {
-            "overwrite" => WriteMode::Overwrite,
-            "append" => WriteMode::Append,
-            "create" => WriteMode::Create,
-            _ => {
-                return cx.throw_error("Table::create only supports 'overwrite' and 'create' modes")
-            }
-        };
-        let params = WriteParams {
-            mode,
-            ..WriteParams::default()
-        };
-
-        let rt = runtime(&mut cx)?;
-        let channel = cx.channel();
-
-        let (deferred, promise) = cx.promise();
-        let database = db.database.clone();
-
-        rt.spawn(async move {
-            let batch_reader = RecordBatchIterator::new(batches.into_iter().map(Ok), schema);
-            let table_rst = database
-                .create_table(&table_name, batch_reader)
-                .write_options(WriteOptions {
-                    lance_write_params: Some(params),
-                })
-                .execute()
-                .await;
-
-            deferred.settle_with(&channel, move |mut cx| {
-                let table = table_rst.or_throw(&mut cx)?;
-                Ok(cx.boxed(Self::from(table)))
-            });
-        });
-        Ok(promise)
-    }
-
-    pub(crate) fn js_add(mut cx: FunctionContext) -> JsResult<JsPromise> {
-        let js_table = cx.this().downcast_or_throw::<JsBox<Self>, _>(&mut cx)?;
-        let buffer = cx.argument::<JsBuffer>(0)?;
-        let write_mode = cx.argument::<JsString>(1)?.value(&mut cx);
-        let (batches, schema) =
-            arrow_buffer_to_record_batch(buffer.as_slice(&cx)).or_throw(&mut cx)?;
-        let rt = runtime(&mut cx)?;
-        let channel = cx.channel();
-        let table = js_table.table.clone();
-
-        let (deferred, promise) = cx.promise();
-        let write_mode = match write_mode.as_str() {
-            "create" => WriteMode::Create,
-            "append" => WriteMode::Append,
-            "overwrite" => WriteMode::Overwrite,
-            s => return cx.throw_error(format!("invalid write mode {}", s)),
-        };
-
-        let params = WriteParams {
-            mode: write_mode,
-            ..WriteParams::default()
-        };
-
-        rt.spawn(async move {
-            let batch_reader = RecordBatchIterator::new(batches.into_iter().map(Ok), schema);
-            let add_result = table
-                .add(batch_reader)
-                .write_options(WriteOptions {
-                    lance_write_params: Some(params),
-                })
-                .execute()
-                .await;
-
-            deferred.settle_with(&channel, move |mut cx| {
-                add_result.or_throw(&mut cx)?;
-                Ok(cx.boxed(Self::from(table)))
-            });
-        });
-        Ok(promise)
-    }
-
-    pub(crate) fn js_count_rows(mut cx: FunctionContext) -> JsResult<JsPromise> {
-        let js_table = cx.this().downcast_or_throw::<JsBox<Self>, _>(&mut cx)?;
-        let filter = cx
-            .argument_opt(0)
-            .and_then(|filt| {
-                if filt.is_a::<JsUndefined, _>(&mut cx) || filt.is_a::<JsNull, _>(&mut cx) {
-                    None
-                } else {
-                    Some(
-                        filt.downcast_or_throw::<JsString, _>(&mut cx)
-                            .map(|js_filt| js_filt.deref().value(&mut cx)),
-                    )
-                }
-            })
-            .transpose()?;
-        let rt = runtime(&mut cx)?;
-        let (deferred, promise) = cx.promise();
-        let channel = cx.channel();
-        let table = js_table.table.clone();
-
-        rt.spawn(async move {
-            let num_rows_result = table.count_rows(filter).await;
-
-            deferred.settle_with(&channel, move |mut cx| {
-                let num_rows = num_rows_result.or_throw(&mut cx)?;
-                Ok(cx.number(num_rows as f64))
-            });
-        });
-        Ok(promise)
-    }
-
-    pub(crate) fn js_delete(mut cx: FunctionContext) -> JsResult<JsPromise> {
-        let js_table = cx.this().downcast_or_throw::<JsBox<Self>, _>(&mut cx)?;
-        let rt = runtime(&mut cx)?;
-        let (deferred, promise) = cx.promise();
-        let predicate = cx.argument::<JsString>(0)?.value(&mut cx);
-        let channel = cx.channel();
-        let table = js_table.table.clone();
-
-        rt.spawn(async move {
-            let delete_result = table.delete(&predicate).await;
-
-            deferred.settle_with(&channel, move |mut cx| {
-                delete_result.or_throw(&mut cx)?;
-                Ok(cx.boxed(Self::from(table)))
-            })
-        });
-        Ok(promise)
-    }
-
-    pub(crate) fn js_merge_insert(mut cx: FunctionContext) -> JsResult<JsPromise> {
-        let js_table = cx.this().downcast_or_throw::<JsBox<Self>, _>(&mut cx)?;
-        let rt = runtime(&mut cx)?;
-        let (deferred, promise) = cx.promise();
-        let channel = cx.channel();
-        let table = js_table.table.clone();
-
-        let key = cx.argument::<JsString>(0)?.value(&mut cx);
-        let mut builder = table.merge_insert(&[&key]);
-        if cx.argument::<JsBoolean>(1)?.value(&mut cx) {
-            let filter = cx.argument_opt(2).unwrap();
-            if filter.is_a::<JsNull, _>(&mut cx) {
-                builder.when_matched_update_all(None);
-            } else {
-                let filter = filter
-                    .downcast_or_throw::<JsString, _>(&mut cx)?
-                    .deref()
-                    .value(&mut cx);
-                builder.when_matched_update_all(Some(filter));
-            }
-        }
-        if cx.argument::<JsBoolean>(3)?.value(&mut cx) {
-            builder.when_not_matched_insert_all();
-        }
-        if cx.argument::<JsBoolean>(4)?.value(&mut cx) {
-            let filter = cx.argument_opt(5).unwrap();
-            if filter.is_a::<JsNull, _>(&mut cx) {
-                builder.when_not_matched_by_source_delete(None);
-            } else {
-                let filter = filter
-                    .downcast_or_throw::<JsString, _>(&mut cx)?
-                    .deref()
-                    .value(&mut cx);
-                builder.when_not_matched_by_source_delete(Some(filter));
-            }
-        }
-
-        let buffer = cx.argument::<JsBuffer>(6)?;
-        let (batches, schema) =
-            arrow_buffer_to_record_batch(buffer.as_slice(&cx)).or_throw(&mut cx)?;
-
-        rt.spawn(async move {
-            let new_data = RecordBatchIterator::new(batches.into_iter().map(Ok), schema);
-            let merge_insert_result = builder.execute(Box::new(new_data)).await;
-
-            deferred.settle_with(&channel, move |mut cx| {
-                merge_insert_result.or_throw(&mut cx)?;
-                Ok(cx.boxed(Self::from(table)))
-            })
-        });
-        Ok(promise)
-    }
-
-    pub(crate) fn js_update(mut cx: FunctionContext) -> JsResult<JsPromise> {
-        let js_table = cx.this().downcast_or_throw::<JsBox<Self>, _>(&mut cx)?;
-        let table = js_table.table.clone();
-
-        let rt = runtime(&mut cx)?;
-        let (deferred, promise) = cx.promise();
-        let channel = cx.channel();
-
-        // create a vector of updates from the passed map
-        let updates_arg = cx.argument::<JsObject>(1)?;
-        let properties = updates_arg.get_own_property_names(&mut cx)?;
-        let mut updates: Vec<(String, String)> =
-            Vec::with_capacity(properties.len(&mut cx) as usize);
-
-        let len_properties = properties.len(&mut cx);
-        for i in 0..len_properties {
-            let property = properties
-                .get_value(&mut cx, i)?
-                .downcast_or_throw::<JsString, _>(&mut cx)?;
-
-            let value = updates_arg
-                .get_value(&mut cx, property)?
-                .downcast_or_throw::<JsString, _>(&mut cx)?;
-
-            let property = property.value(&mut cx);
-            let value = value.value(&mut cx);
-            updates.push((property, value));
-        }
-
-        // get the filter/predicate if the user passed one
-        let predicate = cx.argument_opt(0);
-        let predicate = predicate.unwrap().downcast::<JsString, _>(&mut cx);
-        let predicate = match predicate {
-            Ok(_) => {
-                let val = predicate.map(|s| s.value(&mut cx)).unwrap();
-                Some(val)
-            }
-            Err(_) => {
-                // if the predicate is not string, check it's null otherwise an invalid
-                // type was passed
-                cx.argument::<JsNull>(0)?;
-                None
-            }
-        };
-
-        rt.spawn(async move {
-            let updates_arg = updates
-                .iter()
-                .map(|(k, v)| (k.as_str(), v.as_str()))
-                .collect::<Vec<_>>();
-
-            let predicate = predicate.as_deref();
-
-            let mut update_op = table.update();
-            if let Some(predicate) = predicate {
-                update_op = update_op.only_if(predicate);
-            }
-            for (column, value) in updates_arg {
-                update_op = update_op.column(column, value);
-            }
-            let update_result = update_op.execute().await;
-            deferred.settle_with(&channel, move |mut cx| {
-                update_result.or_throw(&mut cx)?;
-                Ok(cx.boxed(Self::from(table)))
-            })
-        });
-
-        Ok(promise)
-    }
-
-    pub(crate) fn js_cleanup(mut cx: FunctionContext) -> JsResult<JsPromise> {
-        let js_table = cx.this().downcast_or_throw::<JsBox<Self>, _>(&mut cx)?;
-        let rt = runtime(&mut cx)?;
-        let (deferred, promise) = cx.promise();
-        let table = js_table.table.clone();
-        let channel = cx.channel();
-
-        let older_than: i64 = cx
-            .argument_opt(0)
-            .and_then(|val| val.downcast::<JsNumber, _>(&mut cx).ok())
-            .map(|val| val.value(&mut cx) as i64)
-            .unwrap_or_else(|| 2 * 7 * 24 * 60); // 2 weeks
-        let older_than = chrono::Duration::try_minutes(older_than).unwrap();
-        let delete_unverified: Option<bool> = Some(
-            cx.argument_opt(1)
-                .and_then(|val| val.downcast::<JsBoolean, _>(&mut cx).ok())
-                .map(|val| val.value(&mut cx))
-                .unwrap_or_default(),
-        );
-        let error_if_tagged_old_versions: Option<bool> = Some(
-            cx.argument_opt(2)
-                .and_then(|val| val.downcast::<JsBoolean, _>(&mut cx).ok())
-                .map(|val| val.value(&mut cx))
-                .unwrap_or_default(),
-        );
-
-        rt.spawn(async move {
-            let stats = table
-                .optimize(OptimizeAction::Prune {
-                    older_than: Some(older_than),
-                    delete_unverified,
-                    error_if_tagged_old_versions,
-                })
-                .await;
-
-            deferred.settle_with(&channel, move |mut cx| {
-                let stats = stats.or_throw(&mut cx)?;
-
-                let prune_stats = stats.prune.as_ref().expect("Prune stats missing");
-                let output_metrics = JsObject::new(&mut cx);
-                let bytes_removed = cx.number(prune_stats.bytes_removed as f64);
-                output_metrics.set(&mut cx, "bytesRemoved", bytes_removed)?;
-
-                let old_versions = cx.number(prune_stats.old_versions as f64);
-                output_metrics.set(&mut cx, "oldVersions", old_versions)?;
-
-                let output_table = cx.boxed(Self::from(table));
-
-                let output = JsObject::new(&mut cx);
-                output.set(&mut cx, "metrics", output_metrics)?;
-                output.set(&mut cx, "newTable", output_table)?;
-
-                Ok(output)
-            })
-        });
-        Ok(promise)
-    }
-
-    pub(crate) fn js_compact(mut cx: FunctionContext) -> JsResult<JsPromise> {
-        let js_table = cx.this().downcast_or_throw::<JsBox<Self>, _>(&mut cx)?;
-        let rt = runtime(&mut cx)?;
-        let (deferred, promise) = cx.promise();
-        let table = js_table.table.clone();
-        let channel = cx.channel();
-
-        let js_options = cx.argument::<JsObject>(0)?;
-        let mut options = CompactionOptions::default();
-
-        if let Some(target_rows) =
-            js_options.get_opt::<JsNumber, _, _>(&mut cx, "targetRowsPerFragment")?
-        {
-            options.target_rows_per_fragment = target_rows.value(&mut cx) as usize;
-        }
-        if let Some(max_per_group) =
-            js_options.get_opt::<JsNumber, _, _>(&mut cx, "maxRowsPerGroup")?
-        {
-            options.max_rows_per_group = max_per_group.value(&mut cx) as usize;
-        }
-        if let Some(materialize_deletions) =
-            js_options.get_opt::<JsBoolean, _, _>(&mut cx, "materializeDeletions")?
-        {
-            options.materialize_deletions = materialize_deletions.value(&mut cx);
-        }
-        if let Some(materialize_deletions_threshold) =
-            js_options.get_opt::<JsNumber, _, _>(&mut cx, "materializeDeletionsThreshold")?
-        {
-            options.materialize_deletions_threshold =
-                materialize_deletions_threshold.value(&mut cx) as f32;
-        }
-        if let Some(num_threads) = js_options.get_opt::<JsNumber, _, _>(&mut cx, "numThreads")? {
-            options.num_threads = Some(num_threads.value(&mut cx) as usize);
-        }
-
-        rt.spawn(async move {
-            let stats = table
-                .optimize(OptimizeAction::Compact {
-                    options,
-                    remap_options: None,
-                })
-                .await;
-
-            deferred.settle_with(&channel, move |mut cx| {
-                let stats = stats.or_throw(&mut cx)?;
-                let stats = stats.compaction.as_ref().expect("Compact stats missing");
-
-                let output_metrics = JsObject::new(&mut cx);
-                let fragments_removed = cx.number(stats.fragments_removed as f64);
-                output_metrics.set(&mut cx, "fragmentsRemoved", fragments_removed)?;
-
-                let fragments_added = cx.number(stats.fragments_added as f64);
-                output_metrics.set(&mut cx, "fragmentsAdded", fragments_added)?;
-
-                let files_removed = cx.number(stats.files_removed as f64);
-                output_metrics.set(&mut cx, "filesRemoved", files_removed)?;
-
-                let files_added = cx.number(stats.files_added as f64);
-                output_metrics.set(&mut cx, "filesAdded", files_added)?;
-
-                let output_table = cx.boxed(Self::from(table));
-
-                let output = JsObject::new(&mut cx);
-                output.set(&mut cx, "metrics", output_metrics)?;
-                output.set(&mut cx, "newTable", output_table)?;
-
-                Ok(output)
-            })
-        });
-        Ok(promise)
-    }
-
-    pub(crate) fn js_list_indices(mut cx: FunctionContext) -> JsResult<JsPromise> {
-        let js_table = cx.this().downcast_or_throw::<JsBox<Self>, _>(&mut cx)?;
-        let rt = runtime(&mut cx)?;
-        let (deferred, promise) = cx.promise();
-        // let predicate = cx.argument::<JsString>(0)?.value(&mut cx);
-        let channel = cx.channel();
-        let table = js_table.table.clone();
-
-        rt.spawn(async move {
-            let indices = table.as_native().unwrap().load_indices().await;
-
-            deferred.settle_with(&channel, move |mut cx| {
-                let indices = indices.or_throw(&mut cx)?;
-
-                let output = JsArray::new(&mut cx, indices.len() as u32);
-                for (i, index) in indices.iter().enumerate() {
-                    let js_index = JsObject::new(&mut cx);
-                    let index_name = cx.string(index.index_name.clone());
-                    js_index.set(&mut cx, "name", index_name)?;
-
-                    let index_uuid = cx.string(index.index_uuid.clone());
-                    js_index.set(&mut cx, "uuid", index_uuid)?;
-
-                    let js_index_columns = JsArray::new(&mut cx, index.columns.len() as u32);
-                    for (j, column) in index.columns.iter().enumerate() {
-                        let js_column = cx.string(column.clone());
-                        js_index_columns.set(&mut cx, j as u32, js_column)?;
-                    }
-                    js_index.set(&mut cx, "columns", js_index_columns)?;
-
-                    output.set(&mut cx, i as u32, js_index)?;
-                }
-
-                Ok(output)
-            })
-        });
-        Ok(promise)
-    }
-
-    pub(crate) fn js_index_stats(mut cx: FunctionContext) -> JsResult<JsPromise> {
-        let js_table = cx.this().downcast_or_throw::<JsBox<Self>, _>(&mut cx)?;
-        let rt = runtime(&mut cx)?;
-        let (deferred, promise) = cx.promise();
-        let index_name = cx.argument::<JsString>(0)?.value(&mut cx);
-        let channel = cx.channel();
-        let table = js_table.table.clone();
-
-        rt.spawn(async move {
-            let load_stats = table.index_stats(index_name).await;
-
-            deferred.settle_with(&channel, move |mut cx| {
-                let stats = load_stats.or_throw(&mut cx)?;
-
-                if let Some(stats) = stats {
-                    let output = JsObject::new(&mut cx);
-                    let num_indexed_rows = cx.number(stats.num_indexed_rows as f64);
-                    output.set(&mut cx, "numIndexedRows", num_indexed_rows)?;
-                    let num_unindexed_rows = cx.number(stats.num_unindexed_rows as f64);
-                    output.set(&mut cx, "numUnindexedRows", num_unindexed_rows)?;
-                    if let Some(distance_type) = stats.distance_type {
-                        let distance_type = cx.string(distance_type.to_string());
-                        output.set(&mut cx, "distanceType", distance_type)?;
-                    }
-                    let index_type = cx.string(stats.index_type.to_string());
-                    output.set(&mut cx, "indexType", index_type)?;
-
-                    if let Some(num_indices) = stats.num_indices {
-                        let num_indices = cx.number(num_indices as f64);
-                        output.set(&mut cx, "numIndices", num_indices)?;
-                    }
-
-                    Ok(output.as_value(&mut cx))
-                } else {
-                    Ok(JsNull::new(&mut cx).as_value(&mut cx))
-                }
-            })
-        });
-
-        Ok(promise)
-    }
-
-    pub(crate) fn js_schema(mut cx: FunctionContext) -> JsResult<JsPromise> {
-        let js_table = cx.this().downcast_or_throw::<JsBox<Self>, _>(&mut cx)?;
-        let rt = runtime(&mut cx)?;
-        let (deferred, promise) = cx.promise();
-        let channel = cx.channel();
-        let table = js_table.table.clone();
-
-        let is_electron = cx
-            .argument::<JsBoolean>(0)
-            .or_throw(&mut cx)?
-            .value(&mut cx);
-
-        rt.spawn(async move {
-            let schema = table.schema().await;
-            deferred.settle_with(&channel, move |mut cx| {
-                let schema = schema.or_throw(&mut cx)?;
-                let batches = vec![RecordBatch::new_empty(schema)];
-                let buffer = record_batch_to_buffer(batches).or_throw(&mut cx)?;
-                convert::new_js_buffer(buffer, &mut cx, is_electron)
-            })
-        });
-        Ok(promise)
-    }
-
-    pub(crate) fn js_add_columns(mut cx: FunctionContext) -> JsResult<JsPromise> {
-        let expressions = cx
-            .argument::<JsArray>(0)?
-            .to_vec(&mut cx)?
-            .into_iter()
-            .map(|val| {
-                let obj = val.downcast_or_throw::<JsObject, _>(&mut cx)?;
-                let name = obj.get::<JsString, _, _>(&mut cx, "name")?.value(&mut cx);
-                let sql = obj
-                    .get::<JsString, _, _>(&mut cx, "valueSql")?
-                    .value(&mut cx);
-                Ok((name, sql))
-            })
-            .collect::<NeonResult<Vec<(String, String)>>>()?;
-
-        let transforms = NewColumnTransform::SqlExpressions(expressions);
-
-        let js_table = cx.this().downcast_or_throw::<JsBox<Self>, _>(&mut cx)?;
-        let rt = runtime(&mut cx)?;
-
-        let (deferred, promise) = cx.promise();
-        let channel = cx.channel();
-        let table = js_table.table.clone();
-
-        rt.spawn(async move {
-            let result = table.add_columns(transforms, None).await;
-            deferred.settle_with(&channel, move |mut cx| {
-                result.or_throw(&mut cx)?;
-                Ok(cx.undefined())
-            })
-        });
-
-        Ok(promise)
-    }
-
-    pub(crate) fn js_alter_columns(mut cx: FunctionContext) -> JsResult<JsPromise> {
-        let alterations = cx
-            .argument::<JsArray>(0)?
-            .to_vec(&mut cx)?
-            .into_iter()
-            .map(|val| {
-                let obj = val.downcast_or_throw::<JsObject, _>(&mut cx)?;
-                let path = obj.get::<JsString, _, _>(&mut cx, "path")?.value(&mut cx);
-                let rename = obj
-                    .get_opt::<JsString, _, _>(&mut cx, "rename")?
-                    .map(|val| val.value(&mut cx));
-                let nullable = obj
-                    .get_opt::<JsBoolean, _, _>(&mut cx, "nullable")?
-                    .map(|val| val.value(&mut cx));
-                // TODO: support data type here. Will need to do some serialization/deserialization
-
-                if rename.is_none() && nullable.is_none() {
-                    return cx.throw_error("At least one of 'name' or 'nullable' must be provided");
-                }
-
-                Ok(ColumnAlteration {
-                    path,
-                    rename,
-                    nullable,
-                    // TODO: wire up this field
-                    data_type: None,
-                })
-            })
-            .collect::<NeonResult<Vec<ColumnAlteration>>>()?;
-
-        let js_table = cx.this().downcast_or_throw::<JsBox<Self>, _>(&mut cx)?;
-        let rt = runtime(&mut cx)?;
-
-        let (deferred, promise) = cx.promise();
-        let channel = cx.channel();
-        let table = js_table.table.clone();
-
-        rt.spawn(async move {
-            let result = table.alter_columns(&alterations).await;
-            deferred.settle_with(&channel, move |mut cx| {
-                result.or_throw(&mut cx)?;
-                Ok(cx.undefined())
-            })
-        });
-
-        Ok(promise)
-    }
-
-    pub(crate) fn js_drop_columns(mut cx: FunctionContext) -> JsResult<JsPromise> {
-        let columns = cx
-            .argument::<JsArray>(0)?
-            .to_vec(&mut cx)?
-            .into_iter()
-            .map(|val| {
-                Ok(val
-                    .downcast_or_throw::<JsString, _>(&mut cx)?
-                    .value(&mut cx))
-            })
-            .collect::<NeonResult<Vec<String>>>()?;
-
-        let js_table = cx.this().downcast_or_throw::<JsBox<Self>, _>(&mut cx)?;
-        let rt = runtime(&mut cx)?;
-
-        let (deferred, promise) = cx.promise();
-        let channel = cx.channel();
-        let table = js_table.table.clone();
-
-        rt.spawn(async move {
-            let col_refs = columns.iter().map(|s| s.as_str()).collect::<Vec<_>>();
-            let result = table.drop_columns(&col_refs).await;
-            deferred.settle_with(&channel, move |mut cx| {
-                result.or_throw(&mut cx)?;
-                Ok(cx.undefined())
-            })
-        });
-
-        Ok(promise)
-    }
-
-    pub(crate) fn js_drop_index(_cx: FunctionContext) -> JsResult<JsPromise> {
-        todo!("not implemented")
-    }
-}
--- a/rust/lancedb/Cargo.toml
+++ b/rust/lancedb/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb"
-version = "0.21.2-beta.1"
+version = "0.21.2"
 edition.workspace = true
 description = "LanceDB: A serverless, low-latency vector database for AI applications"
 license.workspace = true
--- a/rust/lancedb/src/database/listing.rs
+++ b/rust/lancedb/src/database/listing.rs
@@ -13,10 +13,10 @@ use lance_datafusion::utils::StreamingWriteSource;
 use lance_encoding::version::LanceFileVersion;
 use lance_table::io::commit::commit_handler_from_url;
 use object_store::local::LocalFileSystem;
-use snafu::{OptionExt, ResultExt};
+use snafu::ResultExt;

 use crate::connection::ConnectRequest;
-use crate::error::{CreateDirSnafu, Error, InvalidTableNameSnafu, Result};
+use crate::error::{CreateDirSnafu, Error, Result};
 use crate::io::object_store::MirroringObjectStoreWrapper;
 use crate::table::NativeTable;
 use crate::utils::validate_table_name;
@@ -411,17 +411,13 @@ impl ListingDatabase {
    fn table_uri(&self, name: &str) -> Result<String> {
        validate_table_name(name)?;

-        let path = Path::new(&self.uri);
-        let table_uri = path.join(format!("{}.{}", name, LANCE_FILE_EXTENSION));
-
-        let mut uri = table_uri
-            .as_path()
-            .to_str()
-            .context(InvalidTableNameSnafu {
-                name,
-                reason: "Name is not valid URL",
-            })?
-            .to_string();
+        let mut uri = self.uri.clone();
+        // If the URI does not end with a slash, add one
+        if !uri.ends_with('/') {
+            uri.push('/');
+        }
+        // Append the table name with the lance file extension
+        uri.push_str(&format!("{}.{}", name, LANCE_FILE_EXTENSION));

        // If there are query string set on the connection, propagate to lance
        if let Some(query) = self.query_string.as_ref() {
--- a/rust/lancedb/src/query.rs
+++ b/rust/lancedb/src/query.rs
@@ -958,7 +958,8 @@ impl VectorQuery {
        if let Some(maximum_nprobes) = self.request.maximum_nprobes {
            if minimum_nprobes > maximum_nprobes {
                return Err(Error::InvalidInput {
-                    message: "minimum_nprobes must be less or equal to maximum_nprobes".to_string(),
+                    message: "minimum_nprobes must be less than or equal to maximum_nprobes"
+                        .to_string(),
                });
            }
        }
@@ -989,7 +990,8 @@ impl VectorQuery {
            }
            if maximum_nprobes < self.request.minimum_nprobes {
                return Err(Error::InvalidInput {
-                    message: "maximum_nprobes must be greater than minimum_nprobes".to_string(),
+                    message: "maximum_nprobes must be greater than or equal to minimum_nprobes"
+                        .to_string(),
                });
            }
        }
--- a/rust/lancedb/src/remote/client.rs
+++ b/rust/lancedb/src/remote/client.rs
@@ -41,6 +41,16 @@ impl Default for ClientConfig {
 /// How to handle timeouts for HTTP requests.
 #[derive(Clone, Default, Debug)]
 pub struct TimeoutConfig {
+    /// The overall timeout for the entire request.
+    ///
+    /// This includes connection, send, and read time. If the entire request
+    /// doesn't complete within this time, it will fail.
+    ///
+    /// You can also set the `LANCE_CLIENT_TIMEOUT` environment variable
+    /// to set this value. Use an integer value in seconds.
+    ///
+    /// By default, no overall timeout is set.
+    pub timeout: Option<Duration>,
    /// The timeout for creating a connection to the server.
    ///
    /// You can also set the `LANCE_CLIENT_CONNECT_TIMEOUT` environment variable
@@ -159,9 +169,9 @@ impl HttpSend for Sender {
 }

 impl RestfulLanceDbClient<Sender> {
-    fn get_timeout(passed: Option<Duration>, env_var: &str, default: Duration) -> Result<Duration> {
+    fn get_timeout(passed: Option<Duration>, env_var: &str) -> Result<Option<Duration>> {
        if let Some(passed) = passed {
-            Ok(passed)
+            Ok(Some(passed))
        } else if let Ok(timeout) = std::env::var(env_var) {
            let timeout = timeout.parse::<u64>().map_err(|_| Error::InvalidInput {
                message: format!(
@@ -169,9 +179,9 @@ impl RestfulLanceDbClient<Sender> {
                    env_var, timeout
                ),
            })?;
-            Ok(Duration::from_secs(timeout))
+            Ok(Some(Duration::from_secs(timeout)))
        } else {
-            Ok(default)
+            Ok(None)
        }
    }

@@ -203,28 +213,34 @@ impl RestfulLanceDbClient<Sender> {
        };

        // Get the timeouts
+        let timeout =
+            Self::get_timeout(client_config.timeout_config.timeout, "LANCE_CLIENT_TIMEOUT")?;
        let connect_timeout = Self::get_timeout(
            client_config.timeout_config.connect_timeout,
            "LANCE_CLIENT_CONNECT_TIMEOUT",
-            Duration::from_secs(120),
-        )?;
+        )?
+        .unwrap_or_else(|| Duration::from_secs(120));
        let read_timeout = Self::get_timeout(
            client_config.timeout_config.read_timeout,
            "LANCE_CLIENT_READ_TIMEOUT",
-            Duration::from_secs(300),
-        )?;
+        )?
+        .unwrap_or_else(|| Duration::from_secs(300));
        let pool_idle_timeout = Self::get_timeout(
            client_config.timeout_config.pool_idle_timeout,
            // Though it's confusing with the connect_timeout name, this is the
            // legacy name for this in the Python sync client. So we keep as-is.
            "LANCE_CLIENT_CONNECTION_TIMEOUT",
-            Duration::from_secs(300),
-        )?;
+        )?
+        .unwrap_or_else(|| Duration::from_secs(300));

-        let client = reqwest::Client::builder()
+        let mut client_builder = reqwest::Client::builder()
            .connect_timeout(connect_timeout)
            .read_timeout(read_timeout)
-            .pool_idle_timeout(pool_idle_timeout)
+            .pool_idle_timeout(pool_idle_timeout);
+        if let Some(timeout) = timeout {
+            client_builder = client_builder.timeout(timeout);
+        }
+        let client = client_builder
            .default_headers(Self::default_headers(
                api_key,
                region,
@@ -581,3 +597,51 @@ pub mod test_utils {
        }
    }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::time::Duration;
+
+    #[test]
+    fn test_timeout_config_default() {
+        let config = TimeoutConfig::default();
+        assert!(config.timeout.is_none());
+        assert!(config.connect_timeout.is_none());
+        assert!(config.read_timeout.is_none());
+        assert!(config.pool_idle_timeout.is_none());
+    }
+
+    #[test]
+    fn test_timeout_config_with_overall_timeout() {
+        let config = TimeoutConfig {
+            timeout: Some(Duration::from_secs(60)),
+            connect_timeout: Some(Duration::from_secs(10)),
+            read_timeout: Some(Duration::from_secs(30)),
+            pool_idle_timeout: Some(Duration::from_secs(300)),
+        };
+
+        assert_eq!(config.timeout, Some(Duration::from_secs(60)));
+        assert_eq!(config.connect_timeout, Some(Duration::from_secs(10)));
+        assert_eq!(config.read_timeout, Some(Duration::from_secs(30)));
+        assert_eq!(config.pool_idle_timeout, Some(Duration::from_secs(300)));
+    }
+
+    #[test]
+    fn test_client_config_with_timeout() {
+        let timeout_config = TimeoutConfig {
+            timeout: Some(Duration::from_secs(120)),
+            ..Default::default()
+        };
+
+        let client_config = ClientConfig {
+            timeout_config,
+            ..Default::default()
+        };
+
+        assert_eq!(
+            client_config.timeout_config.timeout,
+            Some(Duration::from_secs(120))
+        );
+    }
+}
--- a/rust/lancedb/src/table/datafusion.rs
+++ b/rust/lancedb/src/table/datafusion.rs
@@ -85,6 +85,14 @@ impl ExecutionPlan for MetadataEraserExec {
        vec![&self.input]
    }

+    fn maintains_input_order(&self) -> Vec<bool> {
+        vec![true; self.children().len()]
+    }
+
+    fn benefits_from_input_partitioning(&self) -> Vec<bool> {
+        vec![false; self.children().len()]
+    }
+
    fn with_new_children(
        self: Arc<Self>,
        children: Vec<Arc<dyn ExecutionPlan>>,
@@ -486,7 +494,6 @@ pub mod tests {
        TestFixture::check_plan(
            plan,
            "MetadataEraserExec
-             RepartitionExec:...
             ProjectionExec:...
             LanceRead:...",
        )
Author	SHA1	Message	Date
David Myriel	3e9f0ac784	Update mkdocs.yml	2025-08-06 17:17:45 -07:00
Will Jones	8ffe992a6f	fix: always uses slashes in table uris (#2575 ) Closes #2574	2025-08-05 12:12:57 -07:00
Will Jones	9d683e4f0b	feat: infer vector columns when name contains 'vector' or 'embedding' (#2547 ) ## Summary - Enhanced vector column detection to use substring matching instead of exact matching - Now detects columns with names containing "vector" or "embedding" (case-insensitive) - Added integer vector support to Node.js implementation (matching Python) - Comprehensive test coverage for both float and integer vector types ## Changes ### Python (`python/python/lancedb/table.py`) - Updated `_infer_target_schema()` to use substring matching with helper function `_is_vector_column()` - Preserved original field names instead of forcing "vector" - Consolidated duplicate logic for better maintainability ### Node.js (`nodejs/lancedb/arrow.ts`) - Enhanced type inference with `nameSuggestsVectorColumn()` helper function - Added `isAllIntegers()` function with performance optimization (checks first 10 elements) - Implemented integer vector support using `Uint8` type (matching Python) - Improved type safety by removing `any` usage ### Tests - Python: Added `test_infer_target_schema_with_vector_embedding_names()` in `test_util.py` - Node.js: Added comprehensive test case in `arrow.test.ts` - Both test suites cover various naming patterns and integer/float vector types ## Examples of newly supported column names: - `user_vector`, `text_embedding`, `doc_embeddings` - `my_vector_field`, `embedding_model` - `VECTOR_COL`, `Vector_Mixed` (case-insensitive) - Both float and integer arrays are properly converted to fixed-size lists ## Test plan - [x] All existing tests pass (backward compatibility maintained) - [x] New tests pass for both Python and Node.js implementations - [x] Integer vector detection works correctly in Node.js - [x] Code passes linting and formatting checks - [x] Performance optimized for large vector arrays Fixes #2546 🤖 Generated with [Claude Code](https://claude.ai/code) --------- Co-authored-by: Claude <noreply@anthropic.com>	2025-08-04 15:36:49 -07:00
Will Jones	0a1ea1858d	chore: remove vectordb package (#2564 ) ```shell git rm -r rust/ffi git rm -r node git rm ci/build_windows_artifacts.ps1 git rm ci/build_windows_artifacts_nodejs.ps1 git rm ci/build_linux_artifacts.sh git rm ci/build_macos_artifacts.sh git rm -r ci/manylinux_node git rm .github/workflows/node.yml ```	2025-08-04 14:14:33 -07:00
Poornachandra.A.N	7d0127b376	feat(embeddings): add siglip embedding support to lancedb (#2499 ) ### Summary This PR adds SigLIP (Sigmoid Loss Image Pretraining) as a new embedding model in the LanceDB embedding registry. SigLIP improves image-text alignment performance using sigmoid-based contrastive loss and offers robust zero-shot generalization. Fixes #2498 ### What’s Implemented #### 1. `SigLIP` Embedding Class * Added `SigLIP` support under `python/lancedb/embeddings/siglip.py` * Implements: * `compute_source_embeddings` * `_batch_generate_embeddings` * Normalization logic * Batch-wise progress logging for image embedding #### 2. Registry Integration * Registered `SigLIP` in `embeddings/__init__.py` * `SigLIP` now usable via `connect(..., embedding="siglip")` #### 3. Evaluation Benchmark Support * Added SigLIP to `test_embeddings_slow.py` for side-by-side benchmarking with OpenCLIP and ImageBind ### New Test Methods #### `test_siglip` * End-to-end test to verify embeddings table creation and vector shape for SigLIP ![WhatsApp Image 2025-07-10 at 18 00 27_a3368163](https://github.com/user-attachments/assets/e5582ee1-80a3-43d7-a7a1-26ceecce9f4d) #### `test_siglip_vs_openclip_vs_imagebind_benchmark_full` * Benchmarks: * Recall\@1 / 5 / 10 * mAP (Mean Average Precision) * Embedding & Search Latency * Dimensionality reporting ![WhatsApp Image 2025-07-10 at 18 12 13_22c67a84](https://github.com/user-attachments/assets/455bf30f-62b7-4684-a3f3-ad52e2a1ffe5) ### Notes * SigLIP outputs 768D embeddings (vs 512D for OpenCLIP) * Benchmark shows competitive performance despite higher dimensionality * I'm still new to contributing to open-source and learning as I go. Please feel free to suggest any improvements — I'm happy to make changes!	2025-08-04 11:42:39 -07:00
Will Jones	02595dc475	feat: add overall timeout parameter to remote client (#2550 ) ## Summary - Adds an overall `timeout` parameter to `TimeoutConfig` that limits the total time for the entire request - Can be set via config or `LANCE_CLIENT_TIMEOUT` environment variable - Exposed in Python and Node.js bindings - Includes comprehensive tests ## Test plan - [x] Unit tests for Rust TimeoutConfig - [x] Integration tests for Python bindings - [x] Integration tests for Node.js bindings - [x] All existing tests pass 🤖 Generated with [Claude Code](https://claude.ai/code) Co-authored-by: Claude <noreply@anthropic.com>	2025-08-04 10:06:55 -07:00
Reed Loden	f23327af79	fix: use SPDX-compliant license name for nodejs packages (#2558 ) Update license field from `Apache 2.0` to be `Apache-2.0` for all Node.js packages. This was causing GitHub's Dependency Review license check to fail with: > The validity of the licenses of the dependencies below could not be determined. Ensure that they are valid SPDX licenses	2025-08-04 09:54:53 -07:00
Wyatt Alt	c7afa724dd	chore: update npm lockfile (#2563 )	2025-07-30 18:28:06 -07:00
BubbleCal	c359cec504	chore: upgrade lance to 0.32.1-beta.2 (#2562 ) Signed-off-by: BubbleCal <bubble-cal@outlook.com>	2025-07-30 14:31:04 -07:00
Mark McCaskey	fe76496a59	fix: `.nprobes` method in python bindings, improve error messages (#2556 ) `nprobes` with a value greater than 20 fails with the minimum error: ``` self = <lancedb.query.AsyncVectorQuery object at 0x10b749720>, minimum_nprobes = 30 def minimum_nprobes(self, minimum_nprobes: int) -> Self: """Set the minimum number of probes to use. See `nprobes` for more details. These partitions will be searched on every indexed vector query and will increase recall at the expense of latency. """ > self._inner.minimum_nprobes(minimum_nprobes) E ValueError: Invalid input, minimum_nprobes must be less than or equal to maximum_nprobes python/lancedb/query.py:2744: ValueError ``` Putting the max set before the min seems reasonable but it causes this reasonable case to fail: ``` def test_nprobes_min_max_works_sync(table): LanceVectorQueryBuilder(table, [0, 0], "vector").minimum_nprobes(2).maximum_nprobes(4).to_list() ``` with ``` self = <lancedb.query.AsyncVectorQuery object at 0x1203f1c90>, maximum_nprobes = 4 def maximum_nprobes(self, maximum_nprobes: int) -> Self: """Set the maximum number of probes to use. See `nprobes` for more details. If this value is greater than `minimum_nprobes` then the excess partitions will be searched only if we have not found enough results. This can be useful when there is a narrow filter to allow these queries to spend more time searching and avoid potential false negatives. If this value is 0 then no limit will be applied and all partitions could be searched if needed to satisfy the limit. """ > self._inner.maximum_nprobes(maximum_nprobes) E ValueError: Invalid input, maximum_nprobes must be greater than or equal to minimum_nprobes python/lancedb/query.py:2761: ValueError ```. The case I care about is where min == max, but this solution handles it even if they're not. If both min and max exist, we set both to the minimum and then set the max. 
This isn't 100% the same as the minimum setter checks for 0 on the min and `.nprobes` does not do any sanity checking at all. But I figured this was the most reasonable and general solution without touching more of this code. As part of this I noticed the error messages were a bit ambiguous so I made them symmetric and clarified them while I was here.	2025-07-30 09:23:25 -07:00
Weston Pace	67ec1fe75c	feat: don't repartition for the sake of the metadata eraser (#2559 ) The `MetadataEraserExec` is super lightweight and doesn't really justify partitioning. I had a plan recently that was partitioning just for this node and that seems wasteful.	2025-07-29 19:26:30 -07:00
Lance Release	70d9b04ba5	Bump version: 0.21.2-beta.2 → 0.21.2	2025-07-25 20:32:41 +00:00
Lance Release	b0d4a79c35	Bump version: 0.21.2-beta.1 → 0.21.2-beta.2	2025-07-25 20:31:50 +00:00