Bump version: 0.20.0-beta.1 → 0.20.0-beta.2

2025-12-24 22:09:58 +00:00 · 2025-06-04 07:14:06 +00:00
187 changed files with 15522 additions and 5400 deletions
--- a/.bumpversion.toml
+++ b/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.21.2"
+current_version = "0.20.0-beta.2"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
@@ -50,6 +50,11 @@ pre_commit_hooks = [
 optional_value = "final"
 values = ["beta", "final"]

+[[tool.bumpversion.files]]
+filename = "node/package.json"
+replace = "\"version\": \"{new_version}\","
+search = "\"version\": \"{current_version}\","
+
 [[tool.bumpversion.files]]
 filename = "nodejs/package.json"
 replace = "\"version\": \"{new_version}\","
@@ -61,8 +66,39 @@ glob = "nodejs/npm/*/package.json"
 replace = "\"version\": \"{new_version}\","
 search = "\"version\": \"{current_version}\","

+# vectordb node binary packages
+[[tool.bumpversion.files]]
+glob = "node/package.json"
+replace = "\"@lancedb/vectordb-darwin-arm64\": \"{new_version}\""
+search = "\"@lancedb/vectordb-darwin-arm64\": \"{current_version}\""
+
+[[tool.bumpversion.files]]
+glob = "node/package.json"
+replace = "\"@lancedb/vectordb-darwin-x64\": \"{new_version}\""
+search = "\"@lancedb/vectordb-darwin-x64\": \"{current_version}\""
+
+[[tool.bumpversion.files]]
+glob = "node/package.json"
+replace = "\"@lancedb/vectordb-linux-arm64-gnu\": \"{new_version}\""
+search = "\"@lancedb/vectordb-linux-arm64-gnu\": \"{current_version}\""
+
+[[tool.bumpversion.files]]
+glob = "node/package.json"
+replace = "\"@lancedb/vectordb-linux-x64-gnu\": \"{new_version}\""
+search = "\"@lancedb/vectordb-linux-x64-gnu\": \"{current_version}\""
+
+[[tool.bumpversion.files]]
+glob = "node/package.json"
+replace = "\"@lancedb/vectordb-win32-x64-msvc\": \"{new_version}\""
+search = "\"@lancedb/vectordb-win32-x64-msvc\": \"{current_version}\""
+
 # Cargo files
 # ------------
+[[tool.bumpversion.files]]
+filename = "rust/ffi/node/Cargo.toml"
+replace = "\nversion = \"{new_version}\""
+search = "\nversion = \"{current_version}\""
+
 [[tool.bumpversion.files]]
 filename = "rust/lancedb/Cargo.toml"
 replace = "\nversion = \"{new_version}\""
--- a/.github/workflows/cargo-publish.yml
+++ b/.github/workflows/cargo-publish.yml
@@ -5,8 +5,8 @@ on:
    tags-ignore:
      # We don't publish pre-releases for Rust. Crates.io is just a source
      # distribution, so we don't need to publish pre-releases.
-      - "v*-beta*"
-      - "*-v*" # for example, python-vX.Y.Z
+      - 'v*-beta*'
+      - '*-v*' # for example, python-vX.Y.Z

 env:
  # This env var is used by Swatinem/rust-cache@v2 for the cache
@@ -19,8 +19,6 @@ env:
 jobs:
  build:
    runs-on: ubuntu-22.04
-    permissions:
-      id-token: write
    timeout-minutes: 30
    # Only runs on tags that matches the make-release action
    if: startsWith(github.ref, 'refs/tags/v')
@@ -33,8 +31,6 @@ jobs:
        run: |
          sudo apt update
          sudo apt install -y protobuf-compiler libssl-dev
-      - uses: rust-lang/crates-io-auth-action@v1
-        id: auth
      - name: Publish the package
        run: |
-          cargo publish -p lancedb --all-features --token ${{ steps.auth.outputs.token }}
+          cargo publish -p lancedb --all-features --token ${{ secrets.CARGO_REGISTRY_TOKEN }}
--- a/.github/workflows/make-release-commit.yml
+++ b/.github/workflows/make-release-commit.yml
@@ -84,7 +84,7 @@ jobs:
        run: |
          pip install bump-my-version PyGithub packaging
          bash ci/bump_version.sh ${{ inputs.type }} ${{ inputs.bump-minor }} v $COMMIT_BEFORE_BUMP
-          bash ci/update_lockfiles.sh --amend
+          bash ci/update_lockfiles.sh
      - name: Push new version tag
        if: ${{ !inputs.dry_run }}
        uses: ad-m/github-push-action@master
@@ -93,3 +93,11 @@ jobs:
          github_token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}
          branch: ${{ github.ref }}
          tags: true
+      - uses: ./.github/workflows/update_package_lock
+        if: ${{ !inputs.dry_run && inputs.other }}
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+      - uses: ./.github/workflows/update_package_lock_nodejs
+        if: ${{ !inputs.dry_run && inputs.other }}
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
--- a/.github/workflows/node.yml
+++ b/.github/workflows/node.yml
@@ -0,0 +1,147 @@
+name: Node
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    paths:
+      - node/**
+      - rust/ffi/node/**
+      - .github/workflows/node.yml
+      - docker-compose.yml
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+env:
+  # Disable full debug symbol generation to speed up CI build and keep memory down
+  # "1" means line tables only, which is useful for panic tracebacks.
+  #
+  # Use native CPU to accelerate tests if possible, especially for f16
+  # target-cpu=haswell fixes failing ci build
+  RUSTFLAGS: "-C debuginfo=1 -C target-cpu=haswell -C target-feature=+f16c,+avx2,+fma"
+  RUST_BACKTRACE: "1"
+
+jobs:
+  linux:
+    name: Linux (Node ${{ matrix.node-version }})
+    timeout-minutes: 30
+    strategy:
+      matrix:
+        node-version: [ "18", "20" ]
+    runs-on: "ubuntu-22.04"
+    defaults:
+      run:
+        shell: bash
+        working-directory: node
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
+        lfs: true
+    - uses: actions/setup-node@v3
+      with:
+        node-version: ${{ matrix.node-version }}
+        cache: 'npm'
+        cache-dependency-path: node/package-lock.json
+    - uses: Swatinem/rust-cache@v2
+    - name: Install dependencies
+      run: |
+        sudo apt update
+        sudo apt install -y protobuf-compiler libssl-dev
+    - name: Build
+      run: |
+        npm ci
+        npm run build
+        npm run pack-build
+        npm install --no-save ./dist/lancedb-vectordb-*.tgz
+        # Remove index.node to test with dependency installed
+        rm index.node
+    - name: Test
+      run: npm run test
+  macos:
+    timeout-minutes: 30
+    runs-on: "macos-13"
+    defaults:
+      run:
+        shell: bash
+        working-directory: node
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
+        lfs: true
+    - uses: actions/setup-node@v3
+      with:
+        node-version: 20
+        cache: 'npm'
+        cache-dependency-path: node/package-lock.json
+    - uses: Swatinem/rust-cache@v2
+    - name: Install dependencies
+      run: brew install protobuf
+    - name: Build
+      run: |
+        npm ci
+        npm run build
+        npm run pack-build
+        npm install --no-save ./dist/lancedb-vectordb-*.tgz
+        # Remove index.node to test with dependency installed
+        rm index.node
+    - name: Test
+      run: |
+        npm run test
+  aws-integtest:
+    timeout-minutes: 45
+    runs-on: "ubuntu-22.04"
+    defaults:
+      run:
+        shell: bash
+        working-directory: node
+    env:
+      AWS_ACCESS_KEY_ID: ACCESSKEY
+      AWS_SECRET_ACCESS_KEY: SECRETKEY
+      AWS_DEFAULT_REGION: us-west-2
+      # this one is for s3
+      AWS_ENDPOINT: http://localhost:4566
+      # this one is for dynamodb
+      DYNAMODB_ENDPOINT: http://localhost:4566
+      ALLOW_HTTP: true
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
+        lfs: true
+    - uses: actions/setup-node@v3
+      with:
+        node-version: 20
+        cache: 'npm'
+        cache-dependency-path: node/package-lock.json
+    - name: start local stack
+      run: docker compose -f ../docker-compose.yml up -d --wait
+    - name: create s3
+      run: aws s3 mb s3://lancedb-integtest --endpoint $AWS_ENDPOINT
+    - name: create ddb
+      run: |
+        aws dynamodb create-table \
+          --table-name lancedb-integtest \
+          --attribute-definitions '[{"AttributeName": "base_uri", "AttributeType": "S"}, {"AttributeName": "version", "AttributeType": "N"}]' \
+          --key-schema '[{"AttributeName": "base_uri", "KeyType": "HASH"}, {"AttributeName": "version", "KeyType": "RANGE"}]' \
+          --provisioned-throughput '{"ReadCapacityUnits": 10, "WriteCapacityUnits": 10}' \
+          --endpoint-url $DYNAMODB_ENDPOINT
+    - uses: Swatinem/rust-cache@v2
+    - name: Install dependencies
+      run: |
+        sudo apt update
+        sudo apt install -y protobuf-compiler libssl-dev
+    - name: Build
+      run: |
+        npm ci
+        npm run build
+        npm run pack-build
+        npm install --no-save ./dist/lancedb-vectordb-*.tgz
+        # Remove index.node to test with dependency installed
+        rm index.node
+    - name: Test
+      run: npm run integration-test
--- a/.github/workflows/npm-publish.yml
+++ b/.github/workflows/npm-publish.yml
@@ -365,3 +365,184 @@ jobs:
            ARGS="$ARGS --tag preview"
          fi
          npm publish $ARGS
+
+
+  # ----------------------------------------------------------------------------
+  # vectordb release (legacy)
+  # ----------------------------------------------------------------------------
+  # TODO: delete this when we drop vectordb
+  node:
+    name: vectordb Typescript
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        shell: bash
+        working-directory: node
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - uses: actions/setup-node@v3
+        with:
+          node-version: 20
+          cache: "npm"
+          cache-dependency-path: node/package-lock.json
+      - name: Install dependencies
+        run: |
+          sudo apt update
+          sudo apt install -y protobuf-compiler libssl-dev
+      - name: Build
+        run: |
+          npm ci
+          npm run tsc
+          npm pack
+      - name: Upload Linux Artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: node-package
+          path: |
+            node/vectordb-*.tgz
+
+  node-macos:
+    name: vectordb ${{ matrix.config.arch }}
+    strategy:
+      matrix:
+        config:
+          - arch: x86_64-apple-darwin
+            runner: macos-13
+          - arch: aarch64-apple-darwin
+            # macos-14 runners are arm64 (Apple Silicon).
+            runner: macos-14
+    runs-on: ${{ matrix.config.runner }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Install system dependencies
+        run: brew install protobuf
+      - name: Install npm dependencies
+        run: |
+          cd node
+          npm ci
+      - name: Build MacOS native node modules
+        run: bash ci/build_macos_artifacts.sh ${{ matrix.config.arch }}
+      - name: Upload Darwin Artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: node-native-darwin-${{ matrix.config.arch }}
+          path: |
+            node/dist/lancedb-vectordb-darwin*.tgz
+
+  node-linux-gnu:
+    name: vectordb (${{ matrix.config.arch}}-unknown-linux-gnu)
+    runs-on: ${{ matrix.config.runner }}
+    strategy:
+      fail-fast: false
+      matrix:
+        config:
+          - arch: x86_64
+            runner: ubuntu-latest
+          - arch: aarch64
+            # For successful fat LTO builds, we need a large runner to avoid OOM errors.
+            runner: warp-ubuntu-latest-arm64-4x
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      # To avoid OOM errors on ARM, we create a swap file.
+      - name: Configure aarch64 build
+        if: ${{ matrix.config.arch == 'aarch64' }}
+        run: |
+          free -h
+          sudo fallocate -l 16G /swapfile
+          sudo chmod 600 /swapfile
+          sudo mkswap /swapfile
+          sudo swapon /swapfile
+          echo "/swapfile swap swap defaults 0 0" | sudo tee -a /etc/fstab  # a plain ">>" redirect is not run under sudo
+          # print info
+          swapon --show
+          free -h
+      - name: Build Linux Artifacts
+        run: |
+          bash ci/build_linux_artifacts.sh ${{ matrix.config.arch }} ${{ matrix.config.arch }}-unknown-linux-gnu
+      - name: Upload Linux Artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: node-native-linux-${{ matrix.config.arch }}-gnu
+          path: |
+            node/dist/lancedb-vectordb-linux*.tgz
+
+  node-windows:
+    name: vectordb ${{ matrix.target }}
+    runs-on: windows-2022
+    strategy:
+      fail-fast: false
+      matrix:
+        target: [x86_64-pc-windows-msvc]
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Install Protoc v21.12
+        working-directory: C:\
+        run: |
+          New-Item -Path 'C:\protoc' -ItemType Directory
+          Set-Location C:\protoc
+          Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
+          7z x protoc.zip
+          Add-Content $env:GITHUB_PATH "C:\protoc\bin"
+        shell: powershell
+      - name: Install npm dependencies
+        run: |
+          cd node
+          npm ci
+      - name: Build Windows native node modules
+        run: .\ci\build_windows_artifacts.ps1 ${{ matrix.target }}
+      - name: Upload Windows Artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: node-native-windows
+          path: |
+            node/dist/lancedb-vectordb-win32*.tgz
+
+  release:
+    name: vectordb NPM Publish
+    needs: [node, node-macos, node-linux-gnu, node-windows]
+    runs-on: ubuntu-latest
+    # Only runs on tags that matches the make-release action
+    if: startsWith(github.ref, 'refs/tags/v')
+    steps:
+      - uses: actions/download-artifact@v4
+        with:
+          pattern: node-*
+      - name: Display structure of downloaded files
+        run: ls -R
+      - uses: actions/setup-node@v3
+        with:
+          node-version: 20
+          registry-url: "https://registry.npmjs.org"
+      - name: Publish to NPM
+        env:
+          NODE_AUTH_TOKEN: ${{ secrets.LANCEDB_NPM_REGISTRY_TOKEN }}
+        run: |
+          # Tag beta as "preview" instead of default "latest". See lancedb
+          # npm publish step for more info.
+          if [[ $GITHUB_REF =~ refs/tags/v(.*)-beta.* ]]; then
+            PUBLISH_ARGS="--tag preview"
+          fi
+
+          mv */*.tgz .
+          for filename in *.tgz; do
+            npm publish $PUBLISH_ARGS $filename
+          done
+      - name: Deprecate
+        env:
+          NODE_AUTH_TOKEN: ${{ secrets.LANCEDB_NPM_REGISTRY_TOKEN }}
+        # We need to deprecate the old package to avoid confusion.
+        # Each time we publish a new version, it gets undeprecated.
+        run: npm deprecate vectordb "Use @lancedb/lancedb instead."
+      - name: Notify Slack Action
+        uses: ravsamhq/notify-slack-action@2.3.0
+        if: ${{ always() }}
+        with:
+          status: ${{ job.status }}
+          notify_when: "failure"
+          notification_title: "{workflow} is failing"
+        env:
+          SLACK_WEBHOOK_URL: ${{ secrets.ACTION_MONITORING_SLACK }}
--- a/.github/workflows/update_package_lock/action.yml
+++ b/.github/workflows/update_package_lock/action.yml
@@ -0,0 +1,33 @@
+name: update_package_lock
+description: "Update node's package.lock"
+
+inputs:
+  github_token:
+    required: true
+    description: "github token for the repo"
+
+runs:
+  using: "composite"
+  steps:
+    - uses: actions/setup-node@v3
+      with:
+        node-version: 20
+    - name: Set git configs
+      shell: bash
+      run: |
+        git config user.name 'Lance Release'
+        git config user.email 'lance-dev@lancedb.com'
+    - name: Update package-lock.json file
+      working-directory: ./node
+      run: |
+        npm install
+        git add package-lock.json
+        git commit -m "Updating package-lock.json"
+      shell: bash
+    - name: Push changes
+      if: ${{ inputs.dry_run != 'true' }} # comparison must be inside the expression; text outside it made the condition an always-truthy string
+      uses: ad-m/github-push-action@master
+      with:
+        github_token: ${{ inputs.github_token }}
+        branch: main
+        tags: true
--- a/.github/workflows/update_package_lock_nodejs/action.yml
+++ b/.github/workflows/update_package_lock_nodejs/action.yml
@@ -0,0 +1,33 @@
+name: update_package_lock_nodejs
+description: "Update nodejs's package.lock"
+
+inputs:
+  github_token:
+    required: true
+    description: "github token for the repo"
+
+runs:
+  using: "composite"
+  steps:
+    - uses: actions/setup-node@v3
+      with:
+        node-version: 20
+    - name: Set git configs
+      shell: bash
+      run: |
+        git config user.name 'Lance Release'
+        git config user.email 'lance-dev@lancedb.com'
+    - name: Update package-lock.json file
+      working-directory: ./nodejs
+      run: |
+        npm install
+        git add package-lock.json
+        git commit -m "Updating package-lock.json"
+      shell: bash
+    - name: Push changes
+      if: ${{ inputs.dry_run != 'true' }} # comparison must be inside the expression; text outside it made the condition an always-truthy string
+      uses: ad-m/github-push-action@master
+      with:
+        github_token: ${{ inputs.github_token }}
+        branch: main
+        tags: true
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -1,24 +0,0 @@
-LanceDB is a database designed for retrieval, including vector, full-text, and hybrid search.
-It is a wrapper around Lance. There are two backends: local (in-process like SQLite) and
-remote (against LanceDB Cloud).
-
-The core of LanceDB is written in Rust. There are bindings in Python, Typescript, and Java.
-
-Project layout:
-
-* `rust/lancedb`: The LanceDB core Rust implementation.
-* `python`: The Python bindings, using PyO3.
-* `nodejs`: The Typescript bindings, using napi-rs
-* `java`: The Java bindings
-
-(`rust/ffi` and `node/` are for a deprecated package. You can ignore them.)
-
-Common commands:
-
-* Check for compiler errors: `cargo check --features remote --tests --examples`
-* Run tests: `cargo test --features remote --tests`
-* Run specific test: `cargo test --features remote -p <package_name> --test <test_name>`
-* Lint: `cargo clippy --features remote --tests --examples`
-* Format: `cargo fmt --all`
-
-Before committing changes, run formatting.
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,5 +1,6 @@
 [workspace]
 members = [
+    "rust/ffi/node",
    "rust/lancedb",
    "nodejs",
    "python",
@@ -20,16 +21,14 @@ categories = ["database-implementations"]
 rust-version = "1.78.0"

 [workspace.dependencies]
-lance = { "version" = "=0.32.1", "features" = [
-    "dynamodb",
-], "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
-lance-io = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
-lance-index = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
-lance-linalg = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
-lance-table = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
-lance-testing = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
-lance-datafusion = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
-lance-encoding = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+lance = { "version" = "=0.29.0", "features" = ["dynamodb"], tag = "v0.29.0-beta.2", git="https://github.com/lancedb/lance.git" }
+lance-io = { version = "=0.29.0", tag = "v0.29.0-beta.2", git="https://github.com/lancedb/lance.git" }
+lance-index = { version = "=0.29.0", tag = "v0.29.0-beta.2", git="https://github.com/lancedb/lance.git" }
+lance-linalg = { version = "=0.29.0", tag = "v0.29.0-beta.2", git="https://github.com/lancedb/lance.git" }
+lance-table = { version = "=0.29.0", tag = "v0.29.0-beta.2", git="https://github.com/lancedb/lance.git" }
+lance-testing = { version = "=0.29.0", tag = "v0.29.0-beta.2", git="https://github.com/lancedb/lance.git" }
+lance-datafusion = { version = "=0.29.0", tag = "v0.29.0-beta.2", git="https://github.com/lancedb/lance.git" }
+lance-encoding = { version = "=0.29.0", tag = "v0.29.0-beta.2", git="https://github.com/lancedb/lance.git" }
 # Note that this one does not include pyarrow
 arrow = { version = "55.1", optional = false }
 arrow-array = "55.1"
@@ -40,20 +39,20 @@ arrow-schema = "55.1"
 arrow-arith = "55.1"
 arrow-cast = "55.1"
 async-trait = "0"
-datafusion = { version = "48.0", default-features = false }
-datafusion-catalog = "48.0"
-datafusion-common = { version = "48.0", default-features = false }
-datafusion-execution = "48.0"
-datafusion-expr = "48.0"
-datafusion-physical-plan = "48.0"
+datafusion = { version = "47.0", default-features = false }
+datafusion-catalog = "47.0"
+datafusion-common = { version = "47.0", default-features = false }
+datafusion-execution = "47.0"
+datafusion-expr = "47.0"
+datafusion-physical-plan = "47.0"
 env_logger = "0.11"
-half = { "version" = "2.6.0", default-features = false, features = [
+half = { "version" = "=2.5.0", default-features = false, features = [
    "num-traits",
 ] }
 futures = "0"
 log = "0.4"
 moka = { version = "0.12", features = ["future"] }
-object_store = "0.12.0"
+object_store = "0.11.0"
 pin-project = "1.0.7"
 snafu = "0.8"
 url = "2"
--- a/ci/build_linux_artifacts.sh
+++ b/ci/build_linux_artifacts.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+set -e
+ARCH=${1:-x86_64}
+TARGET_TRIPLE=${2:-x86_64-unknown-linux-gnu}
+
+# We pass down the current user so that when we later mount the local files
+# into the container, the files are accessible by the current user.
+pushd ci/manylinux_node
+docker build \
+    -t lancedb-node-manylinux \
+    --build-arg="ARCH=$ARCH" \
+    --build-arg="DOCKER_USER=$(id -u)" \
+    --progress=plain \
+    .
+popd
+
+# We turn on memory swap to avoid OOM killer
+docker run \
+    -v $(pwd):/io -w /io \
+    --memory-swap=-1 \
+    lancedb-node-manylinux \
+    bash ci/manylinux_node/build_vectordb.sh $ARCH $TARGET_TRIPLE
--- a/ci/build_macos_artifacts.sh
+++ b/ci/build_macos_artifacts.sh
@@ -0,0 +1,34 @@
+# Builds the macOS artifacts (node binaries).
+# Usage: ./ci/build_macos_artifacts.sh [target]
+# Targets supported: x86_64-apple-darwin aarch64-apple-darwin
+set -e
+
+prebuild_rust() {
+    # Building here for the sake of easier debugging.
+    pushd rust/ffi/node
+    echo "Building rust library for $1"
+    export RUST_BACKTRACE=1
+    cargo build --release --target $1
+    popd
+}
+
+build_node_binaries() {
+    pushd node
+    echo "Building node library for $1"
+    npm run build-release -- --target $1
+    npm run pack-build -- --target $1
+    popd
+}
+
+if [ -n "$1" ]; then
+    targets=$1
+else
+    targets="x86_64-apple-darwin aarch64-apple-darwin"
+fi
+
+echo "Building artifacts for targets: $targets"
+for target in $targets
+    do
+    prebuild_rust $target
+    build_node_binaries $target
+done
--- a/ci/build_windows_artifacts.ps1
+++ b/ci/build_windows_artifacts.ps1
@@ -0,0 +1,42 @@
+# Builds the Windows artifacts (node binaries).
+# Usage:  .\ci\build_windows_artifacts.ps1 [target]
+# Targets supported:
+# - x86_64-pc-windows-msvc
+# - i686-pc-windows-msvc
+# - aarch64-pc-windows-msvc
+
+function Prebuild-Rust {
+    param (
+        [string]$target
+    )
+
+    # Building here for the sake of easier debugging.
+    Push-Location -Path "rust/ffi/node"
+    Write-Host "Building rust library for $target"
+    $env:RUST_BACKTRACE=1
+    cargo build --release --target $target
+    Pop-Location
+}
+
+function Build-NodeBinaries {
+    param (
+        [string]$target
+    )
+
+    Push-Location -Path "node"
+    Write-Host "Building node library for $target"
+    npm run build-release -- --target $target
+    npm run pack-build -- --target $target
+    Pop-Location
+}
+
+$targets = $args[0]
+if (-not $targets) {
+    $targets = "x86_64-pc-windows-msvc", "aarch64-pc-windows-msvc"
+}
+
+Write-Host "Building artifacts for targets: $targets"
+foreach ($target in $targets) {
+    Prebuild-Rust $target
+    Build-NodeBinaries $target
+}
--- a/ci/build_windows_artifacts_nodejs.ps1
+++ b/ci/build_windows_artifacts_nodejs.ps1
@@ -0,0 +1,42 @@
+# Builds the Windows artifacts (nodejs binaries).
+# Usage:  .\ci\build_windows_artifacts_nodejs.ps1 [target]
+# Targets supported:
+# - x86_64-pc-windows-msvc
+# - i686-pc-windows-msvc
+# - aarch64-pc-windows-msvc
+
+function Prebuild-Rust {
+    param (
+        [string]$target
+    )
+
+    # Building here for the sake of easier debugging.
+    Push-Location -Path "rust/lancedb"
+    Write-Host "Building rust library for $target"
+    $env:RUST_BACKTRACE=1
+    cargo build --release --target $target
+    Pop-Location
+}
+
+function Build-NodeBinaries {
+    param (
+        [string]$target
+    )
+
+    Push-Location -Path "nodejs"
+    Write-Host "Building nodejs library for $target"
+    $env:RUST_TARGET=$target
+    npm run build-release
+    Pop-Location
+}
+
+$targets = $args[0]
+if (-not $targets) {
+    $targets = "x86_64-pc-windows-msvc", "aarch64-pc-windows-msvc"
+}
+
+Write-Host "Building artifacts for targets: $targets"
+foreach ($target in $targets) {
+    Prebuild-Rust $target
+    Build-NodeBinaries $target
+}
--- a/ci/manylinux_node/Dockerfile
+++ b/ci/manylinux_node/Dockerfile
@@ -0,0 +1,27 @@
+# Manylinux Dockerfile with Rust, Node, and Lance dependencies installed.
+# This container allows building the node modules native libraries in an
+# environment with a very old glibc, so that we are compatible with a wide
+# range of linux distributions.
+ARG ARCH=x86_64
+
+FROM quay.io/pypa/manylinux_2_28_${ARCH}
+
+ARG ARCH=x86_64
+ARG DOCKER_USER=default_user
+
+# Protobuf is also installed as root.
+COPY install_protobuf.sh install_protobuf.sh
+RUN ./install_protobuf.sh ${ARCH}
+
+ENV DOCKER_USER=${DOCKER_USER}
+# Create a group and user, but only if it doesn't exist
+RUN echo ${ARCH} && id -u ${DOCKER_USER} >/dev/null 2>&1 || adduser --user-group --create-home --uid ${DOCKER_USER} build_user
+
+# We switch to the user to install Rust and Node, since those like to be
+# installed at the user level.
+USER ${DOCKER_USER}
+
+COPY prepare_manylinux_node.sh prepare_manylinux_node.sh
+RUN cp /prepare_manylinux_node.sh $HOME/ && \
+    cd $HOME && \
+    ./prepare_manylinux_node.sh ${ARCH}
--- a/ci/manylinux_node/build_vectordb.sh
+++ b/ci/manylinux_node/build_vectordb.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# Builds the node module for manylinux. Invoked by ci/build_linux_artifacts.sh.
+set -e
+ARCH=${1:-x86_64}
+TARGET_TRIPLE=${2:-x86_64-unknown-linux-gnu}
+
+#Alpine doesn't have .bashrc
+FILE=$HOME/.bashrc && test -f $FILE && source $FILE
+
+cd node
+npm ci
+npm run build-release
+npm run pack-build -- -t $TARGET_TRIPLE
--- a/ci/manylinux_node/install_protobuf.sh
+++ b/ci/manylinux_node/install_protobuf.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# Installs protobuf compiler. Should be run as root.
+set -e
+
+if [[ $1 == x86_64* ]]; then
+    ARCH=x86_64
+else
+    # any other argument is treated as aarch64 (protoc release assets call it aarch_64)
+    ARCH=aarch_64
+fi
+
+PB_REL=https://github.com/protocolbuffers/protobuf/releases
+PB_VERSION=23.1
+curl -LO $PB_REL/download/v$PB_VERSION/protoc-$PB_VERSION-linux-$ARCH.zip
+unzip protoc-$PB_VERSION-linux-$ARCH.zip -d /usr/local
--- a/ci/manylinux_node/prepare_manylinux_node.sh
+++ b/ci/manylinux_node/prepare_manylinux_node.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+set -e
+
+install_node() {
+    echo "Installing node..."
+
+    curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.34.0/install.sh | bash
+
+    source "$HOME"/.bashrc
+
+    nvm install --no-progress 18
+}
+
+install_rust() {
+    echo "Installing rust..."
+    curl https://sh.rustup.rs -sSf | bash -s -- -y
+    export PATH="$PATH:$HOME/.cargo/bin"  # rustup installs under the invoking user's home, which is not /root for the build user
+}
+
+install_node
+install_rust
--- a/ci/set_lance_version.py
+++ b/ci/set_lance_version.py
@@ -1,188 +0,0 @@
-import argparse
-import sys
-import json
-
-
-def run_command(command: str) -> str:
-    """
-    Run a shell command and return stdout as a string.
-    If exit code is not 0, raise an exception with the stderr output.
-    """
-    import subprocess
-
-    result = subprocess.run(command, shell=True, capture_output=True, text=True)
-    if result.returncode != 0:
-        raise Exception(f"Command failed with error: {result.stderr.strip()}")
-    return result.stdout.strip()
-
-
-def get_latest_stable_version() -> str:
-    version_line = run_command("cargo info lance | grep '^version:'")
-    version = version_line.split(" ")[1].strip()
-    return version
-
-
-def get_latest_preview_version() -> str:
-    lance_tags = run_command(
-        "git ls-remote --tags https://github.com/lancedb/lance.git | grep 'refs/tags/v[0-9beta.-]\\+$'"
-    ).splitlines()
-    lance_tags = (
-        tag.split("refs/tags/")[1]
-        for tag in lance_tags
-        if "refs/tags/" in tag and "beta" in tag
-    )
-    from packaging.version import Version
-
-    latest = max(
-        (tag[1:] for tag in lance_tags if tag.startswith("v")), key=lambda t: Version(t)
-    )
-    return str(latest)
-
-
-def extract_features(line: str) -> list:
-    """
-    Extracts the features from a line in Cargo.toml.
-    Example: 'lance = { "version" = "=0.29.0", "features" = ["dynamodb"] }'
-    Returns: ['dynamodb']
-    """
-    import re
-
-    match = re.search(r'"features"\s*=\s*\[\s*(.*?)\s*\]', line, re.DOTALL)
-    if match:
-        features_str = match.group(1)
-        return [f.strip('"') for f in features_str.split(",") if len(f) > 0]
-    return []
-
-
-def update_cargo_toml(line_updater):
-    """
-    Updates the Cargo.toml file by applying the line_updater function to each line.
-    The line_updater function should take a line as input and return the updated line.
-    """
-    with open("Cargo.toml", "r") as f:
-        lines = f.readlines()
-
-    new_lines = []
-    lance_line = ""
-    is_parsing_lance_line = False
-    for line in lines:
-        if line.startswith("lance"):
-            # Update the line using the provided function
-            if line.strip().endswith("}"):
-                new_lines.append(line_updater(line))
-            else:
-                lance_line = line
-                is_parsing_lance_line = True
-        elif is_parsing_lance_line:
-            lance_line += line
-            if line.strip().endswith("}"):
-                new_lines.append(line_updater(lance_line))
-                lance_line = ""
-                is_parsing_lance_line = False
-            else:
-                print("doesn't end with }:", line)
-        else:
-            # Keep the line unchanged
-            new_lines.append(line)
-
-    with open("Cargo.toml", "w") as f:
-        f.writelines(new_lines)
-
-
-def set_stable_version(version: str):
-    """
-    Sets lines to
-    lance = { "version" = "=0.29.0", "features" = ["dynamodb"] }
-    lance-io = "=0.29.0"
-    ...
-    """
-
-    def line_updater(line: str) -> str:
-        package_name = line.split("=", maxsplit=1)[0].strip()
-        features = extract_features(line)
-        if features:
-            return f'{package_name} = {{ "version" = "={version}", "features" = {json.dumps(features)} }}\n'
-        else:
-            return f'{package_name} = "={version}"\n'
-
-    update_cargo_toml(line_updater)
-
-
-def set_preview_version(version: str):
-    """
-    Sets lines to
-    lance = { "version" = "=0.29.0", "features" = ["dynamodb"], tag = "v0.29.0-beta.2", git="https://github.com/lancedb/lance.git" }
-    lance-io = { version = "=0.29.0", tag = "v0.29.0-beta.2", git="https://github.com/lancedb/lance.git" }
-    ...
-    """
-
-    def line_updater(line: str) -> str:
-        package_name = line.split("=", maxsplit=1)[0].strip()
-        features = extract_features(line)
-        base_version = version.split("-")[0]  # Get the base version without beta suffix
-        if features:
-            return f'{package_name} = {{ "version" = "={base_version}", "features" = {json.dumps(features)}, "tag" = "v{version}", "git" = "https://github.com/lancedb/lance.git" }}\n'
-        else:
-            return f'{package_name} = {{ "version" = "={base_version}", "tag" = "v{version}", "git" = "https://github.com/lancedb/lance.git" }}\n'
-
-    update_cargo_toml(line_updater)
-
-
-def set_local_version():
-    """
-    Sets lines to
-    lance = { path = "../lance/rust/lance", features = ["dynamodb"] }
-    lance-io = { path = "../lance/rust/lance-io" }
-    ...
-    """
-
-    def line_updater(line: str) -> str:
-        package_name = line.split("=", maxsplit=1)[0].strip()
-        features = extract_features(line)
-        if features:
-            return f'{package_name} = {{ "path" = "../lance/rust/{package_name}", "features" = {json.dumps(features)} }}\n'
-        else:
-            return f'{package_name} = {{ "path" = "../lance/rust/{package_name}" }}\n'
-
-    update_cargo_toml(line_updater)
-
-
-parser = argparse.ArgumentParser(description="Set the version of the Lance package.")
-parser.add_argument(
-    "version",
-    type=str,
-    help="The version to set for the Lance package. Use 'stable' for the latest stable version, 'preview' for latest preview version, or a specific version number (e.g., '0.1.0'). You can also specify 'local' to use a local path.",
-)
-args = parser.parse_args()
-
-if args.version == "stable":
-    latest_stable_version = get_latest_stable_version()
-    print(
-        f"Found latest stable version: \033[1mv{latest_stable_version}\033[0m",
-        file=sys.stderr,
-    )
-    set_stable_version(latest_stable_version)
-elif args.version == "preview":
-    latest_preview_version = get_latest_preview_version()
-    print(
-        f"Found latest preview version: \033[1mv{latest_preview_version}\033[0m",
-        file=sys.stderr,
-    )
-    set_preview_version(latest_preview_version)
-elif args.version == "local":
-    set_local_version()
-else:
-    # Parse the version number.
-    version = args.version
-    # Ignore initial v if present.
-    if version.startswith("v"):
-        version = version[1:]
-
-    if "beta" in version:
-        set_preview_version(version)
-    else:
-        set_stable_version(version)
-
-print("Updating lockfiles...", file=sys.stderr, end="")
-run_command("cargo metadata > /dev/null")
-print(" done.", file=sys.stderr)
--- a/ci/update_lockfiles.sh
+++ b/ci/update_lockfiles.sh
@@ -1,30 +1,18 @@
 #!/usr/bin/env bash
 set -euo pipefail

-AMEND=false
-
-for arg in "$@"; do
-  if [[ "$arg" == "--amend" ]]; then
-    AMEND=true
-  fi
-done
-
 # This updates the lockfile without building
-cargo metadata --quiet > /dev/null
+cargo metadata > /dev/null

 pushd nodejs || exit 1
-npm install --package-lock-only --silent
+npm install --package-lock-only
 popd
 pushd node || exit 1
-npm install --package-lock-only --silent
+npm install --package-lock-only
 popd

 if git diff --quiet --exit-code; then
  echo "No lockfile changes to commit; skipping amend."
-elif $AMEND; then
-  git add Cargo.lock nodejs/package-lock.json node/package-lock.json
-  git commit --amend --no-edit
 else
-  git add Cargo.lock nodejs/package-lock.json node/package-lock.json
-  git commit -m "Update lockfiles"
+  git commit --amend --no-edit
 fi
--- a/docs/mkdocs.yml
+++ b/docs/mkdocs.yml
@@ -103,6 +103,264 @@ markdown_extensions:
      permalink: ""

 nav:
+  - Home:
+      - LanceDB: index.md
+      - 🏃🏼‍♂️ Quick start: basic.md
+      - 📚 Concepts:
+          - Vector search: concepts/vector_search.md
+          - Indexing:
+              - IVFPQ: concepts/index_ivfpq.md
+              - HNSW: concepts/index_hnsw.md
+          - Storage: concepts/storage.md
+          - Data management: concepts/data_management.md
+      - 🔨 Guides:
+          - Working with tables: guides/tables.md
+          - Building a vector index: ann_indexes.md
+          - Vector Search: search.md
+          - Full-text search (native): fts.md
+          - Full-text search (tantivy-based): fts_tantivy.md
+          - Building a scalar index: guides/scalar_index.md
+          - Hybrid search:
+              - Overview: hybrid_search/hybrid_search.md
+              - Comparing Rerankers: hybrid_search/eval.md
+              - Airbnb financial data example: notebooks/hybrid_search.ipynb
+          - Late interaction with MultiVector search:
+              - Overview: guides/multi-vector.md
+              - Example: notebooks/Multivector_on_LanceDB.ipynb
+          - RAG:
+              - Vanilla RAG: rag/vanilla_rag.md
+              - Multi-head RAG: rag/multi_head_rag.md
+              - Corrective RAG: rag/corrective_rag.md
+              - Agentic RAG: rag/agentic_rag.md
+              - Graph RAG: rag/graph_rag.md
+              - Self RAG: rag/self_rag.md
+              - Adaptive RAG: rag/adaptive_rag.md
+              - SFR RAG: rag/sfr_rag.md
+              - Advanced Techniques:
+                  - HyDE: rag/advanced_techniques/hyde.md
+                  - FLARE: rag/advanced_techniques/flare.md
+          - Reranking:
+              - Quickstart: reranking/index.md
+              - Cohere Reranker: reranking/cohere.md
+              - Linear Combination Reranker: reranking/linear_combination.md
+              - Reciprocal Rank Fusion Reranker: reranking/rrf.md
+              - Cross Encoder Reranker: reranking/cross_encoder.md
+              - ColBERT Reranker: reranking/colbert.md
+              - Jina Reranker: reranking/jina.md
+              - OpenAI Reranker: reranking/openai.md
+              - AnswerDotAi Rerankers: reranking/answerdotai.md
+              - Voyage AI Rerankers: reranking/voyageai.md
+              - Building Custom Rerankers: reranking/custom_reranker.md
+              - Example: notebooks/lancedb_reranking.ipynb
+          - Filtering: sql.md
+          - Versioning & Reproducibility:
+              - sync API: notebooks/reproducibility.ipynb
+              - async API: notebooks/reproducibility_async.ipynb
+          - Configuring Storage: guides/storage.md
+          - Migration Guide: migration.md
+          - Tuning retrieval performance:
+              - Choosing right query type: guides/tuning_retrievers/1_query_types.md
+              - Reranking: guides/tuning_retrievers/2_reranking.md
+              - Embedding fine-tuning: guides/tuning_retrievers/3_embed_tuning.md
+      - 🧬 Managing embeddings:
+          - Understand Embeddings: embeddings/understanding_embeddings.md
+          - Get Started: embeddings/index.md
+          - Embedding functions: embeddings/embedding_functions.md
+          - Available models:
+              - Overview: embeddings/default_embedding_functions.md
+              - Text Embedding Functions:
+                  - Sentence Transformers: embeddings/available_embedding_models/text_embedding_functions/sentence_transformers.md
+                  - Huggingface Embedding Models: embeddings/available_embedding_models/text_embedding_functions/huggingface_embedding.md
+                  - Ollama Embeddings: embeddings/available_embedding_models/text_embedding_functions/ollama_embedding.md
+                  - OpenAI Embeddings: embeddings/available_embedding_models/text_embedding_functions/openai_embedding.md
+                  - Instructor Embeddings: embeddings/available_embedding_models/text_embedding_functions/instructor_embedding.md
+                  - Gemini Embeddings: embeddings/available_embedding_models/text_embedding_functions/gemini_embedding.md
+                  - Cohere Embeddings: embeddings/available_embedding_models/text_embedding_functions/cohere_embedding.md
+                  - Jina Embeddings: embeddings/available_embedding_models/text_embedding_functions/jina_embedding.md
+                  - AWS Bedrock Text Embedding Functions: embeddings/available_embedding_models/text_embedding_functions/aws_bedrock_embedding.md
+                  - IBM watsonx.ai Embeddings: embeddings/available_embedding_models/text_embedding_functions/ibm_watsonx_ai_embedding.md
+                  - Voyage AI Embeddings: embeddings/available_embedding_models/text_embedding_functions/voyageai_embedding.md
+              - Multimodal Embedding Functions:
+                  - OpenClip embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/openclip_embedding.md
+                  - Imagebind embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/imagebind_embedding.md
+                  - Jina Embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/jina_multimodal_embedding.md
+          - User-defined embedding functions: embeddings/custom_embedding_function.md
+          - Variables and secrets: embeddings/variables_and_secrets.md
+          - "Example: Multi-lingual semantic search": notebooks/multi_lingual_example.ipynb
+          - "Example: MultiModal CLIP Embeddings": notebooks/DisappearingEmbeddingFunction.ipynb
+      - 🔌 Integrations:
+          - Tools and data formats: integrations/index.md
+          - Pandas and PyArrow: python/pandas_and_pyarrow.md
+          - Polars: python/polars_arrow.md
+          - DuckDB: python/duckdb.md
+          - Datafusion: python/datafusion.md
+          - LangChain:
+              - LangChain 🔗: integrations/langchain.md
+              - LangChain demo: notebooks/langchain_demo.ipynb
+              - LangChain JS/TS 🔗: https://js.langchain.com/docs/integrations/vectorstores/lancedb
+          - LlamaIndex 🦙:
+              - LlamaIndex docs: integrations/llamaIndex.md
+              - LlamaIndex demo: notebooks/llamaIndex_demo.ipynb
+          - Pydantic: python/pydantic.md
+          - Voxel51: integrations/voxel51.md
+          - PromptTools: integrations/prompttools.md
+          - dlt: integrations/dlt.md
+          - phidata: integrations/phidata.md
+          - Genkit: integrations/genkit.md
+      - 🎯 Examples:
+          - Overview: examples/index.md
+          - 🐍 Python:
+              - Overview: examples/examples_python.md
+              - Build From Scratch: examples/python_examples/build_from_scratch.md
+              - Multimodal: examples/python_examples/multimodal.md
+              - Rag: examples/python_examples/rag.md
+              - Vector Search: examples/python_examples/vector_search.md
+              - Chatbot: examples/python_examples/chatbot.md
+              - Evaluation: examples/python_examples/evaluations.md
+              - AI Agent: examples/python_examples/aiagent.md
+              - Recommender System: examples/python_examples/recommendersystem.md
+              - Miscellaneous:
+                  - Serverless QA Bot with S3 and Lambda: examples/serverless_lancedb_with_s3_and_lambda.md
+                  - Serverless QA Bot with Modal: examples/serverless_qa_bot_with_modal_and_langchain.md
+          - 👾 JavaScript:
+              - Overview: examples/examples_js.md
+              - Serverless Website Chatbot: examples/serverless_website_chatbot.md
+              - YouTube Transcript Search: examples/youtube_transcript_bot_with_nodejs.md
+              - TransformersJS Embedding Search: examples/transformerjs_embedding_search_nodejs.md
+          - 🦀 Rust:
+              - Overview: examples/examples_rust.md
+      - 📓 Studies:
+          - ↗Improve retrievers with hybrid search and reranking: https://blog.lancedb.com/hybrid-search-and-reranking-report/
+      - 💭 FAQs: faq.md
+      - 🔍 Troubleshooting: troubleshooting.md
+      - ⚙️ API reference:
+          - 🐍 Python: python/python.md
+          - 👾 JavaScript (vectordb): javascript/modules.md
+          - 👾 JavaScript (lancedb): js/globals.md
+          - 🦀 Rust: https://docs.rs/lancedb/latest/lancedb/
+
+  - Quick start: basic.md
+  - Concepts:
+      - Vector search: concepts/vector_search.md
+      - Indexing:
+          - IVFPQ: concepts/index_ivfpq.md
+          - HNSW: concepts/index_hnsw.md
+      - Storage: concepts/storage.md
+      - Data management: concepts/data_management.md
+  - Guides:
+      - Working with tables: guides/tables.md
+      - Working with SQL: guides/sql_querying.md
+      - Building an ANN index: ann_indexes.md
+      - Vector Search: search.md
+      - Full-text search (native): fts.md
+      - Full-text search (tantivy-based): fts_tantivy.md
+      - Building a scalar index: guides/scalar_index.md
+      - Hybrid search:
+          - Overview: hybrid_search/hybrid_search.md
+          - Comparing Rerankers: hybrid_search/eval.md
+          - Airbnb financial data example: notebooks/hybrid_search.ipynb
+      - Late interaction with MultiVector search:
+          - Overview: guides/multi-vector.md
+          - Document search Example: notebooks/Multivector_on_LanceDB.ipynb
+      - RAG:
+          - Vanilla RAG: rag/vanilla_rag.md
+          - Multi-head RAG: rag/multi_head_rag.md
+          - Corrective RAG: rag/corrective_rag.md
+          - Agentic RAG: rag/agentic_rag.md
+          - Graph RAG: rag/graph_rag.md
+          - Self RAG: rag/self_rag.md
+          - Adaptive RAG: rag/adaptive_rag.md
+          - SFR RAG: rag/sfr_rag.md
+          - Advanced Techniques:
+              - HyDE: rag/advanced_techniques/hyde.md
+              - FLARE: rag/advanced_techniques/flare.md
+      - Reranking:
+          - Quickstart: reranking/index.md
+          - Cohere Reranker: reranking/cohere.md
+          - Linear Combination Reranker: reranking/linear_combination.md
+          - Reciprocal Rank Fusion Reranker: reranking/rrf.md
+          - Cross Encoder Reranker: reranking/cross_encoder.md
+          - ColBERT Reranker: reranking/colbert.md
+          - Jina Reranker: reranking/jina.md
+          - OpenAI Reranker: reranking/openai.md
+          - AnswerDotAi Rerankers: reranking/answerdotai.md
+          - Building Custom Rerankers: reranking/custom_reranker.md
+          - Example: notebooks/lancedb_reranking.ipynb
+      - Filtering: sql.md
+      - Versioning & Reproducibility:
+          - sync API: notebooks/reproducibility.ipynb
+          - async API: notebooks/reproducibility_async.ipynb
+      - Configuring Storage: guides/storage.md
+      - Migration Guide: migration.md
+      - Tuning retrieval performance:
+          - Choosing right query type: guides/tuning_retrievers/1_query_types.md
+          - Reranking: guides/tuning_retrievers/2_reranking.md
+          - Embedding fine-tuning: guides/tuning_retrievers/3_embed_tuning.md
+  - Managing Embeddings:
+      - Understand Embeddings: embeddings/understanding_embeddings.md
+      - Get Started: embeddings/index.md
+      - Embedding functions: embeddings/embedding_functions.md
+      - Available models:
+          - Overview: embeddings/default_embedding_functions.md
+          - Text Embedding Functions:
+              - Sentence Transformers: embeddings/available_embedding_models/text_embedding_functions/sentence_transformers.md
+              - Huggingface Embedding Models: embeddings/available_embedding_models/text_embedding_functions/huggingface_embedding.md
+              - Ollama Embeddings: embeddings/available_embedding_models/text_embedding_functions/ollama_embedding.md
+              - OpenAI Embeddings: embeddings/available_embedding_models/text_embedding_functions/openai_embedding.md
+              - Instructor Embeddings: embeddings/available_embedding_models/text_embedding_functions/instructor_embedding.md
+              - Gemini Embeddings: embeddings/available_embedding_models/text_embedding_functions/gemini_embedding.md
+              - Cohere Embeddings: embeddings/available_embedding_models/text_embedding_functions/cohere_embedding.md
+              - Jina Embeddings: embeddings/available_embedding_models/text_embedding_functions/jina_embedding.md
+              - AWS Bedrock Text Embedding Functions: embeddings/available_embedding_models/text_embedding_functions/aws_bedrock_embedding.md
+              - IBM watsonx.ai Embeddings: embeddings/available_embedding_models/text_embedding_functions/ibm_watsonx_ai_embedding.md
+          - Multimodal Embedding Functions:
+              - OpenClip embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/openclip_embedding.md
+              - Imagebind embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/imagebind_embedding.md
+              - Jina Embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/jina_multimodal_embedding.md
+      - User-defined embedding functions: embeddings/custom_embedding_function.md
+      - Variables and secrets: embeddings/variables_and_secrets.md
+      - "Example: Multi-lingual semantic search": notebooks/multi_lingual_example.ipynb
+      - "Example: MultiModal CLIP Embeddings": notebooks/DisappearingEmbeddingFunction.ipynb
+  - Integrations:
+      - Overview: integrations/index.md
+      - Pandas and PyArrow: python/pandas_and_pyarrow.md
+      - Polars: python/polars_arrow.md
+      - DuckDB: python/duckdb.md
+      - Datafusion: python/datafusion.md
+      - LangChain 🦜️🔗↗: integrations/langchain.md
+      - LangChain.js 🦜️🔗↗: https://js.langchain.com/docs/integrations/vectorstores/lancedb
+      - LlamaIndex 🦙↗: integrations/llamaIndex.md
+      - Pydantic: python/pydantic.md
+      - Voxel51: integrations/voxel51.md
+      - PromptTools: integrations/prompttools.md
+      - dlt: integrations/dlt.md
+      - phidata: integrations/phidata.md
+      - Genkit: integrations/genkit.md
+  - Examples:
+      - examples/index.md
+      - 🐍 Python:
+          - Overview: examples/examples_python.md
+          - Build From Scratch: examples/python_examples/build_from_scratch.md
+          - Multimodal: examples/python_examples/multimodal.md
+          - Rag: examples/python_examples/rag.md
+          - Vector Search: examples/python_examples/vector_search.md
+          - Chatbot: examples/python_examples/chatbot.md
+          - Evaluation: examples/python_examples/evaluations.md
+          - AI Agent: examples/python_examples/aiagent.md
+          - Recommender System: examples/python_examples/recommendersystem.md
+          - Miscellaneous:
+              - Serverless QA Bot with S3 and Lambda: examples/serverless_lancedb_with_s3_and_lambda.md
+              - Serverless QA Bot with Modal: examples/serverless_qa_bot_with_modal_and_langchain.md
+      - 👾 JavaScript:
+          - Overview: examples/examples_js.md
+          - Serverless Website Chatbot: examples/serverless_website_chatbot.md
+          - YouTube Transcript Search: examples/youtube_transcript_bot_with_nodejs.md
+          - TransformersJS Embedding Search: examples/transformerjs_embedding_search_nodejs.md
+      - 🦀 Rust:
+          - Overview: examples/examples_rust.md
+  - Studies:
+      - studies/overview.md
+      - ↗Improve retrievers with hybrid search and reranking: https://blog.lancedb.com/hybrid-search-and-reranking-report/
  - API reference:
      - Overview: api_reference.md
      - Python: python/python.md
--- a/docs/package-lock.json
+++ b/docs/package-lock.json
@@ -19,7 +19,7 @@
    },
    "../node": {
      "name": "vectordb",
-      "version": "0.21.2-beta.0",
+      "version": "0.12.0",
      "cpu": [
        "x64",
        "arm64"
@@ -65,11 +65,11 @@
        "uuid": "^9.0.0"
      },
      "optionalDependencies": {
-        "@lancedb/vectordb-darwin-arm64": "0.21.2-beta.0",
-        "@lancedb/vectordb-darwin-x64": "0.21.2-beta.0",
-        "@lancedb/vectordb-linux-arm64-gnu": "0.21.2-beta.0",
-        "@lancedb/vectordb-linux-x64-gnu": "0.21.2-beta.0",
-        "@lancedb/vectordb-win32-x64-msvc": "0.21.2-beta.0"
+        "@lancedb/vectordb-darwin-arm64": "0.12.0",
+        "@lancedb/vectordb-darwin-x64": "0.12.0",
+        "@lancedb/vectordb-linux-arm64-gnu": "0.12.0",
+        "@lancedb/vectordb-linux-x64-gnu": "0.12.0",
+        "@lancedb/vectordb-win32-x64-msvc": "0.12.0"
      },
      "peerDependencies": {
        "@apache-arrow/ts": "^14.0.2",
--- a/docs/src/guides/sql_querying.md
+++ b/docs/src/guides/sql_querying.md
@@ -1,9 +1,7 @@
-# SQL Querying
-
 You can use DuckDB and Apache Datafusion to query your LanceDB tables using SQL.
 This guide will show how to query Lance tables using both.

-We will re-use the dataset [created previously](./tables.md):
+We will re-use the dataset [created previously](../python/pandas_and_pyarrow.md):

 ```python
 import lancedb
@@ -29,17 +27,21 @@ arrow_table = table.to_lance()
 duckdb.query("SELECT * FROM arrow_table")
 ```

-| vector      | item | price |
-| ----------- | ---- | ----- |
-| [3.1, 4.1]  | foo  | 10.0  |
-| [5.9, 26.5] | bar  | 20.0  |
+```
+┌─────────────┬─────────┬────────┐
+│   vector    │  item   │ price  │
+│   float[]   │ varchar │ double │
+├─────────────┼─────────┼────────┤
+│ [3.1, 4.1]  │ foo     │   10.0 │
+│ [5.9, 26.5] │ bar     │   20.0 │
+└─────────────┴─────────┴────────┘
+```

 ## Querying a LanceDB Table with Apache Datafusion

 Have the required imports before doing any querying.

 === "Python"
-
    ```python
    --8<-- "python/python/tests/docs/test_guide_tables.py:import-lancedb"
    --8<-- "python/python/tests/docs/test_guide_tables.py:import-session-context"
@@ -49,12 +51,16 @@ Have the required imports before doing any querying.
 Register the table created with the Datafusion session context.

 === "Python"
-
    ```python
    --8<-- "python/python/tests/docs/test_guide_tables.py:lance_sql_basic"
    ```

-| vector      | item | price |
-| ----------- | ---- | ----- |
-| [3.1, 4.1]  | foo  | 10.0  |
-| [5.9, 26.5] | bar  | 20.0  |
+```
+┌─────────────┬─────────┬────────┐
+│   vector    │  item   │ price  │
+│   float[]   │ varchar │ double │
+├─────────────┼─────────┼────────┤
+│ [3.1, 4.1]  │ foo     │   10.0 │
+│ [5.9, 26.5] │ bar     │   20.0 │
+└─────────────┴─────────┴────────┘
+```
--- a/docs/src/js/classes/BooleanQuery.md
+++ b/docs/src/js/classes/BooleanQuery.md
@@ -1,53 +0,0 @@
-[**@lancedb/lancedb**](../README.md) • **Docs**
-
-***
-
-[@lancedb/lancedb](../globals.md) / BooleanQuery
-
-# Class: BooleanQuery
-
-Represents a full-text query interface.
-This interface defines the structure and behavior for full-text queries,
-including methods to retrieve the query type and convert the query to a dictionary format.
-
-## Implements
-
- [`FullTextQuery`](../interfaces/FullTextQuery.md)
-
-## Constructors
-
-### new BooleanQuery()
-
-```ts
-new BooleanQuery(queries): BooleanQuery
-```
-
-Creates an instance of BooleanQuery.
-
-#### Parameters
-
-* **queries**: [[`Occur`](../enumerations/Occur.md), [`FullTextQuery`](../interfaces/FullTextQuery.md)][]
-    An array of (Occur, FullTextQuery objects) to combine.
-    Occur specifies whether the query must match, or should match.
-
-#### Returns
-
-[`BooleanQuery`](BooleanQuery.md)
-
-## Methods
-
-### queryType()
-
-```ts
-queryType(): FullTextQueryType
-```
-
-The type of the full-text query.
-
-#### Returns
-
-[`FullTextQueryType`](../enumerations/FullTextQueryType.md)
-
-#### Implementation of
-
-[`FullTextQuery`](../interfaces/FullTextQuery.md).[`queryType`](../interfaces/FullTextQuery.md#querytype)
--- a/docs/src/js/classes/MatchQuery.md
+++ b/docs/src/js/classes/MatchQuery.md
@@ -40,8 +40,6 @@ Creates an instance of MatchQuery.
    - `boost`: The boost factor for the query (default is 1.0).
    - `fuzziness`: The fuzziness level for the query (default is 0).
    - `maxExpansions`: The maximum number of terms to consider for fuzzy matching (default is 50).
-    - `operator`: The logical operator to use for combining terms in the query (default is "OR").
-    - `prefixLength`: The number of beginning characters being unchanged for fuzzy matching.

 * **options.boost?**: `number`

@@ -49,10 +47,6 @@ Creates an instance of MatchQuery.

 * **options.maxExpansions?**: `number`

-* **options.operator?**: [`Operator`](../enumerations/Operator.md)
-
-* **options.prefixLength?**: `number`
-
 #### Returns

 [`MatchQuery`](MatchQuery.md)
--- a/docs/src/js/classes/MultiMatchQuery.md
+++ b/docs/src/js/classes/MultiMatchQuery.md
@@ -38,12 +38,9 @@ Creates an instance of MultiMatchQuery.
 * **options?**
    Optional parameters for the multi-match query.
    - `boosts`: An array of boost factors for each column (default is 1.0 for all).
-    - `operator`: The logical operator to use for combining terms in the query (default is "OR").

 * **options.boosts?**: `number`[]

-* **options.operator?**: [`Operator`](../enumerations/Operator.md)
-
 #### Returns

 [`MultiMatchQuery`](MultiMatchQuery.md)
--- a/docs/src/js/classes/PhraseQuery.md
+++ b/docs/src/js/classes/PhraseQuery.md
@@ -19,10 +19,7 @@ including methods to retrieve the query type and convert the query to a dictiona
 ### new PhraseQuery()

 ```ts
-new PhraseQuery(
-   query,
-   column,
-   options?): PhraseQuery
+new PhraseQuery(query, column): PhraseQuery
 ```

 Creates an instance of `PhraseQuery`.
@@ -35,12 +32,6 @@ Creates an instance of `PhraseQuery`.
 * **column**: `string`
    The name of the column to search within.

-* **options?**
-    Optional parameters for the phrase query.
-    - `slop`: The maximum number of intervening unmatched positions allowed between words in the phrase (default is 0).
-
-* **options.slop?**: `number`
-
 #### Returns

 [`PhraseQuery`](PhraseQuery.md)
--- a/docs/src/js/classes/Session.md
+++ b/docs/src/js/classes/Session.md
@@ -1,84 +0,0 @@
-[**@lancedb/lancedb**](../README.md) • **Docs**
-
-***
-
-[@lancedb/lancedb](../globals.md) / Session
-
-# Class: Session
-
-A session for managing caches and object stores across LanceDB operations.
-
-Sessions allow you to configure cache sizes for index and metadata caches,
-which can significantly impact performance for large datasets.
-
-## Constructors
-
-### new Session()
-
-```ts
-new Session(indexCacheSizeBytes?, metadataCacheSizeBytes?): Session
-```
-
-Create a new session with custom cache sizes.
-
-# Parameters
-
- `index_cache_size_bytes`: The size of the index cache in bytes.
-  Defaults to 6GB if not specified.
- `metadata_cache_size_bytes`: The size of the metadata cache in bytes.
-  Defaults to 1GB if not specified.
-
-#### Parameters
-
-* **indexCacheSizeBytes?**: `null` \| `bigint`
-
-* **metadataCacheSizeBytes?**: `null` \| `bigint`
-
-#### Returns
-
-[`Session`](Session.md)
-
-## Methods
-
-### approxNumItems()
-
-```ts
-approxNumItems(): number
-```
-
-Get the approximate number of items cached in the session.
-
-#### Returns
-
-`number`
-
-***
-
-### sizeBytes()
-
-```ts
-sizeBytes(): bigint
-```
-
-Get the current size of the session caches in bytes.
-
-#### Returns
-
-`bigint`
-
-***
-
-### default()
-
-```ts
-static default(): Session
-```
-
-Create a session with default cache sizes.
-
-This is equivalent to creating a session with 6GB index cache
-and 1GB metadata cache.
-
-#### Returns
-
-[`Session`](Session.md)
--- a/docs/src/js/classes/Table.md
+++ b/docs/src/js/classes/Table.md
@@ -612,7 +612,7 @@ of the given query

 #### Parameters

-* **query**: `string` \| [`IntoVector`](../type-aliases/IntoVector.md) \| [`MultiVector`](../type-aliases/MultiVector.md) \| [`FullTextQuery`](../interfaces/FullTextQuery.md)
+* **query**: `string` \| [`IntoVector`](../type-aliases/IntoVector.md) \| [`FullTextQuery`](../interfaces/FullTextQuery.md)
    the query, a vector or string

 * **queryType?**: `string`
@@ -799,7 +799,7 @@ by `query`.

 #### Parameters

-* **vector**: [`IntoVector`](../type-aliases/IntoVector.md) \| [`MultiVector`](../type-aliases/MultiVector.md)
+* **vector**: [`IntoVector`](../type-aliases/IntoVector.md)

 #### Returns

--- a/docs/src/js/classes/VectorQuery.md
+++ b/docs/src/js/classes/VectorQuery.md
@@ -386,53 +386,6 @@ called then every valid row from the table will be returned.

 ***

-### maximumNprobes()
-
-```ts
-maximumNprobes(maximumNprobes): VectorQuery
-```
-
-Set the maximum number of probes used.
-
-This controls the maximum number of partitions that will be searched.  If this
-number is greater than minimumNprobes then the excess partitions will _only_ be
-searched if we have not found enough results.  This can be useful when there is
-a narrow filter to allow these queries to spend more time searching and avoid
-potential false negatives.
-
-#### Parameters
-
-* **maximumNprobes**: `number`
-
-#### Returns
-
-[`VectorQuery`](VectorQuery.md)
-
-***
-
-### minimumNprobes()
-
-```ts
-minimumNprobes(minimumNprobes): VectorQuery
-```
-
-Set the minimum number of probes used.
-
-This controls the minimum number of partitions that will be searched.  This
-parameter will impact every query against a vector index, regardless of the
-filter.  See `nprobes` for more details.  Higher values will increase recall
-but will also increase latency.
-
-#### Parameters
-
-* **minimumNprobes**: `number`
-
-#### Returns
-
-[`VectorQuery`](VectorQuery.md)
-
-***
-
 ### nprobes()

 ```ts
@@ -460,10 +413,6 @@ For best results we recommend tuning this parameter with a benchmark against
 your actual data to find the smallest possible value that will still give
 you the desired recall.

-For more fine grained control over behavior when you have a very narrow filter
-you can use `minimumNprobes` and `maximumNprobes`.  This method sets both
-the minimum and maximum to the same value.
-
 #### Parameters

 * **nprobes**: `number`
--- a/docs/src/js/enumerations/FullTextQueryType.md
+++ b/docs/src/js/enumerations/FullTextQueryType.md
@@ -15,14 +15,6 @@ Enum representing the types of full-text queries supported.

 ## Enumeration Members

-### Boolean
-
-```ts
-Boolean: "boolean";
-```
-
-***
-
 ### Boost

 ```ts
--- a/docs/src/js/enumerations/Occur.md
+++ b/docs/src/js/enumerations/Occur.md
@@ -1,37 +0,0 @@
-[**@lancedb/lancedb**](../README.md) • **Docs**
-
-***
-
-[@lancedb/lancedb](../globals.md) / Occur
-
-# Enumeration: Occur
-
-Enum representing the occurrence of terms in full-text queries.
-
- `Must`: The term must be present in the document.
- `Should`: The term should contribute to the document score, but is not required.
- `MustNot`: The term must not be present in the document.
-
-## Enumeration Members
-
-### Must
-
-```ts
-Must: "MUST";
-```
-
-***
-
-### MustNot
-
-```ts
-MustNot: "MUST_NOT";
-```
-
-***
-
-### Should
-
-```ts
-Should: "SHOULD";
-```
--- a/docs/src/js/enumerations/Operator.md
+++ b/docs/src/js/enumerations/Operator.md
@@ -1,28 +0,0 @@
-[**@lancedb/lancedb**](../README.md) • **Docs**
-
-***
-
-[@lancedb/lancedb](../globals.md) / Operator
-
-# Enumeration: Operator
-
-Enum representing the logical operators used in full-text queries.
-
- `And`: All terms must match.
- `Or`: At least one term must match.
-
-## Enumeration Members
-
-### And
-
-```ts
-And: "AND";
-```
-
-***
-
-### Or
-
-```ts
-Or: "OR";
-```
--- a/docs/src/js/functions/connect.md
+++ b/docs/src/js/functions/connect.md
@@ -6,13 +6,10 @@

 # Function: connect()

-## connect(uri, options, session)
+## connect(uri, options)

 ```ts
-function connect(
-   uri,
-   options?,
-   session?): Promise<Connection>
+function connect(uri, options?): Promise<Connection>
 ```

 Connect to a LanceDB instance at the given URI.
@@ -32,8 +29,6 @@ Accepted formats:
 * **options?**: `Partial`&lt;[`ConnectionOptions`](../interfaces/ConnectionOptions.md)&gt;
    The options to use when connecting to the database

-* **session?**: [`Session`](../classes/Session.md)
-
 ### Returns

 `Promise`&lt;[`Connection`](../classes/Connection.md)&gt;
@@ -82,7 +77,7 @@ Accepted formats:

 [ConnectionOptions](../interfaces/ConnectionOptions.md) for more details on the URI format.

-### Examples
+### Example

 ```ts
 const conn = await connect({
@@ -90,11 +85,3 @@ const conn = await connect({
  storageOptions: {timeout: "60s"}
 });
 ```
-
-```ts
-const session = Session.default();
-const conn = await connect({
-  uri: "/path/to/database",
-  session: session
-});
-```
--- a/docs/src/js/globals.md
+++ b/docs/src/js/globals.md
@@ -12,12 +12,9 @@
 ## Enumerations

 - [FullTextQueryType](enumerations/FullTextQueryType.md)
- [Occur](enumerations/Occur.md)
- [Operator](enumerations/Operator.md)

 ## Classes

- [BooleanQuery](classes/BooleanQuery.md)
 - [BoostQuery](classes/BoostQuery.md)
 - [Connection](classes/Connection.md)
 - [Index](classes/Index.md)
@@ -29,7 +26,6 @@
 - [Query](classes/Query.md)
 - [QueryBase](classes/QueryBase.md)
 - [RecordBatchIterator](classes/RecordBatchIterator.md)
- [Session](classes/Session.md)
 - [Table](classes/Table.md)
 - [TagContents](classes/TagContents.md)
 - [Tags](classes/Tags.md)
@@ -85,7 +81,6 @@
 - [FieldLike](type-aliases/FieldLike.md)
 - [IntoSql](type-aliases/IntoSql.md)
 - [IntoVector](type-aliases/IntoVector.md)
- [MultiVector](type-aliases/MultiVector.md)
 - [RecordBatchLike](type-aliases/RecordBatchLike.md)
 - [SchemaLike](type-aliases/SchemaLike.md)
 - [TableLike](type-aliases/TableLike.md)
--- a/docs/src/js/interfaces/ConnectionOptions.md
+++ b/docs/src/js/interfaces/ConnectionOptions.md
@@ -70,17 +70,6 @@ Defaults to 'us-east-1'.

 ***

-### session?
-
-```ts
-optional session: Session;
-```
-
-(For LanceDB OSS only): the session to use for this connection. Holds
-shared caches and other session-specific state.
-
-***
-
 ### storageOptions?

 ```ts
--- a/docs/src/js/interfaces/FtsOptions.md
+++ b/docs/src/js/interfaces/FtsOptions.md
@@ -23,7 +23,7 @@ whether to remove punctuation
 ### baseTokenizer?

 ```ts
-optional baseTokenizer: "raw" | "simple" | "whitespace" | "ngram";
+optional baseTokenizer: "raw" | "simple" | "whitespace";
 ```

 The tokenizer to use when building the index.
@@ -71,36 +71,6 @@ tokens longer than this length will be ignored

 ***

-### ngramMaxLength?
-
-```ts
-optional ngramMaxLength: number;
-```
-
-ngram max length
-
-***
-
-### ngramMinLength?
-
-```ts
-optional ngramMinLength: number;
-```
-
-ngram min length
-
-***
-
-### prefixOnly?
-
-```ts
-optional prefixOnly: boolean;
-```
-
-whether to only index the prefix of the token for ngram tokenizer
-
-***
-
 ### removeStopWords?

 ```ts
--- a/docs/src/js/interfaces/OpenTableOptions.md
+++ b/docs/src/js/interfaces/OpenTableOptions.md
@@ -8,7 +8,7 @@

 ## Properties

-### ~~indexCacheSize?~~
+### indexCacheSize?

 ```ts
 optional indexCacheSize: number;
@@ -16,11 +16,6 @@ optional indexCacheSize: number;

 Set the size of the index cache, specified as a number of entries

-#### Deprecated
-
-Use session-level cache configuration instead.
-Create a Session with custom cache sizes and pass it to the connect() function.
-
 The exact meaning of an "entry" will depend on the type of index:
 - IVF: there is one entry for each IVF partition
 - BTREE: there is one entry for the entire index
--- a/docs/src/js/interfaces/OptimizeOptions.md
+++ b/docs/src/js/interfaces/OptimizeOptions.md
@@ -24,10 +24,10 @@ The default is 7 days
 // Delete all versions older than 1 day
 const olderThan = new Date();
 olderThan.setDate(olderThan.getDate() - 1);
-tbl.optimize({cleanupOlderThan: olderThan});
+tbl.cleanupOlderVersions(olderThan);

 // Delete all versions except the current version
-tbl.optimize({cleanupOlderThan: new Date()});
+tbl.cleanupOlderVersions(new Date());
 ```

 ***
--- a/docs/src/js/type-aliases/MultiVector.md
+++ b/docs/src/js/type-aliases/MultiVector.md
@@ -1,11 +0,0 @@
-[**@lancedb/lancedb**](../README.md) • **Docs**
-
-***
-
-[@lancedb/lancedb](../globals.md) / MultiVector
-
-# Type Alias: MultiVector
-
-```ts
-type MultiVector: IntoVector[];
-```
--- a/docs/src/notebooks/Multivector_on_LanceDB.ipynb
+++ b/docs/src/notebooks/Multivector_on_LanceDB.ipynb
@@ -428,7 +428,7 @@
        "\n",
        "**Why?**  \n",
        "Embedding the UFO dataset and ingesting it into LanceDB takes **~2 hours on a T4 GPU**. To save time:  \n",
-        "- **Use the pre-prepared table with index created** (provided below) to proceed directly to **Step 7**: search.  \n",
+        "- **Use the pre-prepared table with index created** (provided below) to proceed directly to Step 7: search.  \n",
        "- **Step 5a** contains the full ingestion code for reference (run it only if necessary).  \n",
        "- **Step 6** contains the details on creating the index on the multivector column"
      ]
--- a/docs/test/md_testing.py
+++ b/docs/test/md_testing.py
@@ -30,8 +30,7 @@ excluded_globs = [
    "../src/rag/advanced_techniques/*.md",
    "../src/guides/scalar_index.md",
    "../src/guides/storage.md",
-    "../src/search.md",
-    "../src/guides/sql_querying.md",
+    "../src/search.md"
 ]

 python_prefix = "py"
--- a/docs/test/requirements.txt
+++ b/docs/test/requirements.txt
@@ -7,4 +7,3 @@ tantivy==0.20.1
 --extra-index-url https://download.pytorch.org/whl/cpu
 torch
 polars>=0.19, <=1.3.0
-datafusion
--- a/java/.mvn/wrapper/maven-wrapper.properties
+++ b/java/.mvn/wrapper/maven-wrapper.properties
@@ -1,19 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-wrapperVersion=3.3.2
-distributionType=only-script
-distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.9/apache-maven-3.9.9-bin.zip
--- a/java/README.md
+++ b/java/README.md
@@ -1,37 +0,0 @@
-# LanceDB Java SDK
-
-## Configuration and Initialization
-
-### LanceDB Cloud
-
-For LanceDB Cloud, use the simplified builder API:
-
-```java
-import com.lancedb.lance.namespace.LanceRestNamespace;
-
-// If your DB url is db://example-db, then your database here is example-db
-LanceRestNamespace namespace = LanceDBRestNamespaces.builder()
-    .apiKey("your_lancedb_cloud_api_key")
-    .database("your_database_name")
-    .build();
-```
-
-### LanceDB Enterprise
-
-For Enterprise deployments, use your VPC endpoint:
-
-```java
-LanceRestNamespace namespace = LanceDBRestNamespaces.builder()
-    .apiKey("your_lancedb_enterprise_api_key")
-    .database("your-top-dir") // Your top level folder under your cloud bucket, e.g. s3://your-bucket/your-top-dir/
-    .hostOverride("http://<vpc_endpoint_dns_name>:80")
-    .build();
-```
-
-## Development
-
-Build:
-
-```shell
-./mvnw install
-```
--- a/java/core/lancedb-jni/Cargo.toml
+++ b/java/core/lancedb-jni/Cargo.toml
@@ -19,7 +19,7 @@ lancedb = { path = "../../../rust/lancedb" }
 lance = { workspace = true }
 arrow = { workspace = true, features = ["ffi"] }
 arrow-schema.workspace = true
-tokio = "1.46"
+tokio = "1.23"
 jni = "0.21.1"
 snafu.workspace = true
 lazy_static.workspace = true
--- a/java/core/pom.xml
+++ b/java/core/pom.xml
@@ -8,24 +8,18 @@
    <parent>
        <groupId>com.lancedb</groupId>
        <artifactId>lancedb-parent</artifactId>
-        <version>0.21.2-final.0</version>
+        <version>0.20.0-beta.2</version>
        <relativePath>../pom.xml</relativePath>
    </parent>

    <artifactId>lancedb-core</artifactId>
-    <name>${project.artifactId}</name>
-    <description>LanceDB Core</description>
+    <name>LanceDB Core</name>
    <packaging>jar</packaging>
    <properties>
        <rust.release.build>false</rust.release.build>
    </properties>

    <dependencies>
-        <dependency>
-            <groupId>com.lancedb</groupId>
-            <artifactId>lance-namespace-core</artifactId>
-            <version>0.0.1</version>
-        </dependency>
        <dependency>
            <groupId>org.apache.arrow</groupId>
            <artifactId>arrow-vector</artifactId>
--- a/java/lance-namespace/pom.xml
+++ b/java/lance-namespace/pom.xml
@@ -1,26 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-
-<project xmlns="http://maven.apache.org/POM/4.0.0"
-    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-    <modelVersion>4.0.0</modelVersion>
-
-    <parent>
-        <groupId>com.lancedb</groupId>
-        <artifactId>lancedb-parent</artifactId>
-        <version>0.21.2-final.0</version>
-        <relativePath>../pom.xml</relativePath>
-    </parent>
-
-    <artifactId>lancedb-lance-namespace</artifactId>
-    <name>${project.artifactId}</name>
-    <description>LanceDB Java Integration with Lance Namespace</description>
-    <packaging>jar</packaging>
-
-    <dependencies>
-        <dependency>
-            <groupId>com.lancedb</groupId>
-            <artifactId>lance-namespace-core</artifactId>
-        </dependency>
-    </dependencies>
-</project>
--- a/java/lance-namespace/src/main/java/com/lancedb/lancedb/LanceDbRestNamespaces.java
+++ b/java/lance-namespace/src/main/java/com/lancedb/lancedb/LanceDbRestNamespaces.java
@@ -1,146 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.lancedb.lancedb;
-
-import com.lancedb.lance.namespace.LanceRestNamespace;
-import com.lancedb.lance.namespace.client.apache.ApiClient;
-
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Optional;
-
-/** Util class to help construct a {@link LanceRestNamespace} for LanceDB. */
-public class LanceDbRestNamespaces {
-  private static final String DEFAULT_REGION = "us-east-1";
-  private static final String CLOUD_URL_PATTERN = "https://%s.%s.api.lancedb.com";
-
-  private String apiKey;
-  private String database;
-  private Optional<String> hostOverride = Optional.empty();
-  private Optional<String> region = Optional.empty();
-  private Map<String, String> additionalConfig = new HashMap<>();
-
-  private LanceDbRestNamespaces() {}
-
-  /**
-   * Create a new builder instance.
-   *
-   * @return A new LanceRestNamespaceBuilder
-   */
-  public static LanceDbRestNamespaces builder() {
-    return new LanceDbRestNamespaces();
-  }
-
-  /**
-   * Set the API key (required).
-   *
-   * @param apiKey The LanceDB API key
-   * @return This builder
-   */
-  public LanceDbRestNamespaces apiKey(String apiKey) {
-    if (apiKey == null || apiKey.trim().isEmpty()) {
-      throw new IllegalArgumentException("API key cannot be null or empty");
-    }
-    this.apiKey = apiKey;
-    return this;
-  }
-
-  /**
-   * Set the database name (required).
-   *
-   * @param database The database name
-   * @return This builder
-   */
-  public LanceDbRestNamespaces database(String database) {
-    if (database == null || database.trim().isEmpty()) {
-      throw new IllegalArgumentException("Database cannot be null or empty");
-    }
-    this.database = database;
-    return this;
-  }
-
-  /**
-   * Set a custom host override (optional). When set, this overrides the default LanceDB Cloud URL
-   * construction. Use this for LanceDB Enterprise deployments.
-   *
-   * @param hostOverride The complete base URL (e.g., "http://your-vpc-endpoint:80")
-   * @return This builder
-   */
-  public LanceDbRestNamespaces hostOverride(String hostOverride) {
-    this.hostOverride = Optional.ofNullable(hostOverride);
-    return this;
-  }
-
-  /**
-   * Set the region for LanceDB Cloud (optional). Defaults to "us-east-1" if not specified. This is
-   * ignored when hostOverride is set.
-   *
-   * @param region The AWS region (e.g., "us-east-1", "eu-west-1")
-   * @return This builder
-   */
-  public LanceDbRestNamespaces region(String region) {
-    this.region = Optional.ofNullable(region);
-    return this;
-  }
-
-  /**
-   * Add additional configuration parameters.
-   *
-   * @param key The configuration key
-   * @param value The configuration value
-   * @return This builder
-   */
-  public LanceDbRestNamespaces config(String key, String value) {
-    this.additionalConfig.put(key, value);
-    return this;
-  }
-
-  /**
-   * Build the LanceRestNamespace instance.
-   *
-   * @return A configured LanceRestNamespace
-   * @throws IllegalStateException if required parameters are missing
-   */
-  public LanceRestNamespace build() {
-    // Validate required fields
-    if (apiKey == null) {
-      throw new IllegalStateException("API key is required");
-    }
-    if (database == null) {
-      throw new IllegalStateException("Database is required");
-    }
-
-    // Build configuration map
-    Map<String, String> config = new HashMap<>(additionalConfig);
-    config.put("headers.x-lancedb-database", database);
-    config.put("headers.x-api-key", apiKey);
-
-    // Determine base URL
-    String baseUrl;
-    if (hostOverride.isPresent()) {
-      baseUrl = hostOverride.get();
-      config.put("host_override", hostOverride.get());
-    } else {
-      String effectiveRegion = region.orElse(DEFAULT_REGION);
-      baseUrl = String.format(CLOUD_URL_PATTERN, database, effectiveRegion);
-      config.put("region", effectiveRegion);
-    }
-
-    // Create and configure ApiClient
-    ApiClient apiClient = new ApiClient();
-    apiClient.setBasePath(baseUrl);
-
-    return new LanceRestNamespace(apiClient, config);
-  }
-}
--- a/java/mvnw
+++ b/java/mvnw
@@ -1,259 +0,0 @@
-#!/bin/sh
-# ----------------------------------------------------------------------------
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# ----------------------------------------------------------------------------
-
-# ----------------------------------------------------------------------------
-# Apache Maven Wrapper startup batch script, version 3.3.2
-#
-# Optional ENV vars
-# -----------------
-#   JAVA_HOME - location of a JDK home dir, required when download maven via java source
-#   MVNW_REPOURL - repo url base for downloading maven distribution
-#   MVNW_USERNAME/MVNW_PASSWORD - user and password for downloading maven
-#   MVNW_VERBOSE - true: enable verbose log; debug: trace the mvnw script; others: silence the output
-# ----------------------------------------------------------------------------
-
-set -euf
-[ "${MVNW_VERBOSE-}" != debug ] || set -x
-
-# OS specific support.
-native_path() { printf %s\\n "$1"; }
-case "$(uname)" in
-CYGWIN* | MINGW*)
-  [ -z "${JAVA_HOME-}" ] || JAVA_HOME="$(cygpath --unix "$JAVA_HOME")"
-  native_path() { cygpath --path --windows "$1"; }
-  ;;
-esac
-
-# set JAVACMD and JAVACCMD
-set_java_home() {
-  # For Cygwin and MinGW, ensure paths are in Unix format before anything is touched
-  if [ -n "${JAVA_HOME-}" ]; then
-    if [ -x "$JAVA_HOME/jre/sh/java" ]; then
-      # IBM's JDK on AIX uses strange locations for the executables
-      JAVACMD="$JAVA_HOME/jre/sh/java"
-      JAVACCMD="$JAVA_HOME/jre/sh/javac"
-    else
-      JAVACMD="$JAVA_HOME/bin/java"
-      JAVACCMD="$JAVA_HOME/bin/javac"
-
-      if [ ! -x "$JAVACMD" ] || [ ! -x "$JAVACCMD" ]; then
-        echo "The JAVA_HOME environment variable is not defined correctly, so mvnw cannot run." >&2
-        echo "JAVA_HOME is set to \"$JAVA_HOME\", but \"\$JAVA_HOME/bin/java\" or \"\$JAVA_HOME/bin/javac\" does not exist." >&2
-        return 1
-      fi
-    fi
-  else
-    JAVACMD="$(
-      'set' +e
-      'unset' -f command 2>/dev/null
-      'command' -v java
-    )" || :
-    JAVACCMD="$(
-      'set' +e
-      'unset' -f command 2>/dev/null
-      'command' -v javac
-    )" || :
-
-    if [ ! -x "${JAVACMD-}" ] || [ ! -x "${JAVACCMD-}" ]; then
-      echo "The java/javac command does not exist in PATH nor is JAVA_HOME set, so mvnw cannot run." >&2
-      return 1
-    fi
-  fi
-}
-
-# hash string like Java String::hashCode
-hash_string() {
-  str="${1:-}" h=0
-  while [ -n "$str" ]; do
-    char="${str%"${str#?}"}"
-    h=$(((h * 31 + $(LC_CTYPE=C printf %d "'$char")) % 4294967296))
-    str="${str#?}"
-  done
-  printf %x\\n $h
-}
-
-verbose() { :; }
-[ "${MVNW_VERBOSE-}" != true ] || verbose() { printf %s\\n "${1-}"; }
-
-die() {
-  printf %s\\n "$1" >&2
-  exit 1
-}
-
-trim() {
-  # MWRAPPER-139:
-  #   Trims trailing and leading whitespace, carriage returns, tabs, and linefeeds.
-  #   Needed for removing poorly interpreted newline sequences when running in more
-  #   exotic environments such as mingw bash on Windows.
-  printf "%s" "${1}" | tr -d '[:space:]'
-}
-
-# parse distributionUrl and optional distributionSha256Sum, requires .mvn/wrapper/maven-wrapper.properties
-while IFS="=" read -r key value; do
-  case "${key-}" in
-  distributionUrl) distributionUrl=$(trim "${value-}") ;;
-  distributionSha256Sum) distributionSha256Sum=$(trim "${value-}") ;;
-  esac
-done <"${0%/*}/.mvn/wrapper/maven-wrapper.properties"
-[ -n "${distributionUrl-}" ] || die "cannot read distributionUrl property in ${0%/*}/.mvn/wrapper/maven-wrapper.properties"
-
-case "${distributionUrl##*/}" in
-maven-mvnd-*bin.*)
-  MVN_CMD=mvnd.sh _MVNW_REPO_PATTERN=/maven/mvnd/
-  case "${PROCESSOR_ARCHITECTURE-}${PROCESSOR_ARCHITEW6432-}:$(uname -a)" in
-  *AMD64:CYGWIN* | *AMD64:MINGW*) distributionPlatform=windows-amd64 ;;
-  :Darwin*x86_64) distributionPlatform=darwin-amd64 ;;
-  :Darwin*arm64) distributionPlatform=darwin-aarch64 ;;
-  :Linux*x86_64*) distributionPlatform=linux-amd64 ;;
-  *)
-    echo "Cannot detect native platform for mvnd on $(uname)-$(uname -m), use pure java version" >&2
-    distributionPlatform=linux-amd64
-    ;;
-  esac
-  distributionUrl="${distributionUrl%-bin.*}-$distributionPlatform.zip"
-  ;;
-maven-mvnd-*) MVN_CMD=mvnd.sh _MVNW_REPO_PATTERN=/maven/mvnd/ ;;
-*) MVN_CMD="mvn${0##*/mvnw}" _MVNW_REPO_PATTERN=/org/apache/maven/ ;;
-esac
-
-# apply MVNW_REPOURL and calculate MAVEN_HOME
-# maven home pattern: ~/.m2/wrapper/dists/{apache-maven-<version>,maven-mvnd-<version>-<platform>}/<hash>
-[ -z "${MVNW_REPOURL-}" ] || distributionUrl="$MVNW_REPOURL$_MVNW_REPO_PATTERN${distributionUrl#*"$_MVNW_REPO_PATTERN"}"
-distributionUrlName="${distributionUrl##*/}"
-distributionUrlNameMain="${distributionUrlName%.*}"
-distributionUrlNameMain="${distributionUrlNameMain%-bin}"
-MAVEN_USER_HOME="${MAVEN_USER_HOME:-${HOME}/.m2}"
-MAVEN_HOME="${MAVEN_USER_HOME}/wrapper/dists/${distributionUrlNameMain-}/$(hash_string "$distributionUrl")"
-
-exec_maven() {
-  unset MVNW_VERBOSE MVNW_USERNAME MVNW_PASSWORD MVNW_REPOURL || :
-  exec "$MAVEN_HOME/bin/$MVN_CMD" "$@" || die "cannot exec $MAVEN_HOME/bin/$MVN_CMD"
-}
-
-if [ -d "$MAVEN_HOME" ]; then
-  verbose "found existing MAVEN_HOME at $MAVEN_HOME"
-  exec_maven "$@"
-fi
-
-case "${distributionUrl-}" in
-*?-bin.zip | *?maven-mvnd-?*-?*.zip) ;;
-*) die "distributionUrl is not valid, must match *-bin.zip or maven-mvnd-*.zip, but found '${distributionUrl-}'" ;;
-esac
-
-# prepare tmp dir
-if TMP_DOWNLOAD_DIR="$(mktemp -d)" && [ -d "$TMP_DOWNLOAD_DIR" ]; then
-  clean() { rm -rf -- "$TMP_DOWNLOAD_DIR"; }
-  trap clean HUP INT TERM EXIT
-else
-  die "cannot create temp dir"
-fi
-
-mkdir -p -- "${MAVEN_HOME%/*}"
-
-# Download and Install Apache Maven
-verbose "Couldn't find MAVEN_HOME, downloading and installing it ..."
-verbose "Downloading from: $distributionUrl"
-verbose "Downloading to: $TMP_DOWNLOAD_DIR/$distributionUrlName"
-
-# select .zip or .tar.gz
-if ! command -v unzip >/dev/null; then
-  distributionUrl="${distributionUrl%.zip}.tar.gz"
-  distributionUrlName="${distributionUrl##*/}"
-fi
-
-# verbose opt
-__MVNW_QUIET_WGET=--quiet __MVNW_QUIET_CURL=--silent __MVNW_QUIET_UNZIP=-q __MVNW_QUIET_TAR=''
-[ "${MVNW_VERBOSE-}" != true ] || __MVNW_QUIET_WGET='' __MVNW_QUIET_CURL='' __MVNW_QUIET_UNZIP='' __MVNW_QUIET_TAR=v
-
-# normalize http auth
-case "${MVNW_PASSWORD:+has-password}" in
-'') MVNW_USERNAME='' MVNW_PASSWORD='' ;;
-has-password) [ -n "${MVNW_USERNAME-}" ] || MVNW_USERNAME='' MVNW_PASSWORD='' ;;
-esac
-
-if [ -z "${MVNW_USERNAME-}" ] && command -v wget >/dev/null; then
-  verbose "Found wget ... using wget"
-  wget ${__MVNW_QUIET_WGET:+"$__MVNW_QUIET_WGET"} "$distributionUrl" -O "$TMP_DOWNLOAD_DIR/$distributionUrlName" || die "wget: Failed to fetch $distributionUrl"
-elif [ -z "${MVNW_USERNAME-}" ] && command -v curl >/dev/null; then
-  verbose "Found curl ... using curl"
-  curl ${__MVNW_QUIET_CURL:+"$__MVNW_QUIET_CURL"} -f -L -o "$TMP_DOWNLOAD_DIR/$distributionUrlName" "$distributionUrl" || die "curl: Failed to fetch $distributionUrl"
-elif set_java_home; then
-  verbose "Falling back to use Java to download"
-  javaSource="$TMP_DOWNLOAD_DIR/Downloader.java"
-  targetZip="$TMP_DOWNLOAD_DIR/$distributionUrlName"
-  cat >"$javaSource" <<-END
-	public class Downloader extends java.net.Authenticator
-	{
-	  protected java.net.PasswordAuthentication getPasswordAuthentication()
-	  {
-	    return new java.net.PasswordAuthentication( System.getenv( "MVNW_USERNAME" ), System.getenv( "MVNW_PASSWORD" ).toCharArray() );
-	  }
-	  public static void main( String[] args ) throws Exception
-	  {
-	    setDefault( new Downloader() );
-	    java.nio.file.Files.copy( java.net.URI.create( args[0] ).toURL().openStream(), java.nio.file.Paths.get( args[1] ).toAbsolutePath().normalize() );
-	  }
-	}
-	END
-  # For Cygwin/MinGW, switch paths to Windows format before running javac and java
-  verbose " - Compiling Downloader.java ..."
-  "$(native_path "$JAVACCMD")" "$(native_path "$javaSource")" || die "Failed to compile Downloader.java"
-  verbose " - Running Downloader.java ..."
-  "$(native_path "$JAVACMD")" -cp "$(native_path "$TMP_DOWNLOAD_DIR")" Downloader "$distributionUrl" "$(native_path "$targetZip")"
-fi
-
-# If specified, validate the SHA-256 sum of the Maven distribution zip file
-if [ -n "${distributionSha256Sum-}" ]; then
-  distributionSha256Result=false
-  if [ "$MVN_CMD" = mvnd.sh ]; then
-    echo "Checksum validation is not supported for maven-mvnd." >&2
-    echo "Please disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." >&2
-    exit 1
-  elif command -v sha256sum >/dev/null; then
-    if echo "$distributionSha256Sum  $TMP_DOWNLOAD_DIR/$distributionUrlName" | sha256sum -c >/dev/null 2>&1; then
-      distributionSha256Result=true
-    fi
-  elif command -v shasum >/dev/null; then
-    if echo "$distributionSha256Sum  $TMP_DOWNLOAD_DIR/$distributionUrlName" | shasum -a 256 -c >/dev/null 2>&1; then
-      distributionSha256Result=true
-    fi
-  else
-    echo "Checksum validation was requested but neither 'sha256sum' or 'shasum' are available." >&2
-    echo "Please install either command, or disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." >&2
-    exit 1
-  fi
-  if [ $distributionSha256Result = false ]; then
-    echo "Error: Failed to validate Maven distribution SHA-256, your Maven distribution might be compromised." >&2
-    echo "If you updated your Maven version, you need to update the specified distributionSha256Sum property." >&2
-    exit 1
-  fi
-fi
-
-# unzip and move
-if command -v unzip >/dev/null; then
-  unzip ${__MVNW_QUIET_UNZIP:+"$__MVNW_QUIET_UNZIP"} "$TMP_DOWNLOAD_DIR/$distributionUrlName" -d "$TMP_DOWNLOAD_DIR" || die "failed to unzip"
-else
-  tar xzf${__MVNW_QUIET_TAR:+"$__MVNW_QUIET_TAR"} "$TMP_DOWNLOAD_DIR/$distributionUrlName" -C "$TMP_DOWNLOAD_DIR" || die "failed to untar"
-fi
-printf %s\\n "$distributionUrl" >"$TMP_DOWNLOAD_DIR/$distributionUrlNameMain/mvnw.url"
-mv -- "$TMP_DOWNLOAD_DIR/$distributionUrlNameMain" "$MAVEN_HOME" || [ -d "$MAVEN_HOME" ] || die "fail to move MAVEN_HOME"
-
-clean || :
-exec_maven "$@"
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -6,10 +6,11 @@

    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-parent</artifactId>
-    <version>0.21.2-final.0</version>
+    <version>0.20.0-beta.2</version>
    <packaging>pom</packaging>
-    <name>${project.artifactId}</name>
-    <description>LanceDB Java SDK Parent POM</description>
+
+    <name>LanceDB Parent</name>
+    <description>LanceDB vector database Java API</description>
    <url>http://lancedb.com/</url>

    <developers>
@@ -28,7 +29,6 @@
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <arrow.version>15.0.0</arrow.version>
-        <lance-namespace.verison>0.0.1</lance-namespace.verison>
        <spotless.skip>false</spotless.skip>
        <spotless.version>2.30.0</spotless.version>
        <spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
@@ -52,7 +52,6 @@

    <modules>
        <module>core</module>
-        <module>lance-namespace</module>
    </modules>

    <scm>
@@ -63,11 +62,6 @@

    <dependencyManagement>
        <dependencies>
-            <dependency>
-                <groupId>com.lancedb</groupId>
-                <artifactId>lance-namespace-core</artifactId>
-                <version>${lance-namespace.verison}</version>
-            </dependency>
            <dependency>
                <groupId>org.apache.arrow</groupId>
                <artifactId>arrow-vector</artifactId>
--- a/node/.eslintrc.js
+++ b/node/.eslintrc.js
@@ -0,0 +1,22 @@
+module.exports = {
+  env: {
+    browser: true,
+    es2021: true
+  },
+  extends: 'standard-with-typescript',
+  overrides: [
+  ],
+  parserOptions: {
+    project: './tsconfig.json',
+    ecmaVersion: 'latest',
+    sourceType: 'module'
+  },
+  rules: {
+    "@typescript-eslint/method-signature-style": "off",
+    "@typescript-eslint/quotes": "off",
+    "@typescript-eslint/semi": "off",
+    "@typescript-eslint/explicit-function-return-type": "off",
+    "@typescript-eslint/space-before-function-paren": "off",
+    "@typescript-eslint/indent": "off",
+  }
+}
--- a/node/.npmignore
+++ b/node/.npmignore
@@ -0,0 +1,4 @@
+gen_test_data.py
+index.node
+dist/lancedb*.tgz
+vectordb*.tgz
--- a/node/CHANGELOG.md
+++ b/node/CHANGELOG.md
@@ -0,0 +1,64 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [0.1.5] - 2023-06-00
+
+### Added
+
+- Support for macOS X86
+
+## [0.1.4] - 2023-06-03
+
+### Added
+
+- Select / Project query API
+
+### Changed
+
+- Deprecated create_index in favor of createIndex
+
+## [0.1.3] - 2023-06-01
+
+### Added
+
+- Support S3 and Google Cloud Storage
+- Embedding functions support
+- OpenAI embedding function
+
+## [0.1.2] - 2023-05-27
+
+### Added
+
+- Append records API
+- Extra query params to nodejs client
+- Create_index API
+ 
+### Fixed
+
+- bugfix: string columns should be converted to Utf8Array (#94)
+
+## [0.1.1] - 2023-05-16
+
+### Added
+
+- create_table API
+- limit parameter for queries
+- Typescript / JavaScript examples
+- Linux support
+
+## [0.1.0] - 2023-05-16
+
+### Added
+
+- Initial  JavaScript / Node.js library for LanceDB
+- Read-only api to query LanceDB datasets
+- Supports macOS arm only
+
+## [pre-0.1.0]
+
+- Various prototypes / test builds
+
--- a/node/README.md
+++ b/node/README.md
@@ -0,0 +1,66 @@
+# LanceDB
+
+A JavaScript / Node.js library for [LanceDB](https://github.com/lancedb/lancedb).
+
+**DEPRECATED: This library is deprecated. Please use the new client,
+[@lancedb/lancedb](https://www.npmjs.com/package/@lancedb/lancedb).**
+
+## Installation
+
+```bash
+npm install vectordb
+```
+
+This will download the appropriate native library for your platform. We currently
+support:
+
+* Linux (x86_64 and aarch64)
+* macOS (Intel and ARM/M1/M2)
+* Windows (x86_64 only)
+
+We do not yet support musl-based Linux (such as Alpine Linux) or aarch64 Windows.
+
+## Usage
+
+### Basic Example
+
+```javascript
+const lancedb = require('vectordb');
+const db = await lancedb.connect('data/sample-lancedb');
+const table = await db.createTable("my_table",
+      [{ id: 1, vector: [0.1, 1.0], item: "foo", price: 10.0 },
+      { id: 2, vector: [3.9, 0.5], item: "bar", price: 20.0 }])
+const results = await table.search([0.1, 0.3]).limit(20).execute();
+console.log(results);
+```
+
+The [examples](./examples) folder contains complete examples.
+
+## Development
+
+To build everything fresh:
+
+```bash
+npm install
+npm run build
+```
+
+Then you should be able to run the tests with:
+
+```bash
+npm test
+```
+
+### Fix lints
+
+To run the linter and have it automatically fix all errors
+
+```bash
+npm run lint -- --fix
+```
+
+To build documentation
+
+```bash
+npx typedoc --plugin typedoc-plugin-markdown --out ../docs/src/javascript src/index.ts
+```
--- a/node/examples/js-openai/index.js
+++ b/node/examples/js-openai/index.js
@@ -0,0 +1,41 @@
+// Copyright 2023 Lance Developers.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+'use strict'
+
+async function example () {
+  const lancedb = require('vectordb')
+  // You need to provide an OpenAI API key, here we read it from the OPENAI_API_KEY environment variable
+  const apiKey = process.env.OPENAI_API_KEY
+  // The embedding function will create embeddings for the 'text' column(text in this case)
+  const embedding = new lancedb.OpenAIEmbeddingFunction('text', apiKey)
+
+  const db = await lancedb.connect('data/sample-lancedb')
+
+  const data = [
+    { id: 1, text: 'Black T-Shirt', price: 10 },
+    { id: 2, text: 'Leather Jacket', price: 50 }
+  ]
+
+  const table = await db.createTable('vectors', data, embedding)
+  console.log(await db.tableNames())
+
+  const results = await table
+    .search('keeps me warm')
+    .limit(1)
+    .execute()
+  console.log(results[0].text)
+}
+
+example().then(_ => { console.log('All done!') })
--- a/node/examples/js-openai/package.json
+++ b/node/examples/js-openai/package.json
@@ -0,0 +1,15 @@
+{
+  "name": "vectordb-example-js-openai",
+  "version": "1.0.0",
+  "description": "",
+  "main": "index.js",
+  "scripts": {
+    "test": "echo \"Error: no test specified\" && exit 1"
+  },
+  "author": "Lance Devs",
+  "license": "Apache-2.0",
+  "dependencies": {
+    "vectordb": "file:../..",
+    "openai": "^3.2.1"
+  }
+}
--- a/node/examples/js-transformers/index.js
+++ b/node/examples/js-transformers/index.js
@@ -0,0 +1,66 @@
+// Copyright 2023 Lance Developers.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+'use strict'
+
+
+async function example() {
+
+    const lancedb = require('vectordb')
+
+    // Import transformers and the all-MiniLM-L6-v2 model (https://huggingface.co/Xenova/all-MiniLM-L6-v2)
+    const { pipeline } = await import('@xenova/transformers')
+    const pipe = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
+
+
+    // Create embedding function from pipeline which returns a list of vectors from batch
+    // sourceColumn is the name of the column in the data to be embedded
+    //
+    // Output of pipe is a Tensor { data: Float32Array(384) }, so filter for the vector
+    const embed_fun = {}
+    embed_fun.sourceColumn = 'text'
+    embed_fun.embed = async function (batch) {
+        let result = []
+        for (let text of batch) {
+            const res = await pipe(text, { pooling: 'mean', normalize: true })
+            result.push(Array.from(res['data']))
+        }
+        return (result)
+    }
+
+    // Link a folder and create a table with data
+    const db = await lancedb.connect('data/sample-lancedb')
+
+    const data = [
+        { id: 1, text: 'Cherry', type: 'fruit' },
+        { id: 2, text: 'Carrot', type: 'vegetable' },
+        { id: 3, text: 'Potato', type: 'vegetable' },
+        { id: 4, text: 'Apple', type: 'fruit' },
+        { id: 5, text: 'Banana', type: 'fruit' }
+    ]
+
+    const table = await db.createTable('food_table', data, embed_fun)
+
+
+    // Query the table
+    const results = await table
+        .search("a sweet fruit to eat")
+        .metricType("cosine")
+        .limit(2)
+        .execute()
+    console.log(results.map(r => r.text))
+
+}
+
+example().then(_ => { console.log("Done!") })
--- a/node/examples/js-transformers/package.json
+++ b/node/examples/js-transformers/package.json
@@ -0,0 +1,16 @@
+{
+  "name": "vectordb-example-js-transformers",
+  "version": "1.0.0",
+  "description": "Example for using transformers.js with lancedb",
+  "main": "index.js",
+  "scripts": {
+    "test": "echo \"Error: no test specified\" && exit 1"
+  },
+  "author": "Lance Devs",
+  "license": "Apache-2.0",
+  "dependencies": {
+    "@xenova/transformers": "^2.4.1",
+    "vectordb": "file:../.."
+  }
+
+}
--- a/node/examples/js-youtube-transcripts/index.js
+++ b/node/examples/js-youtube-transcripts/index.js
@@ -0,0 +1,122 @@
+// Copyright 2023 Lance Developers.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+'use strict'
+
+const lancedb = require('vectordb')
+const fs = require('fs/promises')
+const readline = require('readline/promises')
+const { stdin: input, stdout: output } = require('process')
+const { Configuration, OpenAIApi } = require('openai')
+
+// Download file from XYZ
+const INPUT_FILE_NAME = 'data/youtube-transcriptions_sample.jsonl';
+
+(async () => {
+  // You need to provide an OpenAI API key, here we read it from the OPENAI_API_KEY environment variable
+  const apiKey = process.env.OPENAI_API_KEY
+  // The embedding function will create embeddings for the 'context' column
+  const embedFunction = new lancedb.OpenAIEmbeddingFunction('context', apiKey)
+
+  // Connects to LanceDB
+  const db = await lancedb.connect('data/youtube-lancedb')
+
+  // Open the vectors table or create one if it does not exist
+  let tbl
+  if ((await db.tableNames()).includes('vectors')) {
+    tbl = await db.openTable('vectors', embedFunction)
+  } else {
+    tbl = await createEmbeddingsTable(db, embedFunction)
+  }
+
+  // Use OpenAI Completion API to generate an answer based on the context that LanceDB provides
+  const configuration = new Configuration({ apiKey })
+  const openai = new OpenAIApi(configuration)
+  const rl = readline.createInterface({ input, output })
+  try {
+    while (true) {
+      const query = await rl.question('Prompt: ')
+      const results = await tbl
+        .search(query)
+        .select(['title', 'text', 'context'])
+        .limit(3)
+        .execute()
+
+      // console.table(results)
+
+      const response = await openai.createCompletion({
+        model: 'text-davinci-003',
+        prompt: createPrompt(query, results),
+        max_tokens: 400,
+        temperature: 0,
+        top_p: 1,
+        frequency_penalty: 0,
+        presence_penalty: 0
+      })
+      console.log(response.data.choices[0].text)
+    }
+  } catch (err) {
+    console.log('Error: ', err)
+  } finally {
+    rl.close()
+  }
+  process.exit(1)
+})()
+
+async function createEmbeddingsTable (db, embedFunction) {
+  console.log(`Creating embeddings from ${INPUT_FILE_NAME}`)
+  // read the input file into a JSON array, skipping empty lines
+  const lines = (await fs.readFile(INPUT_FILE_NAME, 'utf-8'))
+    .toString()
+    .split('\n')
+    .filter(line => line.length > 0)
+    .map(line => JSON.parse(line))
+
+  const data = contextualize(lines, 20, 'video_id')
+  return await db.createTable('vectors', data, embedFunction)
+}
+
+// Each transcript has a small text column, we include previous transcripts in order to
+// have more context information when creating embeddings
+function contextualize (rows, contextSize, groupColumn) {
+  const grouped = []
+  rows.forEach(row => {
+    if (!grouped[row[groupColumn]]) {
+      grouped[row[groupColumn]] = []
+    }
+    grouped[row[groupColumn]].push(row)
+  })
+
+  const data = []
+  Object.keys(grouped).forEach(key => {
+    for (let i = 0; i < grouped[key].length; i++) {
+      const start = i - contextSize > 0 ? i - contextSize : 0
+      grouped[key][i].context = grouped[key].slice(start, i + 1).map(r => r.text).join(' ')
+    }
+    data.push(...grouped[key])
+  })
+  return data
+}
+
+// Creates a prompt by aggregating all relevant contexts
+function createPrompt (query, context) {
+  let prompt =
+      'Answer the question based on the context below.\n\n' +
+      'Context:\n'
+
+  // need to make sure our prompt is not larger than max size
+  prompt = prompt + context.map(c => c.context).join('\n\n---\n\n').substring(0, 3750)
+  prompt = prompt + `\n\nQuestion: ${query}\nAnswer:`
+  return prompt
+}
--- a/node/examples/js-youtube-transcripts/package.json
+++ b/node/examples/js-youtube-transcripts/package.json
@@ -0,0 +1,15 @@
+{
+  "name": "vectordb-example-js-openai",
+  "version": "1.0.0",
+  "description": "",
+  "main": "index.js",
+  "scripts": {
+    "test": "echo \"Error: no test specified\" && exit 1"
+  },
+  "author": "Lance Devs",
+  "license": "Apache-2.0",
+  "dependencies": {
+    "vectordb": "file:../..",
+    "openai": "^3.2.1"
+  }
+}
--- a/node/examples/js/index.js
+++ b/node/examples/js/index.js
@@ -0,0 +1,36 @@
+// Copyright 2023 Lance Developers.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+'use strict'
+
+async function example () {
+  const lancedb = require('vectordb')
+  const db = await lancedb.connect('data/sample-lancedb')
+
+  const data = [
+    { id: 1, vector: [0.1, 0.2], price: 10 },
+    { id: 2, vector: [1.1, 1.2], price: 50 }
+  ]
+
+  const table = await db.createTable('vectors', data)
+  console.log(await db.tableNames())
+
+  const results = await table
+      .search([0.1, 0.3])
+      .limit(20)
+      .execute()
+  console.log(results)
+}
+
+example()
--- a/node/examples/js/package.json
+++ b/node/examples/js/package.json
@@ -0,0 +1,14 @@
+{
+  "name": "vectordb-example-js",
+  "version": "1.0.0",
+  "description": "",
+  "main": "index.js",
+  "scripts": {
+    "test": "echo \"Error: no test specified\" && exit 1"
+  },
+  "author": "Lance Devs",
+  "license": "Apache-2.0",
+  "dependencies": {
+    "vectordb": "file:../.."
+  }
+}
--- a/node/examples/ts/package.json
+++ b/node/examples/ts/package.json
@@ -0,0 +1,22 @@
+{
+  "name": "vectordb-example-ts",
+  "version": "1.0.0",
+  "description": "",
+  "main": "dist/index.js",
+  "types": "dist/index.d.ts",
+  "scripts": {
+    "tsc": "tsc -b",
+    "build": "tsc"
+  },
+  "author": "Lance Devs",
+  "license": "Apache-2.0",
+  "devDependencies": {
+    "@types/node": "^18.16.2",
+    "ts-node": "^10.9.1",
+    "ts-node-dev": "^2.0.0",
+    "typescript": "*"
+  },
+  "dependencies": {
+    "vectordb": "file:../.."
+  }
+}
--- a/node/examples/ts/src/index.ts
+++ b/node/examples/ts/src/index.ts
@@ -0,0 +1,35 @@
+// Copyright 2023 Lance Developers.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import * as vectordb from 'vectordb';
+
+async function example () {
+    const db = await vectordb.connect('data/sample-lancedb')
+
+    const data = [
+        { id: 1, vector: [0.1, 0.2], price: 10 },
+        { id: 2, vector: [1.1, 1.2], price: 50 }
+    ]
+
+    const table = await db.createTable('vectors', data)
+    console.log(await db.tableNames())
+
+    const results = await table
+        .search([0.1, 0.3])
+        .limit(20)
+        .execute()
+    console.log(results)
+}
+
+example().then(_ => { console.log ("All done!") })
--- a/node/examples/ts/tsconfig.json
+++ b/node/examples/ts/tsconfig.json
@@ -0,0 +1,10 @@
+{
+  "include": ["src/**/*.ts"],
+  "compilerOptions": {
+    "target": "es2016",
+    "module": "commonjs",
+    "declaration": true,
+    "outDir": "./dist",
+    "strict": true
+  }
+}
--- a/node/native.js
+++ b/node/native.js
@@ -0,0 +1,36 @@
+// Copyright 2023 Lance Developers.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+const { currentTarget } = require('@neon-rs/load')
+
+let nativeLib
+
+try {
+  // When developing locally, give preference to the local built library
+  nativeLib = require('./index.node')
+} catch {
+  try {
+    nativeLib = require(`@lancedb/vectordb-${currentTarget()}`)
+  } catch (e) {
+    throw new Error(`vectordb: failed to load native library.
+  You may need to run \`npm install @lancedb/vectordb-${currentTarget()}\`.
+
+  If that does not work, please file a bug report at https://github.com/lancedb/lancedb/issues
+      
+  Source error: ${e}`)
+  }
+}
+
+// Dynamic require for runtime.
+module.exports = nativeLib
--- a/node/package-lock.json
+++ b/node/package-lock.json
--- a/node/package.json
+++ b/node/package.json
@@ -0,0 +1,98 @@
+{
+  "name": "vectordb",
+  "version": "0.20.0-beta.2",
+  "description": "Serverless, low-latency vector database for AI applications",
+  "private": false,
+  "main": "dist/index.js",
+  "types": "dist/index.d.ts",
+  "scripts": {
+    "tsc": "tsc -b",
+    "build": "npm run tsc && cargo-cp-artifact --artifact cdylib lancedb_node index.node -- cargo build -p lancedb-node --message-format=json",
+    "build-release": "npm run build -- --release",
+    "test": "npm run tsc && mocha -recursive dist/test",
+    "integration-test": "npm run tsc && mocha -recursive dist/integration_test",
+    "lint": "eslint native.js src --ext .js,.ts",
+    "clean": "rm -rf node_modules *.node dist/",
+    "pack-build": "neon pack-build",
+    "check-npm": "printenv && which node && which npm && npm --version"
+  },
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/lancedb/lancedb.git"
+  },
+  "homepage": "https://lancedb.github.io/lancedb/",
+  "bugs": {
+    "url": "https://github.com/lancedb/lancedb/issues"
+  },
+  "keywords": [
+    "data-format",
+    "data-science",
+    "machine-learning",
+    "data-analytics"
+  ],
+  "author": "Lance Devs",
+  "license": "Apache-2.0",
+  "devDependencies": {
+    "@neon-rs/cli": "^0.0.160",
+    "@types/chai": "^4.3.4",
+    "@types/chai-as-promised": "^7.1.5",
+    "@types/mocha": "^10.0.1",
+    "@types/node": "^18.16.2",
+    "@types/sinon": "^10.0.15",
+    "@types/temp": "^0.9.1",
+    "@types/uuid": "^9.0.3",
+    "@typescript-eslint/eslint-plugin": "^5.59.1",
+    "apache-arrow-old": "npm:apache-arrow@13.0.0",
+    "cargo-cp-artifact": "^0.1",
+    "chai": "^4.3.7",
+    "chai-as-promised": "^7.1.1",
+    "eslint": "^8.39.0",
+    "eslint-config-standard-with-typescript": "^34.0.1",
+    "eslint-plugin-import": "^2.26.0",
+    "eslint-plugin-n": "^15.7.0",
+    "eslint-plugin-promise": "^6.1.1",
+    "mocha": "^10.2.0",
+    "openai": "^4.24.1",
+    "sinon": "^15.1.0",
+    "temp": "^0.9.4",
+    "ts-node": "^10.9.1",
+    "ts-node-dev": "^2.0.0",
+    "typedoc": "^0.24.7",
+    "typedoc-plugin-markdown": "^3.15.3",
+    "typescript": "^5.1.0",
+    "uuid": "^9.0.0"
+  },
+  "dependencies": {
+    "@neon-rs/load": "^0.0.74",
+    "axios": "^1.4.0"
+  },
+  "peerDependencies": {
+    "@apache-arrow/ts": "^14.0.2",
+    "apache-arrow": "^14.0.2"
+  },
+  "os": [
+    "darwin",
+    "linux",
+    "win32"
+  ],
+  "cpu": [
+    "x64",
+    "arm64"
+  ],
+  "neon": {
+    "targets": {
+      "x86_64-apple-darwin": "@lancedb/vectordb-darwin-x64",
+      "aarch64-apple-darwin": "@lancedb/vectordb-darwin-arm64",
+      "x86_64-unknown-linux-gnu": "@lancedb/vectordb-linux-x64-gnu",
+      "aarch64-unknown-linux-gnu": "@lancedb/vectordb-linux-arm64-gnu",
+      "x86_64-pc-windows-msvc": "@lancedb/vectordb-win32-x64-msvc"
+    }
+  },
+  "optionalDependencies": {
+    "@lancedb/vectordb-darwin-x64": "0.20.0-beta.2",
+    "@lancedb/vectordb-darwin-arm64": "0.20.0-beta.2",
+    "@lancedb/vectordb-linux-x64-gnu": "0.20.0-beta.2",
+    "@lancedb/vectordb-linux-arm64-gnu": "0.20.0-beta.2",
+    "@lancedb/vectordb-win32-x64-msvc": "0.20.0-beta.2"
+  }
+}
--- a/node/src/arrow.ts
+++ b/node/src/arrow.ts
@@ -0,0 +1,635 @@
+// Copyright 2023 Lance Developers.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import {
+  Field,
+  makeBuilder,
+  RecordBatchFileWriter,
+  Utf8,
+  type Vector,
+  FixedSizeList,
+  vectorFromArray,
+  Schema,
+  Table as ArrowTable,
+  RecordBatchStreamWriter,
+  List,
+  RecordBatch,
+  makeData,
+  Struct,
+  type Float,
+  DataType,
+  Binary,
+  Float32
+} from "apache-arrow";
+import { type EmbeddingFunction } from "./index";
+import { sanitizeSchema } from "./sanitize";
+
+/**
+ * Options to control how a column should be converted to a vector array
+ */
+export class VectorColumnOptions {
+  /** Vector column type. */
+  type: Float = new Float32();
+
+  constructor(values?: Partial<VectorColumnOptions>) {
+    Object.assign(this, values);
+  }
+}
+
+/** Options to control the makeArrowTable call. */
+export class MakeArrowTableOptions {
+  /**
+   * Schema of the data.
+   *
+   * If this is not provided then the data type will be inferred from the
+   * JS type.  Integer numbers will become int64, floating point numbers
+   * will become float64 and arrays will become variable sized lists with
+   * the data type inferred from the first element in the array.
+   *
+   * The schema must be specified if there are no records (e.g. to make
+   * an empty table)
+   */
+  schema?: Schema;
+
+  /**
+   * Mapping from vector column name to expected type
+   *
+   * Lance expects vector columns to be fixed size list arrays (i.e. tensors)
+   * However, `makeArrowTable` will not infer this by default (it creates
+   * variable size list arrays).  This field can be used to indicate that a column
+   * should be treated as a vector column and converted to a fixed size list.
+   *
+   * The keys should be the names of the vector columns.  The value specifies the
+   * expected data type of the vector columns.
+   *
+   * If `schema` is provided then this field is ignored.
+   *
+   * By default, the column named "vector" will be assumed to be a float32
+   * vector column.
+   */
+  vectorColumns: Record<string, VectorColumnOptions> = {
+    vector: new VectorColumnOptions()
+  };
+
+  embeddings?: EmbeddingFunction<any>;
+
+  /**
+   * If true then string columns will be encoded with dictionary encoding
+   *
+   * Set this to true if your string columns tend to repeat the same values
+   * often.  For more precise control use the `schema` property to specify the
+   * data type for individual columns.
+   *
+   * If `schema` is provided then this property is ignored.
+   */
+  dictionaryEncodeStrings: boolean = false;
+
+  constructor(values?: Partial<MakeArrowTableOptions>) {
+    Object.assign(this, values);
+  }
+}
+
+/**
+ * An enhanced version of the {@link makeTable} function from Apache Arrow
+ * that supports nested fields and embeddings columns.
+ *
+ * This function converts an array of Record<String, any> (row-major JS objects)
+ * to an Arrow Table (a columnar structure)
+ *
+ * Note that it currently does not support nulls.
+ *
+ * If a schema is provided then it will be used to determine the resulting array
+ * types.  Fields will also be reordered to fit the order defined by the schema.
+ *
+ * If a schema is not provided then the types will be inferred and the field order
+ * will be controlled by the order of properties in the first record.
+ *
+ * If the input is empty then a schema must be provided to create an empty table.
+ *
+ * When a schema is not specified then data types will be inferred.  The inference
+ * rules are as follows:
+ *
+ *  - boolean => Bool
+ *  - number => Float64
+ *  - String => Utf8
+ *  - Buffer => Binary
+ *  - Record<String, any> => Struct
+ *  - Array<any> => List
+ *
+ * @param data input data
+ * @param options options to control the makeArrowTable call.
+ *
+ * @example
+ *
+ * ```ts
+ *
+ * import { fromTableToBuffer, makeArrowTable } from "../arrow";
+ * import { Field, FixedSizeList, Float16, Float32, Int32, Schema } from "apache-arrow";
+ *
+ * const schema = new Schema([
+ *   new Field("a", new Int32()),
+ *   new Field("b", new Float32()),
+ *   new Field("c", new FixedSizeList(3, new Field("item", new Float16()))),
+ *  ]);
+ *  const table = makeArrowTable([
+ *    { a: 1, b: 2, c: [1, 2, 3] },
+ *    { a: 4, b: 5, c: [4, 5, 6] },
+ *    { a: 7, b: 8, c: [7, 8, 9] },
+ *  ], { schema });
+ * ```
+ *
+ * By default it assumes that the column named `vector` is a vector column
+ * and it will be converted into a fixed size list array of type float32.
+ * The `vectorColumns` option can be used to support other vector column
+ * names and data types.
+ *
+ * ```ts
+ *
+ * const schema = new Schema([
+    new Field("a", new Float64()),
+    new Field("b", new Float64()),
+    new Field(
+      "vector",
+      new FixedSizeList(3, new Field("item", new Float32()))
+    ),
+  ]);
+  const table = makeArrowTable([
+    { a: 1, b: 2, vector: [1, 2, 3] },
+    { a: 4, b: 5, vector: [4, 5, 6] },
+    { a: 7, b: 8, vector: [7, 8, 9] },
+  ]);
+  assert.deepEqual(table.schema, schema);
+ * ```
+ *
+ * You can specify the vector column types and names using the options as well
+ *
+ * ```typescript
+ *
+ * const schema = new Schema([
+    new Field('a', new Float64()),
+    new Field('b', new Float64()),
+    new Field('vec1', new FixedSizeList(3, new Field('item', new Float16()))),
+    new Field('vec2', new FixedSizeList(3, new Field('item', new Float16())))
+  ]);
+ * const table = makeArrowTable([
+    { a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] },
+    { a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] },
+    { a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] }
+  ], {
+    vectorColumns: {
+      vec1: { type: new Float16() },
+      vec2: { type: new Float16() }
+    }
+  });
+ * assert.deepEqual(table.schema, schema)
+ * ```
+ */
+export function makeArrowTable(
+  data: Array<Record<string, any>>,
+  options?: Partial<MakeArrowTableOptions>
+): ArrowTable {
+  // Without rows there is nothing to infer types from, so a schema is mandatory
+  if (data.length === 0 && options?.schema == null) {
+    throw new Error("At least one record or a schema needs to be provided");
+  }
+
+  const opt = new MakeArrowTableOptions(options ?? {});
+  if (opt.schema != null) {
+    opt.schema = sanitizeSchema(opt.schema);
+    opt.schema = validateSchemaEmbeddings(opt.schema, data, opt.embeddings);
+  }
+
+  const columns: Record<string, Vector> = {};
+  // TODO: sample dataset to find missing columns
+  // Prefer the field ordering of the schema, if present
+  const columnNames =
+    opt.schema != null ? (opt.schema.names as string[]) : Object.keys(data[0]);
+  for (const colName of columnNames) {
+    if (
+      data.length !== 0 &&
+      !Object.prototype.hasOwnProperty.call(data[0], colName)
+    ) {
+      // The field is present in the schema, but not in the data, skip it
+      continue;
+    }
+    // Extract a single column from the records (transpose from row-major to col-major)
+    let values = data.map((datum) => datum[colName]);
+
+    // By default (type === undefined) arrow will infer the type from the JS type
+    let type;
+    if (opt.schema != null) {
+      // If there is a schema provided, then use that for the type instead.
+      // (`!= null` so that an explicit `schema: null` behaves like no schema,
+      // matching every other schema check in this function.)
+      type = opt.schema.fields.find((f) => f.name === colName)?.type;
+      if (DataType.isInt(type) && type.bitWidth === 64) {
+        // wrap in BigInt to avoid bug: https://github.com/apache/arrow/issues/40051
+        // Missing values (null or undefined) become null: BigInt(undefined) throws
+        values = values.map((v) =>
+          v === null || v === undefined ? null : BigInt(v)
+        );
+      }
+    } else {
+      // Otherwise, check to see if this column is one of the vector columns
+      // defined by opt.vectorColumns and, if so, use the fixed size list type.
+      // The dimension comes from the first non-null value (row 0 may be null).
+      const vectorColumnOptions = opt.vectorColumns[colName];
+      const sample = values.find((v) => v !== null && v !== undefined);
+      if (vectorColumnOptions !== undefined && sample !== undefined) {
+        type = newVectorType(sample.length, vectorColumnOptions.type);
+      }
+    }
+
+    try {
+      // Convert an Array of JS values to an arrow vector
+      columns[colName] = makeVector(values, type, opt.dictionaryEncodeStrings);
+    } catch (error: unknown) {
+      // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
+      throw Error(`Could not convert column "${colName}" to Arrow: ${error}`);
+    }
+  }
+
+  if (opt.schema != null) {
+    // `new ArrowTable(columns)` infers a schema which may sometimes have
+    // incorrect nullability (it assumes nullable=true if there are 0 rows)
+    //
+    // `new ArrowTable(schema, columns)` will also fail because it will create a
+    // batch with an inferred schema and then complain that the batch schema
+    // does not match the provided schema.
+    //
+    // To work around this we first create a table with the wrong schema and
+    // then patch the schema of the batches so we can use
+    // `new ArrowTable(schema, batches)` which does not do any schema inference
+    const firstTable = new ArrowTable(columns);
+    const batchesFixed = firstTable.batches.map(
+      // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+      (batch) => new RecordBatch(opt.schema!, batch.data)
+    );
+    return new ArrowTable(opt.schema, batchesFixed);
+  } else {
+    return new ArrowTable(columns);
+  }
+}
+
+/**
+ * Create an empty Arrow table with the provided schema
+ */
+export function makeEmptyTable(schema: Schema): ArrowTable {
+  return makeArrowTable([], { schema });
+}
+
+// Helper function to convert Array<Array<any>> to a variable sized list array
+function makeListVector(lists: any[][]): Vector<any> {
+  // Infer the element type from the first non-empty list; entry 0 is not
+  // guaranteed to be usable (it may be empty or missing)
+  const sampleList = lists.find((list) => list != null && list.length > 0);
+  if (sampleList === undefined) {
+    throw Error("Cannot infer list vector from empty array or empty list");
+  }
+  let inferredType;
+  try {
+    inferredType = makeVector(sampleList).type;
+  } catch (error: unknown) {
+    // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
+    throw Error(`Cannot infer list vector.  Cannot infer inner type: ${error}`);
+  }
+  const listBuilder = makeBuilder({
+    type: new List(new Field("item", inferredType, true))
+  });
+  for (const list of lists) {
+    listBuilder.append(list);
+  }
+  return listBuilder.finish().toVector();
+}
+
+// Helper function to convert an Array of JS values to an Arrow Vector
+function makeVector(
+  values: any[],
+  type?: DataType,
+  stringAsDictionary?: boolean
+): Vector<any> {
+  if (type !== undefined) {
+    // No need for inference, let Arrow create it
+    return vectorFromArray(values, type);
+  }
+  if (values.length === 0) {
+    throw Error(
+      "makeVector requires at least one value or the type must be specfied"
+    );
+  }
+  const sampleValue = values.find((val) => val !== null && val !== undefined);
+  if (sampleValue === undefined) {
+    throw Error(
+      "makeVector cannot infer the type if all values are null or undefined"
+    );
+  }
+  if (Array.isArray(sampleValue)) {
+    // Default Arrow inference doesn't handle list types
+    return makeListVector(values);
+  } else if (Buffer.isBuffer(sampleValue)) {
+    // Default Arrow inference doesn't handle Buffer
+    return vectorFromArray(values, new Binary());
+  } else if (
+    !(stringAsDictionary ?? false) &&
+    (typeof sampleValue === "string" || sampleValue instanceof String)
+  ) {
+    // If the type is string then don't use Arrow's default inference unless dictionaries are requested
+    // because it will always use dictionary encoding for strings
+    return vectorFromArray(values, new Utf8());
+  } else {
+    // Convert a JS array of values to an arrow vector
+    return vectorFromArray(values);
+  }
+}
+
+async function applyEmbeddings<T>(
+  table: ArrowTable,
+  embeddings?: EmbeddingFunction<T>,
+  schema?: Schema
+): Promise<ArrowTable> {
+  if (embeddings == null) {
+    return table;
+  }
+  if (schema !== undefined && schema !== null) {
+    schema = sanitizeSchema(schema);
+  }
+
+  // Convert from ArrowTable to Record<String, Vector>
+  const colEntries = [...Array(table.numCols).keys()].map((_, idx) => {
+    const name = table.schema.fields[idx].name;
+    // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+    const vec = table.getChildAt(idx)!;
+    return [name, vec];
+  });
+  const newColumns = Object.fromEntries(colEntries);
+
+  const sourceColumn = newColumns[embeddings.sourceColumn];
+  const destColumn = embeddings.destColumn ?? "vector";
+  const innerDestType = embeddings.embeddingDataType ?? new Float32();
+  if (sourceColumn === undefined) {
+    throw new Error(
+      `Cannot apply embedding function because the source column '${embeddings.sourceColumn}' was not present in the data`
+    );
+  }
+
+  if (table.numRows === 0) {
+    if (Object.prototype.hasOwnProperty.call(newColumns, destColumn)) {
+      // We have an empty table and it already has the embedding column so no work needs to be done
+      // Note: we don't return an error like we did below because this is a common occurrence.  For example,
+      // if we call convertToTable with 0 records and a schema that includes the embedding
+      return table;
+    }
+    if (embeddings.embeddingDimension !== undefined) {
+      const destType = newVectorType(
+        embeddings.embeddingDimension,
+        innerDestType
+      );
+      newColumns[destColumn] = makeVector([], destType);
+    } else if (schema != null) {
+      const destField = schema.fields.find((f) => f.name === destColumn);
+      if (destField != null) {
+        newColumns[destColumn] = makeVector([], destField.type);
+      } else {
+        throw new Error(
+          `Attempt to apply embeddings to an empty table failed because schema was missing embedding column '${destColumn}'`
+        );
+      }
+    } else {
+      throw new Error(
+        "Attempt to apply embeddings to an empty table when the embeddings function does not specify `embeddingDimension`"
+      );
+    }
+  } else {
+    if (Object.prototype.hasOwnProperty.call(newColumns, destColumn)) {
+      throw new Error(
+        `Attempt to apply embeddings to table failed because column ${destColumn} already existed`
+      );
+    }
+    if (table.batches.length > 1) {
+      throw new Error(
+        "Internal error: `makeArrowTable` unexpectedly created a table with more than one batch"
+      );
+    }
+    const values = sourceColumn.toArray();
+    const vectors = await embeddings.embed(values as T[]);
+    if (vectors.length !== values.length) {
+      throw new Error(
+        "Embedding function did not return an embedding for each input element"
+      );
+    }
+    const destType = newVectorType(vectors[0].length, innerDestType);
+    newColumns[destColumn] = makeVector(vectors, destType);
+  }
+
+  const newTable = new ArrowTable(newColumns);
+  if (schema != null) {
+    if (schema.fields.find((f) => f.name === destColumn) === undefined) {
+      throw new Error(
+        `When using embedding functions and specifying a schema the schema should include the embedding column but the column ${destColumn} was missing`
+      );
+    }
+    return alignTable(newTable, schema);
+  }
+  return newTable;
+}
+
+/**
+ * Convert an Array of records into an Arrow Table, optionally applying an
+ * embeddings function to it.
+ *
+ * This function calls `makeArrowTable` first to create the Arrow Table.
+ * Any provided `makeTableOptions` (e.g. a schema) will be passed on to
+ * that call.
+ *
+ * The embedding function will be passed a column of values (based on the
+ * `sourceColumn` of the embedding function) and expects to receive back
+ * number[][] which will be converted into a fixed size list column.  By
+ * default this will be a fixed size list of Float32 but that can be
+ * customized by the `embeddingDataType` property of the embedding function.
+ *
+ * If a schema is provided in `makeTableOptions` then it should include the
+ * embedding columns.  If no schema is provided then embedding columns will
+ * be placed at the end of the table, after all of the input columns.
+ */
+export async function convertToTable<T>(
+  data: Array<Record<string, unknown>>,
+  embeddings?: EmbeddingFunction<T>,
+  makeTableOptions?: Partial<MakeArrowTableOptions>
+): Promise<ArrowTable> {
+  const table = makeArrowTable(data, makeTableOptions);
+  return await applyEmbeddings(table, embeddings, makeTableOptions?.schema);
+}
+
+// Creates the Arrow Type for a Vector column with dimension `dim`
+function newVectorType<T extends Float>(
+  dim: number,
+  innerType: T
+): FixedSizeList<T> {
+  // Somewhere we always default to have the elements nullable, so we need to set it to true
+  // otherwise we often get schema mismatches because the stored data always has schema with nullable elements
+  const children = new Field<T>("item", innerType, true);
+  return new FixedSizeList(dim, children);
+}
+
+/**
+ * Serialize an Array of records into a buffer using the Arrow IPC File serialization
+ *
+ * This function will call `convertToTable` and pass on `embeddings` and `schema`
+ *
+ * `schema` is required if data is empty
+ */
+export async function fromRecordsToBuffer<T>(
+  data: Array<Record<string, unknown>>,
+  embeddings?: EmbeddingFunction<T>,
+  schema?: Schema
+): Promise<Buffer> {
+  if (schema !== undefined && schema !== null) {
+    schema = sanitizeSchema(schema);
+  }
+  const table = await convertToTable(data, embeddings, { schema, embeddings });
+  const writer = RecordBatchFileWriter.writeAll(table);
+  return Buffer.from(await writer.toUint8Array());
+}
+
+/**
+ * Serialize an Array of records into a buffer using the Arrow IPC Stream serialization
+ *
+ * Calls `convertToTable`, passing on `schema` and `embeddings` (the latter also in the
+ * table options, so schema validation can see that an embedding function exists).
+ * `schema` is required if data is empty
+ */
+export async function fromRecordsToStreamBuffer<T>(
+  data: Array<Record<string, unknown>>,
+  embeddings?: EmbeddingFunction<T>,
+  schema?: Schema
+): Promise<Buffer> {
+  if (schema !== null && schema !== undefined) {
+    schema = sanitizeSchema(schema);
+  }
+  const table = await convertToTable(data, embeddings, { schema, embeddings });
+  const writer = RecordBatchStreamWriter.writeAll(table);
+  return Buffer.from(await writer.toUint8Array());
+}
+
+/**
+ * Serialize an Arrow Table into a buffer using the Arrow IPC File serialization
+ *
+ * This function will apply `embeddings` to the table in a manner similar to
+ * `convertToTable`.
+ *
+ * `schema` is required if the table is empty
+ */
+export async function fromTableToBuffer<T>(
+  table: ArrowTable,
+  embeddings?: EmbeddingFunction<T>,
+  schema?: Schema
+): Promise<Buffer> {
+  if (schema !== null && schema !== undefined) {
+    schema = sanitizeSchema(schema);
+  }
+  const tableWithEmbeddings = await applyEmbeddings(table, embeddings, schema);
+  const writer = RecordBatchFileWriter.writeAll(tableWithEmbeddings);
+  return Buffer.from(await writer.toUint8Array());
+}
+
+/**
+ * Serialize an Arrow Table into a buffer using the Arrow IPC Stream serialization
+ *
+ * This function will apply `embeddings` to the table in a manner similar to
+ * `convertToTable`.
+ *
+ * `schema` is required if the table is empty
+ */
+export async function fromTableToStreamBuffer<T>(
+  table: ArrowTable,
+  embeddings?: EmbeddingFunction<T>,
+  schema?: Schema
+): Promise<Buffer> {
+  if (schema !== null && schema !== undefined) {
+    schema = sanitizeSchema(schema);
+  }
+  const tableWithEmbeddings = await applyEmbeddings(table, embeddings, schema);
+  const writer = RecordBatchStreamWriter.writeAll(tableWithEmbeddings);
+  return Buffer.from(await writer.toUint8Array());
+}
+
+function alignBatch(batch: RecordBatch, schema: Schema): RecordBatch {
+  const alignedChildren = [];
+  for (const field of schema.fields) {
+    const indexInBatch = batch.schema.fields?.findIndex(
+      (f) => f.name === field.name
+    );
+    if (indexInBatch < 0) {
+      throw new Error(
+        `The column ${field.name} was not found in the Arrow Table`
+      );
+    }
+    alignedChildren.push(batch.data.children[indexInBatch]);
+  }
+  const newData = makeData({
+    type: new Struct(schema.fields),
+    length: batch.numRows,
+    nullCount: batch.nullCount,
+    children: alignedChildren
+  });
+  return new RecordBatch(schema, newData);
+}
+
+function alignTable(table: ArrowTable, schema: Schema): ArrowTable {
+  const alignedBatches = table.batches.map((batch) =>
+    alignBatch(batch, schema)
+  );
+  return new ArrowTable(schema, alignedBatches);
+}
+
+// Creates an empty Arrow Table
+export function createEmptyTable(schema: Schema): ArrowTable {
+  return new ArrowTable(sanitizeSchema(schema));
+}
+
+function validateSchemaEmbeddings(
+  schema: Schema<any>,
+  data: Array<Record<string, unknown>>,
+  embeddings: EmbeddingFunction<any> | undefined
+) {
+  const fields = [];
+  const missingEmbeddingFields = [];
+
+  // A `FixedSizeList` field that the first record does not provide is treated
+  // as an embedding column that is missing from the data.  Such fields are
+  // left out of the returned schema; every other field is kept as-is.
+  // If any embedding column is missing and no embedding function was
+  // provided, an error is thrown below.
+  for (const field of schema.fields) {
+    if (field.type instanceof FixedSizeList) {
+      if (data.length !== 0 && data?.[0]?.[field.name] === undefined) {
+        missingEmbeddingFields.push(field);
+      } else {
+        fields.push(field);
+      }
+    } else {
+      fields.push(field);
+    }
+  }
+
+  if (missingEmbeddingFields.length > 0 && embeddings === undefined) {
+    throw new Error(
+      `Table has embeddings: "${missingEmbeddingFields
+        .map((f) => f.name)
+        .join(",")}", but no embedding function was provided`
+    );
+  }
+
+  return new Schema(fields, schema.metadata);
+}
--- a/node/src/embedding/embedding_function.ts
+++ b/node/src/embedding/embedding_function.ts
@@ -0,0 +1,68 @@
+// Copyright 2023 Lance Developers.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import { type Float } from 'apache-arrow'
+
+/**
+ * An embedding function that automatically creates vector representation for a given column.
+ *
+ * The type parameter `T` is the type of the values handed to `embed`.
+ */
+export interface EmbeddingFunction<T> {
+  /**
+   * The name of the column that will be used as input for the Embedding Function.
+   */
+  sourceColumn: string
+
+  /**
+   * The data type of the embedding
+   *
+   * The `embed` function resolves to plain `number` values.  These will be
+   * converted into an Arrow float array.  By default this will be Float32 but
+   * this property can be used to control the conversion.
+   */
+  embeddingDataType?: Float
+
+  /**
+   * The dimension of the embedding
+   *
+   * This is optional, normally this can be determined by looking at the results of
+   * `embed`.  If this is not specified, and there is an attempt to apply the embedding
+   * to an empty table, then that process will fail.
+   */
+  embeddingDimension?: number
+
+  /**
+   * The name of the column that will contain the embedding
+   *
+   * By default this is "vector"
+   */
+  destColumn?: string
+
+  /**
+   * Should the source column be excluded from the resulting table
+   *
+   * By default the source column is included.  Set this to true and
+   * only the embedding will be stored.
+   */
+  excludeSource?: boolean
+
+  /**
+   * Creates a vector representation for the given values.
+   *
+   * @param data - The values to embed.
+   * @returns A promise resolving to an array of vectors, each a `number[]`.
+   *   NOTE(review): presumably one vector per input value, in input order - confirm.
+   */
+  embed: (data: T[]) => Promise<number[][]>
+}
+
+/**
+ * Type guard that checks whether `value` looks like an {@link EmbeddingFunction}.
+ *
+ * A value qualifies when it has a string `sourceColumn` and a function `embed`.
+ *
+ * @param value - Any value, including `null` or `undefined`.
+ * @returns `true` when both properties have the expected types, otherwise `false`.
+ */
+export function isEmbeddingFunction<T> (value: any): value is EmbeddingFunction<T> {
+  // `null` and `undefined` have no properties: reading `.sourceColumn` off
+  // them would throw a TypeError instead of answering "no".
+  if (value === null || value === undefined) {
+    return false
+  }
+  return typeof value.sourceColumn === 'string' &&
+      typeof value.embed === 'function'
+}
--- a/node/src/embedding/openai.ts
+++ b/node/src/embedding/openai.ts
@@ -0,0 +1,57 @@
+// Copyright 2023 Lance Developers.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import { type EmbeddingFunction } from '../index'
+import type OpenAI from 'openai'
+
+/**
+ * An {@link EmbeddingFunction} for strings that is backed by the OpenAI
+ * embeddings endpoint.
+ *
+ * The `openai` package is loaded lazily in the constructor, so it only has to
+ * be installed when this class is actually used.
+ */
+export class OpenAIEmbeddingFunction implements EmbeddingFunction<string> {
+  private readonly _openai: OpenAI
+  private readonly _modelName: string
+
+  /**
+   * @param sourceColumn - Name of the column whose values are embedded
+   * @param openAIKey - API key passed to the OpenAI client
+   * @param modelName - Embedding model to request
+   * @throws Error when the `openai` package cannot be required
+   */
+  constructor (sourceColumn: string, openAIKey: string, modelName: string = 'text-embedding-ada-002') {
+    /**
+     * @type {import("openai").default}
+     */
+    let OpenAIClient
+    try {
+      // eslint-disable-next-line @typescript-eslint/no-var-requires
+      OpenAIClient = require('openai')
+    } catch {
+      throw new Error('please install openai@^4.24.1 using npm install openai')
+    }
+
+    this.sourceColumn = sourceColumn
+    this._openai = new OpenAIClient({ apiKey: openAIKey })
+    this._modelName = modelName
+  }
+
+  /**
+   * Requests embeddings for `data` from the configured model and returns the
+   * `embedding` of every item in the response, in response order.
+   */
+  async embed (data: string[]): Promise<number[][]> {
+    const { data: items } = await this._openai.embeddings.create({
+      model: this._modelName,
+      input: data
+    })
+
+    return items.map((item) => item.embedding)
+  }
+
+  sourceColumn: string
+}
--- a/node/src/index.ts
+++ b/node/src/index.ts
--- a/node/src/integration_test/test.ts
+++ b/node/src/integration_test/test.ts
@@ -0,0 +1,180 @@
+// Copyright 2023 LanceDB Developers.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import { describe } from 'mocha'
+import * as chai from 'chai'
+import { assert } from 'chai'
+import * as chaiAsPromised from 'chai-as-promised'
+import { v4 as uuidv4 } from 'uuid'
+
+import * as lancedb from '../index'
+import { tmpdir } from 'os'
+import * as fs from 'fs'
+import * as path from 'path'
+
+chai.use(chaiAsPromised)
+
+describe('LanceDB AWS Integration test', function () {
+  // Creates a one-row table, issues five adds without awaiting them one by
+  // one, then reopens the table and expects six rows in total.
+  it('s3+ddb schema is processed correctly', async function () {
+    this.timeout(15000)
+
+    // WARNING: specifying engine is NOT a publicly supported feature in lancedb yet
+    // THE API WILL CHANGE
+    const uri = 's3://lancedb-integtest?engine=ddb&ddbTableName=lancedb-integtest'
+    const conn = await lancedb.connect(uri)
+    const rows = [{ vector: Array(128).fill(1.0) }]
+
+    const tableName = uuidv4()
+    const created = await conn.createTable(tableName, rows, { writeMode: lancedb.WriteMode.Overwrite })
+
+    // All five adds are started before any of them is awaited.
+    const pendingAdds = Array.from({ length: 5 }, () => created.add(rows))
+    await Promise.allSettled(pendingAdds)
+
+    const reopened = await conn.openTable(tableName)
+    assert.equal(await reopened.countRows(), 6)
+  })
+})
+
+describe('LanceDB Mirrored Store Integration test', function () {
+  // Writes to an s3 table configured with a local mirror directory and checks,
+  // after each operation (create, index, delete), that the expected files
+  // appear in the mirror.
+  it('s3://...?mirroredStore=... param is processed correctly', async function () {
+    this.timeout(600000)
+
+    const dir = tmpdir()
+    console.log(dir)
+    const conn = await lancedb.connect({ uri: `s3://lancedb-integtest?mirroredStore=${dir}`, storageOptions: { allowHttp: 'true' } })
+    const data = Array(200).fill({ vector: Array(128).fill(1.0), id: 0 })
+    data.push(...Array(200).fill({ vector: Array(128).fill(1.0), id: 1 }))
+    data.push(...Array(200).fill({ vector: Array(128).fill(1.0), id: 2 }))
+    data.push(...Array(200).fill({ vector: Array(128).fill(1.0), id: 3 }))
+
+    const tableName = uuidv4()
+    const mirroredPath = path.join(dir, `${tableName}.lance`)
+
+    // Directory listings are awaited (fs.promises) rather than read through
+    // callbacks: an assertion inside an un-awaited callback can run after the
+    // test has already finished, and its failure would surface as an uncaught
+    // exception instead of failing this test.
+    const list = async (...segments: string[]): Promise<fs.Dirent[]> => {
+      return await fs.promises.readdir(path.join(mirroredPath, ...segments), { withFileTypes: true })
+    }
+
+    // The mirror root must hold exactly `count` entries, all of them directories.
+    const assertTopLevelDirs = async (count: number): Promise<void> => {
+      const entries = await list()
+      assert.equal(entries.length, count)
+      for (const entry of entries) {
+        assert.isTrue(entry.isDirectory())
+      }
+    }
+
+    // `<mirror>/<subdir>` must hold exactly `count` entries, all ending in `suffix`.
+    const assertFiles = async (subdir: string, count: number, suffix: string): Promise<void> => {
+      const files = await list(subdir)
+      assert.equal(files.length, count)
+      for (const file of files) {
+        assert.isTrue(file.name.endsWith(suffix))
+      }
+    }
+
+    // `_indices` must hold one directory that contains a single `.idx` file.
+    const assertSingleIndex = async (): Promise<void> => {
+      const indices = await list('_indices')
+      assert.equal(indices.length, 1)
+      assert.isTrue(indices[0].isDirectory())
+
+      const indexFiles = await list('_indices', indices[0].name)
+      assert.equal(indexFiles.length, 1)
+      assert.isTrue(indexFiles[0].isFile())
+      assert.isTrue(indexFiles[0].name.endsWith('.idx'))
+    }
+
+    // try create table and check if it's mirrored
+    const t = await conn.createTable(tableName, data, { writeMode: lancedb.WriteMode.Overwrite })
+
+    // there should be three dirs
+    await assertTopLevelDirs(3)
+    await assertFiles('_transactions', 1, '.txn')
+    await assertFiles('_versions', 1, '.manifest')
+    await assertFiles('data', 1, '.lance')
+
+    // try create index and check if it's mirrored
+    await t.createIndex({ column: 'vector', type: 'ivf_pq' })
+
+    // there should be four dirs
+    await assertTopLevelDirs(4)
+    // Two TXs now
+    await assertFiles('_transactions', 2, '.txn')
+    await assertFiles('data', 1, '.lance')
+    await assertSingleIndex()
+
+    // try delete and check if it's mirrored
+    await t.delete('id = 0')
+
+    // there should be five dirs
+    await assertTopLevelDirs(5)
+    // Three TXs now
+    await assertFiles('_transactions', 3, '.txn')
+    await assertFiles('data', 1, '.lance')
+    await assertSingleIndex()
+    await assertFiles('_deletions', 1, '.arrow')
+  })
+})
--- a/node/src/middleware.ts
+++ b/node/src/middleware.ts
@@ -0,0 +1,58 @@
+// Copyright 2024 LanceDB Developers.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/**
+ * Middleware for Remote LanceDB Connection or Table
+ */
+export interface HttpMiddleware {
+  /**
+   * A callback that can be used to instrument the behavior of http requests to remote
+   * tables. It can be used to add headers, modify the request, or even short-circuit
+   * the request and return a response without making the request to the remote endpoint.
+   * It can also be used to modify the response from the remote endpoint.
+   *
+   * @param {RemoteRequest} req - Request to the remote endpoint
+   * @param next - Callback to advance the middleware chain. It takes the
+   *   (possibly modified) request and resolves to the response.
+   * @returns A promise resolving to the response for this request
+   */
+  onRemoteRequest(
+    req: RemoteRequest,
+    next: (req: RemoteRequest) => Promise<RemoteResponse>,
+  ): Promise<RemoteResponse>
+};
+
+/**
+ * The HTTP methods a {@link RemoteRequest} can use.
+ *
+ * This is a numeric enum: `GET` is 0 and `POST` is 1.
+ */
+export enum Method {
+  GET,
+  POST
+}
+
+/**
+ * A LanceDB Remote HTTP Request
+ */
+export interface RemoteRequest {
+  /** The URI the request is sent to */
+  uri: string
+  /** The HTTP method of the request */
+  method: Method
+  /** The request headers, keyed by header name */
+  headers: Map<string, string>
+  /** Optional request parameters, keyed by parameter name */
+  params?: Map<string, string>
+  /** Optional request payload */
+  body?: any
+}
+
+/**
+ * A LanceDB Remote HTTP Response
+ */
+export interface RemoteResponse {
+  /** The HTTP status code */
+  status: number
+  /** The HTTP status text that accompanies the status code */
+  statusText: string
+  /** The response headers, keyed by header name */
+  headers: Map<string, string>
+  /** Asynchronous accessor that resolves to the response payload */
+  body: () => Promise<any>
+}
--- a/node/src/query.ts
+++ b/node/src/query.ts
@@ -0,0 +1,163 @@
+// Copyright 2023 LanceDB Developers.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import { Vector, tableFromIPC } from 'apache-arrow'
+import { type EmbeddingFunction } from './embedding/embedding_function'
+import { type MetricType } from '.'
+
+// eslint-disable-next-line @typescript-eslint/no-var-requires
+const { tableSearch } = require('../native.js')
+
+/**
+ * A builder for nearest neighbor queries for LanceDB.
+ *
+ * Every setter returns the query itself so calls can be chained.
+ *
+ * NOTE(review): the query instance itself is handed to the native
+ * `tableSearch` call, which presumably reads the private fields by name -
+ * confirm before renaming any of them.
+ */
+export class Query<T = number[]> {
+  private readonly _query?: T
+  private readonly _tbl?: any
+  private _queryVector?: number[]
+  private _limit?: number
+  private _refineFactor?: number
+  private _nprobes: number
+  private _select?: string[]
+  private _filter?: string
+  private _metricType?: MetricType
+  private _prefilter: boolean
+  private _fastSearch: boolean
+  protected readonly _embeddings?: EmbeddingFunction<T>
+
+  /**
+   * @param query The value to search for. It is embedded with `embeddings`
+   *              when an embedding function is given, otherwise it is used
+   *              as the query vector directly.
+   * @param tbl The table handle the native search is invoked on
+   * @param embeddings Optional embedding function applied to `query`
+   */
+  constructor (query?: T, tbl?: any, embeddings?: EmbeddingFunction<T>) {
+    this._tbl = tbl
+    this._query = query
+    this._limit = 10
+    this._nprobes = 20
+    this._refineFactor = undefined
+    this._select = undefined
+    this._filter = undefined
+    this._metricType = undefined
+    this._embeddings = embeddings
+    this._prefilter = false
+    this._fastSearch = false
+  }
+
+  /**
+   * Sets the number of results that will be returned
+   * default value is 10
+   * @param value number of results
+   */
+  limit (value: number): Query<T> {
+    this._limit = value
+    return this
+  }
+
+  /**
+   * Refine the results by reading extra elements and re-ranking them in memory.
+   * @param value refine factor to use in this query.
+   */
+  refineFactor (value: number): Query<T> {
+    this._refineFactor = value
+    return this
+  }
+
+  /**
+   * The number of probes used. A higher number makes search more accurate but also slower.
+   * The default value is 20.
+   * @param value The number of probes used.
+   */
+  nprobes (value: number): Query<T> {
+    this._nprobes = value
+    return this
+  }
+
+  /**
+   * A filter statement to be applied to this query.
+   * @param value A filter in the same format used by a sql WHERE clause.
+   */
+  filter (value: string): Query<T> {
+    this._filter = value
+    return this
+  }
+
+  /** Alias of {@link Query.filter}. */
+  where = this.filter
+
+  /**
+   * Return only the specified columns.
+   *
+   * @param value Only select the specified columns. If not specified, all columns will be returned.
+   */
+  select (value: string[]): Query<T> {
+    this._select = value
+    return this
+  }
+
+  /**
+   * The MetricType used for this Query.
+   * @param value The metric to use. @see MetricType for the different options
+   */
+  metricType (value: MetricType): Query<T> {
+    this._metricType = value
+    return this
+  }
+
+  /**
+   * Sets the prefilter flag of this query. The default value is false.
+   *
+   * NOTE(review): presumably controls whether the filter is applied before
+   * the vector search rather than after it - confirm.
+   * @param value The new value of the prefilter flag.
+   */
+  prefilter (value: boolean): Query<T> {
+    this._prefilter = value
+    return this
+  }
+
+  /**
+   * Skip searching un-indexed data. This can make search faster, but will miss
+   * any data that is not yet indexed.
+   */
+  fastSearch (value: boolean): Query<T> {
+    this._fastSearch = value
+    return this
+  }
+
+  /**
+   * Execute the query and return the results as an Array of Objects
+   *
+   * Arrow `Vector` values in each row are converted with `toJSON()`; every
+   * other value is passed through unchanged.
+   */
+  async execute<R = Record<string, unknown>> (): Promise<R[]> {
+    // The result-row type parameter is named `R` so that it does not shadow
+    // the class-level `T`, which is the type of the query value.
+    if (this._query !== undefined) {
+      if (this._embeddings !== undefined) {
+        this._queryVector = (await this._embeddings.embed([this._query]))[0]
+      } else {
+        this._queryVector = this._query as number[]
+      }
+    }
+
+    const isElectron = this.isElectron()
+    const buffer = await tableSearch.call(this._tbl, this, isElectron)
+    const data = tableFromIPC(buffer)
+
+    return data.toArray().map((entry: Record<string, unknown>) => {
+      const newObject: Record<string, unknown> = {}
+      Object.keys(entry).forEach((key: string) => {
+        if (entry[key] instanceof Vector) {
+          // toJSON() returns f16 array correctly
+          newObject[key] = (entry[key] as any).toJSON()
+        } else {
+          newObject[key] = entry[key] as any
+        }
+      })
+      return newObject as unknown as R
+    })
+  }
+
+  // See https://github.com/electron/electron/issues/2288
+  //
+  // Optional chaining does not protect against globals that are not declared
+  // at all (`process` outside Node, `navigator` in older Node), so both are
+  // probed with `typeof` first. This way a missing `process` no longer skips
+  // the user-agent check, and the result is always a real boolean.
+  private isElectron (): boolean {
+    try {
+      if (
+        typeof process !== 'undefined' &&
+        process.versions != null &&
+        Object.prototype.hasOwnProperty.call(process.versions, 'electron')
+      ) {
+        return true
+      }
+      return typeof navigator !== 'undefined' &&
+        navigator.userAgent?.toLowerCase().includes(' electron') === true
+    } catch (e) {
+      return false
+    }
+  }
+}
--- a/node/src/remote/client.ts
+++ b/node/src/remote/client.ts
@@ -0,0 +1,302 @@
+// Copyright 2023 LanceDB Developers.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import axios, { type AxiosError, type AxiosResponse, type ResponseType } from 'axios'
+
+import { tableFromIPC, type Table as ArrowTable } from 'apache-arrow'
+
+import { type RemoteResponse, type RemoteRequest, Method } from '../middleware'
+import type { MetricType } from '..'
+
+/**
+ * The middleware contract used internally by the HTTP client.
+ *
+ * `onRemoteRequest` receives the outgoing request together with a `next`
+ * callback that advances the chain, and resolves to the response.
+ */
+interface HttpLancedbClientMiddleware {
+  onRemoteRequest(
+    req: RemoteRequest,
+    next: (req: RemoteRequest) => Promise<RemoteResponse>,
+  ): Promise<RemoteResponse>
+}
+
+/**
+ * Invoke the middleware chain and at the end call the remote endpoint
+ *
+ * Each middleware receives the request plus a `next` callback that hands the
+ * (possibly replaced) request to the following middleware. A middleware that
+ * never calls `next` short-circuits the chain and no HTTP call is made.
+ *
+ * @param req - The request to send
+ * @param middlewares - The middlewares to run, in order
+ * @param opts - Optional timeout and response type for the HTTP call; the
+ *   response type is only applied to POST requests
+ */
+async function callWithMiddlewares (
+  req: RemoteRequest,
+  middlewares: HttpLancedbClientMiddleware[],
+  opts?: MiddlewareInvocationOptions
+): Promise<RemoteResponse> {
+  // Performs the actual HTTP call once every middleware has run.
+  const send = async (finalReq: RemoteRequest): Promise<RemoteResponse> => {
+    const headers = Object.fromEntries(finalReq.headers.entries())
+    const params = Object.fromEntries(finalReq.params?.entries() ?? [])
+    const timeout = opts?.timeout
+
+    if (finalReq.method === Method.POST) {
+      const posted = await axios.post(
+        finalReq.uri,
+        finalReq.body,
+        {
+          headers,
+          params,
+          timeout,
+          responseType: opts?.responseType
+        }
+      )
+      return toLanceRes(posted)
+    }
+
+    const fetched = await axios.get(
+      finalReq.uri,
+      {
+        headers,
+        params,
+        timeout
+      }
+    )
+    return toLanceRes(fetched)
+  }
+
+  // Runs the middleware at `position`, or sends the request when the chain
+  // is exhausted.
+  const dispatch = async (
+    position: number,
+    current: RemoteRequest
+  ): Promise<RemoteResponse> => {
+    if (position >= middlewares.length) {
+      return await send(current)
+    }
+
+    return await middlewares[position].onRemoteRequest(
+      current,
+      async (forwarded) => {
+        return await dispatch(position + 1, forwarded)
+      }
+    )
+  }
+
+  return await dispatch(0, req)
+}
+
+/**
+ * Options forwarded to the HTTP call made at the end of the middleware chain.
+ */
+interface MiddlewareInvocationOptions {
+  /** The axios response type requested for the call */
+  responseType?: ResponseType
+  /** The timeout handed to axios for the call */
+  timeout?: number
+}
+
+/**
+ * Marshall the library response into a LanceDB response
+ *
+ * Status and status text are copied over, the headers are collected into a
+ * `Map`, and `body` resolves to the `data` of the axios response.
+ */
+function toLanceRes (res: AxiosResponse): RemoteResponse {
+  const headerMap = new Map()
+  for (const name in res.headers) {
+    headerMap.set(name, res.headers[name])
+  }
+
+  const readBody = async (): Promise<any> => res.data
+  const { status, statusText } = res
+
+  return { status, statusText, headers: headerMap, body: readBody }
+}
+
+/**
+ * Reads the body of an error response and turns it into text.
+ *
+ * With the `arraybuffer` response type the body is decoded with a
+ * `TextDecoder`. Otherwise objects are serialized with `JSON.stringify` and
+ * any other value is returned as it is.
+ */
+async function decodeErrorData(
+  res: RemoteResponse,
+  responseType?: ResponseType
+): Promise<string> {
+  const payload = await res.body()
+
+  if (responseType === 'arraybuffer') {
+    return new TextDecoder().decode(payload)
+  }
+
+  return typeof payload === 'object' ? JSON.stringify(payload) : payload
+}
+
+/**
+ * HTTP client for a remote LanceDB endpoint.
+ *
+ * Every request carries the API key (and, when configured, the database
+ * name) as headers and is routed through the registered middlewares before
+ * it is sent.
+ */
+export class HttpLancedbClient {
+  private readonly _url: string
+  private readonly _apiKey: () => string
+  private readonly _middlewares: HttpLancedbClientMiddleware[]
+  private readonly _timeout: number | undefined
+
+  /**
+   * @param url - Base URL that request paths are appended to
+   * @param apiKey - Value sent in the `x-api-key` header
+   * @param timeout - Optional timeout handed to axios for each request
+   * @param _dbName - When defined, sent in the `x-lancedb-database` header
+   */
+  public constructor (
+    url: string,
+    apiKey: string,
+    timeout?: number,
+    private readonly _dbName?: string
+  ) {
+    this._url = url
+    this._apiKey = () => apiKey
+    this._middlewares = []
+    this._timeout = timeout
+  }
+
+  /** The base URL this client sends requests to. */
+  get uri (): string {
+    return this._url
+  }
+
+  /**
+   * Runs a vector query against `/v1/table/<tableName>/query/`.
+   *
+   * The arguments are posted as a JSON body; the response is requested as an
+   * array buffer and decoded with `tableFromIPC`.
+   *
+   * @returns The decoded Arrow table
+   * @throws Error on network failures or when the response status is not 200
+   */
+  public async search (
+    tableName: string,
+    vector: number[],
+    k: number,
+    nprobes: number,
+    prefilter: boolean,
+    refineFactor?: number,
+    columns?: string[],
+    filter?: string,
+    metricType?: MetricType,
+    fastSearch?: boolean
+  ): Promise<ArrowTable<any>> {
+    const result = await this.post(
+      `/v1/table/${tableName}/query/`,
+      {
+        vector,
+        k,
+        nprobes,
+        refine_factor: refineFactor,
+        columns,
+        filter,
+        prefilter,
+        metric: metricType,
+        fast_search: fastSearch
+      },
+      undefined,
+      undefined,
+      'arraybuffer'
+    )
+    const table = tableFromIPC(await result.body())
+    return table
+  }
+
+  /**
+   * Send a GET request.
+   *
+   * @param path - Path appended to the base URL
+   * @param params - Optional request parameters
+   * @returns The response of a successful request
+   * @throws Error `Network Error: ...` when no response was received, or
+   *   `Server Error, ...` when the failed response has a status other than 200
+   */
+  public async get (path: string, params?: Record<string, string>): Promise<RemoteResponse> {
+    const req = {
+      uri: `${this._url}${path}`,
+      method: Method.GET,
+      headers: new Map(Object.entries({
+        'Content-Type': 'application/json',
+        'x-api-key': this._apiKey(),
+        ...(this._dbName !== undefined ? { 'x-lancedb-database': this._dbName } : {})
+      })),
+      params: new Map(Object.entries(params ?? {}))
+    }
+
+    let response
+    try {
+      // Hand over the configured timeout so that GET requests are bounded in
+      // the same way POST requests are.
+      response = await callWithMiddlewares(req, this._middlewares, {
+        timeout: this._timeout
+      })
+      return response
+    } catch (err: any) {
+      console.error(serializeErrorAsJson(err))
+      if (err.response === undefined) {
+        throw new Error(`Network Error: ${err.message as string}`)
+      }
+
+      response = toLanceRes(err.response)
+    }
+
+    if (response.status !== 200) {
+      const errorData = await decodeErrorData(response)
+      throw new Error(
+        `Server Error, status: ${response.status}, ` +
+        `message: ${response.statusText}: ${errorData}`
+      )
+    }
+
+    return response
+  }
+
+  /**
+   * Send a POST request.
+   *
+   * @param path - Path appended to the base URL
+   * @param data - Optional request body
+   * @param params - Optional request parameters
+   * @param content - `Content-Type` header value, `application/json` by default
+   * @param responseType - Optional axios response type for the call
+   * @returns The response, which always has status 200
+   * @throws Error `Network Error: ...` when no response was received, or
+   *   `Server Error, ...` when the response status is not 200
+   */
+  public async post (
+    path: string,
+    data?: any,
+    params?: Record<string, string>,
+    content?: string | undefined,
+    responseType?: ResponseType | undefined
+  ): Promise<RemoteResponse> {
+    const req = {
+      uri: `${this._url}${path}`,
+      method: Method.POST,
+      headers: new Map(Object.entries({
+        'Content-Type': content ?? 'application/json',
+        'x-api-key': this._apiKey(),
+        ...(this._dbName !== undefined ? { 'x-lancedb-database': this._dbName } : {})
+      })),
+      params: new Map(Object.entries(params ?? {})),
+      body: data
+    }
+
+    let response
+    try {
+      // Unlike `get`, a successful response is not returned right away: it
+      // falls through to the status check below.
+      response = await callWithMiddlewares(req, this._middlewares, {
+        responseType,
+        timeout: this._timeout
+      })
+    } catch (err: any) {
+      console.error(serializeErrorAsJson(err))
+
+      if (err.response === undefined) {
+        throw new Error(`Network Error: ${err.message as string}`)
+      }
+      response = toLanceRes(err.response)
+    }
+
+    if (response.status !== 200) {
+      const errorData = await decodeErrorData(response, responseType)
+      throw new Error(
+        `Server Error, status: ${response.status}, ` +
+        `message: ${response.statusText}: ${errorData}`
+      )
+    }
+
+    return response
+  }
+
+  /**
+   * Instrument this client with middleware
+   * @param mw - The middleware that instruments the client
+   * @returns - an instance of this client instrumented with the middleware
+   */
+  public withMiddleware (mw: HttpLancedbClientMiddleware): HttpLancedbClient {
+    // The middleware is added to a clone, so this client stays unchanged.
+    const wrapped = this.clone()
+    wrapped._middlewares.push(mw)
+    return wrapped
+  }
+
+  /**
+   * Make a clone of this client
+   *
+   * The clone gets its own middleware list holding the same middlewares.
+   */
+  private clone (): HttpLancedbClient {
+    const clone = new HttpLancedbClient(this._url, this._apiKey(), this._timeout, this._dbName)
+    for (const mw of this._middlewares) {
+      clone._middlewares.push(mw)
+    }
+    return clone
+  }
+}
+
+/**
+ * Serializes an axios error into a JSON string of the form `{ "error": ... }`.
+ *
+ * The own property names of the error are passed to `JSON.stringify` as the
+ * list of keys to keep, which also applies to nested objects. The response is
+ * therefore serialized separately, with its own property names, and stored
+ * under `error.response` (`null` when there is no response).
+ */
+function serializeErrorAsJson(err: AxiosError) {
+  const error = JSON.parse(JSON.stringify(err, Object.getOwnPropertyNames(err)))
+
+  if (err.response != null) {
+    // config contains the request data, too noisy
+    const keptProps = Object.getOwnPropertyNames(err.response)
+      .filter(prop => prop !== 'config')
+    error.response = JSON.parse(JSON.stringify(err.response, keptProps))
+  } else {
+    error.response = null
+  }
+
+  return JSON.stringify({ error })
+}
--- a/node/src/remote/index.ts
+++ b/node/src/remote/index.ts
@@ -0,0 +1,567 @@
+// Copyright 2023 LanceDB Developers.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import {
+  type EmbeddingFunction,
+  type Table,
+  type VectorIndexParams,
+  type Connection,
+  type ConnectionOptions,
+  type CreateTableOptions,
+  type VectorIndex,
+  type WriteOptions,
+  type IndexStats,
+  type UpdateArgs,
+  type UpdateSqlArgs,
+  makeArrowTable,
+  type MergeInsertArgs,
+  type ColumnAlteration
+} from '../index'
+import { Query } from '../query'
+
+import { Vector, Table as ArrowTable } from 'apache-arrow'
+import { HttpLancedbClient } from './client'
+import { isEmbeddingFunction } from '../embedding/embedding_function'
+import {
+  createEmptyTable,
+  fromRecordsToStreamBuffer,
+  fromTableToStreamBuffer
+} from '../arrow'
+import { toSQL, TTLCache } from '../util'
+import { type HttpMiddleware } from '../middleware'
+
+/**
+ * Remote connection.
+ *
+ * Implements {@link Connection} on top of an {@link HttpLancedbClient}.
+ * Names of tables that were listed, opened or created through this connection
+ * are remembered in a TTL cache so that `openTable` can skip the existence
+ * check for them.
+ */
+export class RemoteConnection implements Connection {
+  private _client: HttpLancedbClient
+  private readonly _dbName: string
+  // NOTE(review): 300_000 is presumably a TTL in milliseconds (5 minutes) - confirm against TTLCache
+  private readonly _tableCache = new TTLCache(300_000)
+
+  /**
+   * @param opts - connection options; `uri` must start with `db://`, and an
+   *   API key (from `opts.apiKey` or the LANCEDB_API_KEY env variable) and a
+   *   region are required.
+   * @throws Error when the uri is not a `db://` uri or key/region is missing.
+   */
+  constructor (opts: ConnectionOptions) {
+    if (!opts.uri.startsWith('db://')) {
+      throw new Error(`Invalid remote DB URI: ${opts.uri}`)
+    }
+    if (opts.apiKey == null || opts.apiKey === '') {
+      opts = Object.assign({}, opts, { apiKey: process.env.LANCEDB_API_KEY })
+    }
+    // The env variable may be unset or empty as well, so an empty key is
+    // treated the same way as a missing one; a null region is also rejected.
+    if (opts.apiKey == null || opts.apiKey === '' || opts.region == null) {
+      throw new Error(
+        'API key and region are must be passed for remote connections. ' +
+        'API key can also be set through LANCEDB_API_KEY env variable.')
+    }
+
+    this._dbName = opts.uri.slice('db://'.length)
+    let server: string
+    if (opts.hostOverride === undefined) {
+      server = `https://${this._dbName}.${opts.region}.api.lancedb.com`
+    } else {
+      server = opts.hostOverride
+    }
+    // The database name is only handed to the client when a host override is
+    // used; otherwise it is already part of the server host name.
+    this._client = new HttpLancedbClient(
+      server,
+      opts.apiKey,
+      opts.timeout,
+      opts.hostOverride === undefined ? undefined : this._dbName
+    )
+  }
+
+  get uri (): string {
+    // add the db:// prefix back
+    return 'db://' + this._client.uri
+  }
+
+  /**
+   * List table names, one page at a time, and remember them in the cache.
+   * @param pageToken - token of the page to fetch (empty string by default)
+   * @param limit - maximum number of names to request (10 by default)
+   */
+  async tableNames (
+    pageToken: string = '',
+    limit: number = 10
+  ): Promise<string[]> {
+    const response = await this._client.get('/v1/table/', {
+      limit: `${limit}`,
+      page_token: pageToken
+    })
+    const body = await response.body()
+    for (const table of body.tables) {
+      this._tableCache.set(table, true)
+    }
+    return body.tables
+  }
+
+  async openTable (name: string): Promise<Table>
+  async openTable<T>(
+    name: string,
+    embeddings: EmbeddingFunction<T>
+  ): Promise<Table<T>>
+  async openTable<T>(
+    name: string,
+    embeddings?: EmbeddingFunction<T>
+  ): Promise<Table<T>> {
+    // check if the table exists (skipped when the name is already cached)
+    if (this._tableCache.get(name) === undefined) {
+      await this._client.post(`/v1/table/${encodeURIComponent(name)}/describe/`)
+      this._tableCache.set(name, true)
+    }
+
+    if (embeddings !== undefined) {
+      return new RemoteTable(this._client, name, embeddings)
+    } else {
+      return new RemoteTable(this._client, name)
+    }
+  }
+
+  /**
+   * Create a table from records, an Arrow table, or (when no data is given)
+   * from a schema alone, and return a handle to it.
+   * @throws Error when neither data nor a schema is available, or when the
+   *   server answers with a non-200 status.
+   */
+  async createTable<T>(
+    nameOrOpts: string | CreateTableOptions<T>,
+    data?: Array<Record<string, unknown>> | ArrowTable,
+    optsOrEmbedding?: WriteOptions | EmbeddingFunction<T>,
+    opt?: WriteOptions
+  ): Promise<Table<T>> {
+    // Logic copied from LocalConnection, refactor these to a base class + connectionImpl pattern
+    let schema
+    let embeddings: undefined | EmbeddingFunction<T>
+    let tableName: string
+    if (typeof nameOrOpts === 'string') {
+      if (
+        optsOrEmbedding !== undefined &&
+        isEmbeddingFunction(optsOrEmbedding)
+      ) {
+        embeddings = optsOrEmbedding
+      }
+      tableName = nameOrOpts
+    } else {
+      schema = nameOrOpts.schema
+      embeddings = nameOrOpts.embeddingFunction
+      tableName = nameOrOpts.name
+      if (data === undefined) {
+        data = nameOrOpts.data
+      }
+    }
+
+    let buffer: Buffer
+
+    function isEmpty (
+      data: Array<Record<string, unknown>> | ArrowTable<any>
+    ): boolean {
+      if (data instanceof ArrowTable) {
+        return data.numRows === 0
+      }
+      return data.length === 0
+    }
+
+    if (data === undefined || isEmpty(data)) {
+      if (schema === undefined) {
+        throw new Error('Either data or schema needs to defined')
+      }
+      buffer = await fromTableToStreamBuffer(createEmptyTable(schema))
+    } else if (data instanceof ArrowTable) {
+      buffer = await fromTableToStreamBuffer(data, embeddings)
+    } else {
+      // data is Array<Record<...>>
+      buffer = await fromRecordsToStreamBuffer(data, embeddings)
+    }
+
+    const res = await this._client.post(
+      `/v1/table/${encodeURIComponent(tableName)}/create/`,
+      buffer,
+      undefined,
+      'application/vnd.apache.arrow.stream'
+    )
+    if (res.status !== 200) {
+      throw new Error(
+        `Server Error, status: ${res.status}, ` +
+          // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
+          `message: ${res.statusText}: ${await res.body()}`
+      )
+    }
+
+    this._tableCache.set(tableName, true)
+    if (embeddings === undefined) {
+      return new RemoteTable(this._client, tableName)
+    } else {
+      return new RemoteTable(this._client, tableName, embeddings)
+    }
+  }
+
+  /** Drop the named table on the server and forget it in the cache. */
+  async dropTable (name: string): Promise<void> {
+    await this._client.post(`/v1/table/${encodeURIComponent(name)}/drop/`)
+    this._tableCache.delete(name)
+  }
+
+  /** Return a copy of this connection whose client runs the given middleware. */
+  withMiddleware (middleware: HttpMiddleware): Connection {
+    const wrapped = this.clone()
+    wrapped._client = wrapped._client.withMiddleware(middleware)
+    return wrapped
+  }
+
+  /** Shallow copy: own fields (including the table cache) are shared by reference. */
+  private clone (): RemoteConnection {
+    const clone: RemoteConnection = Object.create(RemoteConnection.prototype)
+    return Object.assign(clone, this)
+  }
+}
+
+/**
+ * A query that is executed by the remote server through the HTTP client
+ * rather than locally.
+ */
+export class RemoteQuery<T = number[]> extends Query<T> {
+  constructor (
+    query: T,
+    private readonly _client: HttpLancedbClient,
+    private readonly _name: string,
+    embeddings?: EmbeddingFunction<T>
+  ) {
+    super(query, undefined, embeddings)
+  }
+
+  // TODO: refactor this to a base class + queryImpl pattern
+  /**
+   * Run the search on the server and return the rows as plain objects.
+   * When an embedding function is set, the query is embedded first; otherwise
+   * the query itself is used as the vector. Arrow `Vector` values in the
+   * result rows are converted with `toArray()`.
+   */
+  async execute<T = Record<string, unknown>>(): Promise<T[]> {
+    const embedder = this._embeddings
+    const self = this as any
+    const rawQuery = self._query
+    const queryVector: number[] = embedder !== undefined
+      ? (await embedder.embed([rawQuery]))[0]
+      : (rawQuery as number[])
+
+    const rows = await this._client.search(
+      this._name,
+      queryVector,
+      self._limit,
+      self._nprobes,
+      self._prefilter,
+      self._refineFactor,
+      self._select,
+      self._filter,
+      self._metricType,
+      self._fastSearch
+    )
+
+    return rows.toArray().map((entry: Record<string, unknown>) => {
+      const plain: Record<string, unknown> = {}
+      for (const key of Object.keys(entry)) {
+        const value = entry[key]
+        plain[key] = value instanceof Vector ? (value as any).toArray() : (value as any)
+      }
+      return plain as unknown as T
+    })
+  }
+}
+
+// we are using extend until we have next next version release
+// Table and Connection has both been refactored to interfaces
+/**
+ * A table that lives on a remote LanceDB server. Every operation is sent as
+ * an HTTP request through the given client; the table name is URL-encoded
+ * whenever it is placed in a request path.
+ */
+export class RemoteTable<T = number[]> implements Table<T> {
+  private _client: HttpLancedbClient
+  private readonly _embeddings?: EmbeddingFunction<T>
+  private readonly _name: string
+
+  constructor (client: HttpLancedbClient, name: string)
+  constructor (
+    client: HttpLancedbClient,
+    name: string,
+    embeddings: EmbeddingFunction<T>
+  )
+  constructor (
+    client: HttpLancedbClient,
+    name: string,
+    embeddings?: EmbeddingFunction<T>
+  ) {
+    this._client = client
+    this._name = name
+    this._embeddings = embeddings
+  }
+
+  get name (): string {
+    return this._name
+  }
+
+  /** Fetch the table description from the server and resolve to its `schema` field. */
+  get schema (): Promise<any> {
+    return this._client
+      .post(`/v1/table/${encodeURIComponent(this._name)}/describe/`)
+      .then(async (res) => {
+        if (res.status !== 200) {
+          throw new Error(
+            `Server Error, status: ${res.status}, ` +
+              // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
+              `message: ${res.statusText}: ${await res.body()}`
+          )
+        }
+        return (await res.body())?.schema
+      })
+  }
+
+  /** Start a vector search against this table. */
+  search (query: T): Query<T> {
+    return new RemoteQuery(query, this._client, encodeURIComponent(this._name)) //, this._embeddings_new)
+  }
+
+  filter (where: string): Query<T> {
+    throw new Error('Not implemented')
+  }
+
+  /**
+   * Merge `data` into the table, matching rows on the `on` column.
+   * The `args` flags are translated into `when_*` query parameters; a string
+   * value for `whenMatchedUpdateAll` / `whenNotMatchedBySourceDelete` is sent
+   * as the accompanying `*_filt` parameter.
+   * @throws Error when the server answers with a non-200 status.
+   */
+  async mergeInsert (on: string, data: Array<Record<string, unknown>> | ArrowTable, args: MergeInsertArgs): Promise<void> {
+    let tbl: ArrowTable
+    if (data instanceof ArrowTable) {
+      tbl = data
+    } else {
+      tbl = makeArrowTable(data, await this.schema)
+    }
+
+    const queryParams: any = {
+      on
+    }
+    if (args.whenMatchedUpdateAll !== false && args.whenMatchedUpdateAll !== null && args.whenMatchedUpdateAll !== undefined) {
+      queryParams.when_matched_update_all = 'true'
+      if (typeof args.whenMatchedUpdateAll === 'string') {
+        queryParams.when_matched_update_all_filt = args.whenMatchedUpdateAll
+      }
+    } else {
+      queryParams.when_matched_update_all = 'false'
+    }
+    if (args.whenNotMatchedInsertAll ?? false) {
+      queryParams.when_not_matched_insert_all = 'true'
+    } else {
+      queryParams.when_not_matched_insert_all = 'false'
+    }
+    if (args.whenNotMatchedBySourceDelete !== false && args.whenNotMatchedBySourceDelete !== null && args.whenNotMatchedBySourceDelete !== undefined) {
+      queryParams.when_not_matched_by_source_delete = 'true'
+      if (typeof args.whenNotMatchedBySourceDelete === 'string') {
+        queryParams.when_not_matched_by_source_delete_filt = args.whenNotMatchedBySourceDelete
+      }
+    } else {
+      queryParams.when_not_matched_by_source_delete = 'false'
+    }
+
+    const buffer = await fromTableToStreamBuffer(tbl, this._embeddings)
+    const res = await this._client.post(
+      `/v1/table/${encodeURIComponent(this._name)}/merge_insert/`,
+      buffer,
+      queryParams,
+      'application/vnd.apache.arrow.stream'
+    )
+    if (res.status !== 200) {
+      throw new Error(
+        `Server Error, status: ${res.status}, ` +
+          // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
+          `message: ${res.statusText}: ${await res.body()}`
+      )
+    }
+  }
+
+  /**
+   * Append rows to the table. Plain records are converted to an Arrow table
+   * using the table's current schema.
+   * @returns the number of rows in the data that was sent
+   */
+  async add (data: Array<Record<string, unknown>> | ArrowTable): Promise<number> {
+    let tbl: ArrowTable
+    if (data instanceof ArrowTable) {
+      tbl = data
+    } else {
+      tbl = makeArrowTable(data, await this.schema)
+    }
+
+    const buffer = await fromTableToStreamBuffer(tbl, this._embeddings)
+    const res = await this._client.post(
+      `/v1/table/${encodeURIComponent(this._name)}/insert/`,
+      buffer,
+      {
+        mode: 'append'
+      },
+      'application/vnd.apache.arrow.stream'
+    )
+    if (res.status !== 200) {
+      throw new Error(
+        `Server Error, status: ${res.status}, ` +
+          // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
+          `message: ${res.statusText}: ${await res.body()}`
+      )
+    }
+    return tbl.numRows
+  }
+
+  /**
+   * Replace the table contents. Unlike `add`, plain records are converted
+   * without consulting the existing schema.
+   * @returns the number of rows in the data that was sent
+   */
+  async overwrite (data: Array<Record<string, unknown>> | ArrowTable): Promise<number> {
+    let tbl: ArrowTable
+    if (data instanceof ArrowTable) {
+      tbl = data
+    } else {
+      tbl = makeArrowTable(data)
+    }
+    const buffer = await fromTableToStreamBuffer(tbl, this._embeddings)
+    const res = await this._client.post(
+      `/v1/table/${encodeURIComponent(this._name)}/insert/`,
+      buffer,
+      {
+        mode: 'overwrite'
+      },
+      'application/vnd.apache.arrow.stream'
+    )
+    if (res.status !== 200) {
+      throw new Error(
+        `Server Error, status: ${res.status}, ` +
+          // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
+          `message: ${res.statusText}: ${await res.body()}`
+      )
+    }
+    return tbl.numRows
+  }
+
+  /**
+   * Request a vector index. Only `column`, `metric_type` and
+   * `index_cache_size` are forwarded; any truthy value for one of the
+   * parameters listed below is rejected.
+   * @throws Error for unsupported parameters or a non-200 response.
+   */
+  async createIndex (indexParams: VectorIndexParams): Promise<void> {
+    const unsupportedParams = [
+      'index_name',
+      'num_partitions',
+      'max_iters',
+      'use_opq',
+      'num_sub_vectors',
+      'num_bits',
+      'max_opq_iters',
+      'replace'
+    ]
+    for (const param of unsupportedParams) {
+      // eslint-disable-next-line @typescript-eslint/strict-boolean-expressions
+      if (indexParams[param as keyof VectorIndexParams]) {
+        throw new Error(`${param} is not supported for remote connections`)
+      }
+    }
+
+    const column = indexParams.column ?? 'vector'
+    const indexType = 'vector'
+    const metricType = indexParams.metric_type ?? 'L2'
+    const indexCacheSize = indexParams.index_cache_size ?? null
+
+    const data = {
+      column,
+      index_type: indexType,
+      metric_type: metricType,
+      index_cache_size: indexCacheSize
+    }
+    const res = await this._client.post(
+      `/v1/table/${encodeURIComponent(this._name)}/create_index/`,
+      data
+    )
+    if (res.status !== 200) {
+      throw new Error(
+        `Server Error, status: ${res.status}, ` +
+          // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
+          `message: ${res.statusText}: ${await res.body()}`
+      )
+    }
+  }
+
+  /** Request a scalar index on `column`, replacing an existing one. */
+  async createScalarIndex (column: string): Promise<void> {
+    const indexType = 'scalar'
+
+    const data = {
+      column,
+      index_type: indexType,
+      replace: true
+    }
+    const res = await this._client.post(
+      `/v1/table/${encodeURIComponent(this._name)}/create_scalar_index/`,
+      data
+    )
+    if (res.status !== 200) {
+      throw new Error(
+        `Server Error, status: ${res.status}, ` +
+          // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
+          `message: ${res.statusText}: ${await res.body()}`
+      )
+    }
+  }
+
+  /** Drop the index with the given name. */
+  async dropIndex (index_name: string): Promise<void> {
+    const res = await this._client.post(
+      `/v1/table/${encodeURIComponent(this._name)}/index/${encodeURIComponent(index_name)}/drop/`
+    )
+    if (res.status !== 200) {
+      throw new Error(
+        `Server Error, status: ${res.status}, ` +
+          // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
+          `message: ${res.statusText}: ${await res.body()}`
+      )
+    }
+  }
+
+  /** Count rows, optionally restricted by a filter predicate. */
+  async countRows (filter?: string): Promise<number> {
+    const result = await this._client.post(`/v1/table/${encodeURIComponent(this._name)}/count_rows/`, {
+      predicate: filter
+    })
+    return (await result.body())
+  }
+
+  /** Delete the rows matching the filter predicate. */
+  async delete (filter: string): Promise<void> {
+    await this._client.post(`/v1/table/${encodeURIComponent(this._name)}/delete/`, {
+      predicate: filter
+    })
+  }
+
+  /**
+   * Update rows. With `valuesSql` the SQL expressions are sent as given;
+   * with `values` each value is first converted with `toSQL`. The updates are
+   * sent as a list of `[column, expression]` pairs.
+   */
+  async update (args: UpdateArgs | UpdateSqlArgs): Promise<void> {
+    let filter: string | null
+    let updates: Record<string, string>
+
+    if ('valuesSql' in args) {
+      filter = args.where ?? null
+      updates = args.valuesSql
+    } else {
+      filter = args.where ?? null
+      updates = {}
+      for (const [key, value] of Object.entries(args.values)) {
+        updates[key] = toSQL(value)
+      }
+    }
+    await this._client.post(`/v1/table/${encodeURIComponent(this._name)}/update/`, {
+      predicate: filter,
+      updates: Object.entries(updates).map(([key, value]) => [key, value])
+    })
+  }
+
+  /**
+   * List the indices of this table.
+   * @returns one entry per index reported by the server; an empty array when
+   *   the response carries no `indexes` field.
+   */
+  async listIndices (): Promise<VectorIndex[]> {
+    const results = await this._client.post(
+      `/v1/table/${encodeURIComponent(this._name)}/index/list/`
+    )
+    const indexes = (await results.body()).indexes
+    // Fall back to an empty list so the declared return type always holds.
+    return indexes?.map((index: any) => ({
+      columns: index.columns,
+      name: index.index_name,
+      uuid: index.index_uuid,
+      status: index.status
+    })) ?? []
+  }
+
+  /** Fetch statistics for the named index. */
+  async indexStats (indexName: string): Promise<IndexStats> {
+    // The index name is URL-encoded like the table name (and like dropIndex
+    // does) so that reserved characters cannot alter the request path.
+    const results = await this._client.post(
+      `/v1/table/${encodeURIComponent(this._name)}/index/${encodeURIComponent(indexName)}/stats/`
+    )
+    const body = await results.body()
+    return {
+      numIndexedRows: body?.num_indexed_rows,
+      numUnindexedRows: body?.num_unindexed_rows,
+      indexType: body?.index_type,
+      distanceType: body?.distance_type
+    }
+  }
+
+  async addColumns (newColumnTransforms: Array<{ name: string, valueSql: string }>): Promise<void> {
+    throw new Error('Add columns is not yet supported in LanceDB Cloud.')
+  }
+
+  async alterColumns (columnAlterations: ColumnAlteration[]): Promise<void> {
+    throw new Error('Alter columns is not yet supported in LanceDB Cloud.')
+  }
+
+  async dropColumns (columnNames: string[]): Promise<void> {
+    throw new Error('Drop columns is not yet supported in LanceDB Cloud.')
+  }
+
+  /** Return a copy of this table whose client runs the given middleware. */
+  withMiddleware(middleware: HttpMiddleware): Table<T> {
+    const wrapped = this.clone()
+    wrapped._client = wrapped._client.withMiddleware(middleware)
+    return wrapped
+  }
+
+  /** Shallow copy: own fields are shared by reference. */
+  private clone (): RemoteTable<T> {
+    const clone: RemoteTable<T> = Object.create(RemoteTable.prototype)
+    return Object.assign(clone, this)
+  }
+}
--- a/node/src/sanitize.ts
+++ b/node/src/sanitize.ts
@@ -0,0 +1,508 @@
+// Copyright 2023 LanceDB Developers.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// The utilities in this file help sanitize data from the user's arrow
+// library into the types expected by vectordb's arrow library.  Node
+// generally allows for multiple versions of the same library (and sometimes
+// even multiple copies of the same version) to be installed at the same
+// time.  However, arrow-js uses instanceof, which expects that the input
+// comes from the exact same library instance.  This is not always the case
+// and so we must sanitize the input to ensure that it is compatible.
+
+import {
+  Field,
+  Utf8,
+  FixedSizeBinary,
+  FixedSizeList,
+  Schema,
+  List,
+  Struct,
+  Float,
+  Bool,
+  Date_,
+  Decimal,
+  type DataType,
+  Dictionary,
+  Binary,
+  Float32,
+  Interval,
+  Map_,
+  Duration,
+  Union,
+  Time,
+  Timestamp,
+  Type,
+  Null,
+  Int,
+  type Precision,
+  type DateUnit,
+  Int8,
+  Int16,
+  Int32,
+  Int64,
+  Uint8,
+  Uint16,
+  Uint32,
+  Uint64,
+  Float16,
+  Float64,
+  DateDay,
+  DateMillisecond,
+  DenseUnion,
+  SparseUnion,
+  TimeNanosecond,
+  TimeMicrosecond,
+  TimeMillisecond,
+  TimeSecond,
+  TimestampNanosecond,
+  TimestampMicrosecond,
+  TimestampMillisecond,
+  TimestampSecond,
+  IntervalDayTime,
+  IntervalYearMonth,
+  DurationNanosecond,
+  DurationMicrosecond,
+  DurationMillisecond,
+  DurationSecond
+} from "apache-arrow";
+import type { IntBitWidth, TimeBitWidth } from "apache-arrow/type";
+
+/**
+ * Validate a metadata-like value.
+ *
+ * @param metadataLike - `undefined`/`null` (no metadata) or a Map
+ * @returns `undefined` when no metadata was given, otherwise the same Map
+ * @throws Error when the value is not a Map, or when any key or any value
+ *   in the Map is not a string
+ */
+function sanitizeMetadata(
+  metadataLike?: unknown
+): Map<string, string> | undefined {
+  if (metadataLike === undefined || metadataLike === null) {
+    return undefined;
+  }
+  if (!(metadataLike instanceof Map)) {
+    throw Error("Expected metadata, if present, to be a Map<string, string>");
+  }
+  for (const [key, value] of metadataLike) {
+    // Reject the entry as soon as EITHER side is not a string.
+    if (typeof key !== "string" || typeof value !== "string") {
+      throw Error(
+        "Expected metadata, if present, to be a Map<string, string> but it had non-string keys or values"
+      );
+    }
+  }
+  return metadataLike as Map<string, string>;
+}
+
+/**
+ * Rebuild an `Int` from an Int-like object, which must carry a numeric
+ * `bitWidth` and a boolean `isSigned`.
+ */
+function sanitizeInt(typeLike: object) {
+  if (
+    "bitWidth" in typeLike &&
+    typeof typeLike.bitWidth === "number" &&
+    "isSigned" in typeLike &&
+    typeof typeLike.isSigned === "boolean"
+  ) {
+    return new Int(typeLike.isSigned, typeLike.bitWidth as IntBitWidth);
+  }
+  throw Error(
+    "Expected an Int Type to have a `bitWidth` and `isSigned` property"
+  );
+}
+
+/** Rebuild a `Float` from a Float-like object with a numeric `precision`. */
+function sanitizeFloat(typeLike: object) {
+  if ("precision" in typeLike && typeof typeLike.precision === "number") {
+    return new Float(typeLike.precision as Precision);
+  }
+  throw Error("Expected a Float Type to have a `precision` property");
+}
+
+/**
+ * Rebuild a `Decimal` from a Decimal-like object with numeric `scale`,
+ * `precision` and `bitWidth` properties.
+ */
+function sanitizeDecimal(typeLike: object) {
+  if (
+    "scale" in typeLike &&
+    typeof typeLike.scale === "number" &&
+    "precision" in typeLike &&
+    typeof typeLike.precision === "number" &&
+    "bitWidth" in typeLike &&
+    typeof typeLike.bitWidth === "number"
+  ) {
+    return new Decimal(typeLike.scale, typeLike.precision, typeLike.bitWidth);
+  }
+  throw Error(
+    "Expected a Decimal Type to have `scale`, `precision`, and `bitWidth` properties"
+  );
+}
+
+/** Rebuild a `Date_` from a Date-like object with a numeric `unit`. */
+function sanitizeDate(typeLike: object) {
+  if ("unit" in typeLike && typeof typeLike.unit === "number") {
+    return new Date_(typeLike.unit as DateUnit);
+  }
+  throw Error("Expected a Date type to have a `unit` property");
+}
+
+/**
+ * Rebuild a `Time` from a Time-like object with numeric `unit` and
+ * `bitWidth` properties.
+ */
+function sanitizeTime(typeLike: object) {
+  if (
+    "unit" in typeLike &&
+    typeof typeLike.unit === "number" &&
+    "bitWidth" in typeLike &&
+    typeof typeLike.bitWidth === "number"
+  ) {
+    return new Time(typeLike.unit, typeLike.bitWidth as TimeBitWidth);
+  }
+  throw Error(
+    "Expected a Time type to have `unit` and `bitWidth` properties"
+  );
+}
+
+/**
+ * Rebuild a `Timestamp` from a Timestamp-like object. A numeric `unit` is
+ * required; `timezone` is used only when it is a string, otherwise `null`
+ * is passed.
+ */
+function sanitizeTimestamp(typeLike: object) {
+  if ("unit" in typeLike && typeof typeLike.unit === "number") {
+    const timezone =
+      "timezone" in typeLike && typeof typeLike.timezone === "string"
+        ? typeLike.timezone
+        : null;
+    return new Timestamp(typeLike.unit, timezone);
+  }
+  throw Error("Expected a Timestamp type to have a `unit` property");
+}
+
+/**
+ * Rebuild one of the unit-specific timestamp types. The given constructor
+ * receives the `timezone` of the input when that is a string, and `null`
+ * otherwise.
+ */
+function sanitizeTypedTimestamp(
+  typeLike: object,
+  Datatype:
+    | typeof TimestampNanosecond
+    | typeof TimestampMicrosecond
+    | typeof TimestampMillisecond
+    | typeof TimestampSecond
+) {
+  const timezone =
+    "timezone" in typeLike && typeof typeLike.timezone === "string"
+      ? typeLike.timezone
+      : null;
+  return new Datatype(timezone);
+}
+
+/** Rebuild an `Interval` from an Interval-like object with a numeric `unit`. */
+function sanitizeInterval(typeLike: object) {
+  if ("unit" in typeLike && typeof typeLike.unit === "number") {
+    return new Interval(typeLike.unit);
+  }
+  throw Error("Expected an Interval type to have a `unit` property");
+}
+
+/**
+ * Rebuild a `List` from a List-like object whose `children` array holds
+ * exactly one field; that field is sanitized as well.
+ */
+function sanitizeList(typeLike: object) {
+  if ("children" in typeLike && Array.isArray(typeLike.children)) {
+    if (typeLike.children.length === 1) {
+      return new List(sanitizeField(typeLike.children[0]));
+    }
+    throw Error("Expected a List type to have exactly one child");
+  }
+  throw Error(
+    "Expected a List type to have an array-like `children` property"
+  );
+}
+
+/**
+ * Rebuild a `Struct` from a Struct-like object, sanitizing every entry of
+ * its `children` array.
+ */
+function sanitizeStruct(typeLike: object) {
+  if ("children" in typeLike && Array.isArray(typeLike.children)) {
+    const fields = typeLike.children.map((child) => sanitizeField(child));
+    return new Struct(fields);
+  }
+  throw Error(
+    "Expected a Struct type to have an array-like `children` property"
+  );
+}
+
+/**
+ * Rebuild a `Union` from a Union-like object. It needs a `typeIds`
+ * property, a numeric `mode`, and a `children` array whose entries are
+ * sanitized as fields.
+ */
+function sanitizeUnion(typeLike: object) {
+  const hasIdsAndMode =
+    "typeIds" in typeLike &&
+    "mode" in typeLike &&
+    typeof typeLike.mode === "number";
+  if (!hasIdsAndMode) {
+    throw Error(
+      "Expected a Union type to have `typeIds` and `mode` properties"
+    );
+  }
+  if (!("children" in typeLike && Array.isArray(typeLike.children))) {
+    throw Error(
+      "Expected a Union type to have an array-like `children` property"
+    );
+  }
+
+  return new Union(
+    (typeLike as any).mode,
+    (typeLike as any).typeIds,
+    typeLike.children.map((child) => sanitizeField(child))
+  );
+}
+
+/**
+ * Rebuild a `DenseUnion` or `SparseUnion` (chosen by the caller) from a
+ * union-like object with a `typeIds` property and a `children` array; the
+ * children are sanitized as fields.
+ */
+function sanitizeTypedUnion(
+  typeLike: object,
+  UnionType: typeof DenseUnion | typeof SparseUnion
+) {
+  if ("typeIds" in typeLike) {
+    if ("children" in typeLike && Array.isArray(typeLike.children)) {
+      return new UnionType(
+        typeLike.typeIds as any,
+        typeLike.children.map((child) => sanitizeField(child))
+      );
+    }
+    throw Error(
+      "Expected a DenseUnion/SparseUnion type to have an array-like `children` property"
+    );
+  }
+  throw Error(
+    "Expected a DenseUnion/SparseUnion type to have a `typeIds` property"
+  );
+}
+
+/**
+ * Rebuild a `FixedSizeBinary` from a like-shaped object with a numeric
+ * `byteWidth`.
+ */
+function sanitizeFixedSizeBinary(typeLike: object) {
+  if ("byteWidth" in typeLike && typeof typeLike.byteWidth === "number") {
+    return new FixedSizeBinary(typeLike.byteWidth);
+  }
+  throw Error(
+    "Expected a FixedSizeBinary type to have a `byteWidth` property"
+  );
+}
+
+/**
+ * Rebuild a `FixedSizeList` from a like-shaped object. It needs a numeric
+ * `listSize` and a `children` array with exactly one field, which is
+ * sanitized as well.
+ */
+function sanitizeFixedSizeList(typeLike: object) {
+  if (!("listSize" in typeLike && typeof typeLike.listSize === "number")) {
+    throw Error("Expected a FixedSizeList type to have a `listSize` property");
+  }
+  if (!("children" in typeLike && Array.isArray(typeLike.children))) {
+    throw Error(
+      "Expected a FixedSizeList type to have an array-like `children` property"
+    );
+  }
+  if (typeLike.children.length === 1) {
+    return new FixedSizeList(
+      typeLike.listSize,
+      sanitizeField(typeLike.children[0])
+    );
+  }
+  throw Error("Expected a FixedSizeList type to have exactly one child");
+}
+
+/**
+ * Rebuild a `Map_` from a Map-like object with a `children` array (each
+ * entry sanitized as a field) and a boolean `keysSorted`.
+ */
+function sanitizeMap(typeLike: object) {
+  if (!("children" in typeLike && Array.isArray(typeLike.children))) {
+    throw Error(
+      "Expected a Map type to have an array-like `children` property"
+    );
+  }
+  if ("keysSorted" in typeLike && typeof typeLike.keysSorted === "boolean") {
+    return new Map_(
+      typeLike.children.map((field) => sanitizeField(field)) as any,
+      typeLike.keysSorted
+    );
+  }
+  throw Error("Expected a Map type to have a `keysSorted` property");
+}
+
+/** Rebuild a `Duration` from a Duration-like object with a numeric `unit`. */
+function sanitizeDuration(typeLike: object) {
+  if ("unit" in typeLike && typeof typeLike.unit === "number") {
+    return new Duration(typeLike.unit);
+  }
+  throw Error("Expected a Duration type to have a `unit` property");
+}
+
+/**
+ * Rebuild a `Dictionary` from a Dictionary-like object. It needs a numeric
+ * `id`, object-valued `indices` and `dictionary` (both sanitized as types)
+ * and a boolean `isOrdered`.
+ */
+function sanitizeDictionary(typeLike: object) {
+  if (!("id" in typeLike && typeof typeLike.id === "number")) {
+    throw Error("Expected a Dictionary type to have an `id` property");
+  }
+  if (!("indices" in typeLike && typeof typeLike.indices === "object")) {
+    throw Error("Expected a Dictionary type to have an `indices` property");
+  }
+  if (!("dictionary" in typeLike && typeof typeLike.dictionary === "object")) {
+    throw Error("Expected a Dictionary type to have an `dictionary` property");
+  }
+  if (!("isOrdered" in typeLike && typeof typeLike.isOrdered === "boolean")) {
+    throw Error("Expected a Dictionary type to have an `isOrdered` property");
+  }
+  return new Dictionary(
+    sanitizeType(typeLike.dictionary),
+    sanitizeType(typeLike.indices) as any,
+    typeLike.id,
+    typeLike.isOrdered
+  );
+}
+
+/**
+ * Convert something type-like into a DataType from this module's copy of
+ * apache-arrow.
+ *
+ * The input must be a non-null object with a `typeId` that is either a
+ * number or a function returning the id. The id selects which concrete type
+ * is rebuilt; parameterized types are delegated to the matching
+ * `sanitize*` helper.
+ *
+ * @throws Error when the input is not an object, has no usable `typeId`,
+ *   has a typeId of NONE, or has a typeId that is not handled here
+ */
+function sanitizeType(typeLike: unknown): DataType<any> {
+  if (typeof typeLike !== "object" || typeLike === null) {
+    throw Error("Expected a Type but object was null/undefined");
+  }
+  // Only the presence of `typeId` is checked here: both the function form
+  // and the plain-number form are accepted and told apart just below.
+  if (!("typeId" in typeLike)) {
+    throw Error("Expected a Type to have a typeId function");
+  }
+  let typeId: Type;
+  if (typeof typeLike.typeId === "function") {
+    typeId = (typeLike.typeId as () => unknown)() as Type;
+  } else if (typeof typeLike.typeId === "number") {
+    typeId = typeLike.typeId as Type;
+  } else {
+    throw Error("Type's typeId property was not a function or number");
+  }
+
+  switch (typeId) {
+    case Type.NONE:
+      throw Error("Received a Type with a typeId of NONE");
+    case Type.Null:
+      return new Null();
+    case Type.Int:
+      return sanitizeInt(typeLike);
+    case Type.Float:
+      return sanitizeFloat(typeLike);
+    case Type.Binary:
+      return new Binary();
+    case Type.Utf8:
+      return new Utf8();
+    case Type.Bool:
+      return new Bool();
+    case Type.Decimal:
+      return sanitizeDecimal(typeLike);
+    case Type.Date:
+      return sanitizeDate(typeLike);
+    case Type.Time:
+      return sanitizeTime(typeLike);
+    case Type.Timestamp:
+      return sanitizeTimestamp(typeLike);
+    case Type.Interval:
+      return sanitizeInterval(typeLike);
+    case Type.List:
+      return sanitizeList(typeLike);
+    case Type.Struct:
+      return sanitizeStruct(typeLike);
+    case Type.Union:
+      return sanitizeUnion(typeLike);
+    case Type.FixedSizeBinary:
+      return sanitizeFixedSizeBinary(typeLike);
+    case Type.FixedSizeList:
+      return sanitizeFixedSizeList(typeLike);
+    case Type.Map:
+      return sanitizeMap(typeLike);
+    case Type.Duration:
+      return sanitizeDuration(typeLike);
+    case Type.Dictionary:
+      return sanitizeDictionary(typeLike);
+    case Type.Int8:
+      return new Int8();
+    case Type.Int16:
+      return new Int16();
+    case Type.Int32:
+      return new Int32();
+    case Type.Int64:
+      return new Int64();
+    case Type.Uint8:
+      return new Uint8();
+    case Type.Uint16:
+      return new Uint16();
+    case Type.Uint32:
+      return new Uint32();
+    case Type.Uint64:
+      return new Uint64();
+    case Type.Float16:
+      return new Float16();
+    case Type.Float32:
+      return new Float32();
+    case Type.Float64:
+      return new Float64();
+    case Type.DateMillisecond:
+      return new DateMillisecond();
+    case Type.DateDay:
+      return new DateDay();
+    case Type.TimeNanosecond:
+      return new TimeNanosecond();
+    case Type.TimeMicrosecond:
+      return new TimeMicrosecond();
+    case Type.TimeMillisecond:
+      return new TimeMillisecond();
+    case Type.TimeSecond:
+      return new TimeSecond();
+    case Type.TimestampNanosecond:
+      return sanitizeTypedTimestamp(typeLike, TimestampNanosecond);
+    case Type.TimestampMicrosecond:
+      return sanitizeTypedTimestamp(typeLike, TimestampMicrosecond);
+    case Type.TimestampMillisecond:
+      return sanitizeTypedTimestamp(typeLike, TimestampMillisecond);
+    case Type.TimestampSecond:
+      return sanitizeTypedTimestamp(typeLike, TimestampSecond);
+    case Type.DenseUnion:
+      return sanitizeTypedUnion(typeLike, DenseUnion);
+    case Type.SparseUnion:
+      return sanitizeTypedUnion(typeLike, SparseUnion);
+    case Type.IntervalDayTime:
+      return new IntervalDayTime();
+    case Type.IntervalYearMonth:
+      return new IntervalYearMonth();
+    case Type.DurationNanosecond:
+      return new DurationNanosecond();
+    case Type.DurationMicrosecond:
+      return new DurationMicrosecond();
+    case Type.DurationMillisecond:
+      return new DurationMillisecond();
+    case Type.DurationSecond:
+      return new DurationSecond();
+    default:
+      // A typeId from a different arrow version may not be known here; fail
+      // loudly instead of silently returning undefined.
+      throw Error(`Received a Type with an unsupported typeId: ${String(typeId)}`);
+  }
+}
+
+/**
+ * Convert something field-like into a Field instance.
+ *
+ * Genuine `Field` instances are returned unchanged. Anything else must be an
+ * object with `type`, `name` and `nullable` properties; the type is
+ * sanitized, `name` must be a string, `nullable` a boolean, and `metadata`
+ * (when present) is validated.
+ */
+function sanitizeField(fieldLike: unknown): Field {
+  if (fieldLike instanceof Field) {
+    return fieldLike;
+  }
+  if (fieldLike === null || typeof fieldLike !== "object") {
+    throw Error("Expected a Field but object was null/undefined");
+  }
+  if ("type" in fieldLike && "name" in fieldLike && "nullable" in fieldLike) {
+    const type = sanitizeType(fieldLike.type);
+    const name = fieldLike.name;
+    if (typeof name !== "string") {
+      throw Error("The field passed in had a non-string `name` property");
+    }
+    const nullable = fieldLike.nullable;
+    if (typeof nullable !== "boolean") {
+      throw Error("The field passed in had a non-boolean `nullable` property");
+    }
+    const metadata =
+      "metadata" in fieldLike
+        ? sanitizeMetadata(fieldLike.metadata)
+        : undefined;
+    return new Field(name, type, nullable, metadata);
+  }
+  throw Error(
+    "The field passed in is missing a `type`/`name`/`nullable` property"
+  );
+}
+
+/**
+ * Convert something schemaLike into a Schema instance
+ *
+ * Callers that already hold a Schema often still need this, because their
+ * Schema may come from a different instance of apache-arrow than the one
+ * lancedb is using. A Schema from this module's arrow copy is returned
+ * as-is; otherwise the `fields` array and optional `metadata` are sanitized
+ * and a new Schema is built.
+ */
+export function sanitizeSchema(schemaLike: unknown): Schema {
+  if (schemaLike instanceof Schema) {
+    return schemaLike;
+  }
+  if (schemaLike === null || typeof schemaLike !== "object") {
+    throw Error("Expected a Schema but object was null/undefined");
+  }
+  if (!("fields" in schemaLike)) {
+    throw Error(
+      "The schema passed in does not appear to be a schema (no 'fields' property)"
+    );
+  }
+  // Metadata is validated before the shape of `fields` is checked.
+  const metadata =
+    "metadata" in schemaLike
+      ? sanitizeMetadata(schemaLike.metadata)
+      : undefined;
+  if (Array.isArray(schemaLike.fields)) {
+    const sanitizedFields = schemaLike.fields.map((field) =>
+      sanitizeField(field)
+    );
+    return new Schema(sanitizedFields, metadata);
+  }
+  throw Error(
+    "The schema passed in had a 'fields' property but it was not an array"
+  );
+}
--- a/node/src/test/arrow.test.ts
+++ b/node/src/test/arrow.test.ts
@@ -0,0 +1,360 @@
+// Copyright 2024 Lance Developers.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import { describe } from 'mocha'
+import { assert, expect, use as chaiUse } from 'chai'
+import * as chaiAsPromised from 'chai-as-promised'
+
+import { convertToTable, fromTableToBuffer, makeArrowTable, makeEmptyTable } from '../arrow'
+import {
+  Field,
+  FixedSizeList,
+  Float16,
+  Float32,
+  Int32,
+  tableFromIPC,
+  Schema,
+  Float64,
+  type Table,
+  Binary,
+  Bool,
+  Utf8,
+  Struct,
+  List,
+  DataType,
+  Dictionary,
+  Int64,
+  MetadataVersion
+} from 'apache-arrow'
+import {
+  Dictionary as OldDictionary,
+  Field as OldField,
+  FixedSizeList as OldFixedSizeList,
+  Float32 as OldFloat32,
+  Int32 as OldInt32,
+  Struct as OldStruct,
+  Schema as OldSchema,
+  TimestampNanosecond as OldTimestampNanosecond,
+  Utf8 as OldUtf8
+} from 'apache-arrow-old'
+import { type EmbeddingFunction } from '../embedding/embedding_function'
+
+chaiUse(chaiAsPromised)
+
+/**
+ * Produce a fresh one-row fixture with one column per value kind used by the
+ * table-creation tests: binary, boolean, number, string, struct and a list
+ * of strings.
+ */
+function sampleRecords (): Array<Record<string, any>> {
+  const row: Record<string, any> = {
+    binary: Buffer.alloc(5),
+    boolean: false,
+    number: 7,
+    string: 'hello',
+    struct: { x: 0, y: 0 },
+    list: ['anime', 'action', 'comedy']
+  }
+  return [row]
+}
+
+/**
+ * Shared assertion harness for the different ways of building a table.
+ *
+ * Prepares the sample record, a copy of it with the keys declared in reverse
+ * order, and the schema those records should map to, then hands all three to
+ * `tableCreationMethod`. Every column of the returned table must be
+ * non-nullable and must have the expected type, whether it is looked up by
+ * name or by position.
+ */
+async function checkTableCreation (tableCreationMethod: (records: any, recordsReversed: any, schema: Schema) => Promise<Table>): Promise<void> {
+  const forwardRecords = sampleRecords()
+  // Same values as sampleRecords(), keys in the opposite order
+  const reversedRecords = [{
+    list: ['anime', 'action', 'comedy'],
+    struct: { x: 0, y: 0 },
+    string: 'hello',
+    number: 7,
+    boolean: false,
+    binary: Buffer.alloc(5)
+  }]
+
+  const structType = new Struct([
+    new Field('x', new Float64(), false),
+    new Field('y', new Float64(), false)
+  ])
+  const listType = new List(new Field('item', new Utf8(), false))
+  const schema = new Schema([
+    new Field('binary', new Binary(), false),
+    new Field('boolean', new Bool(), false),
+    new Field('number', new Float64(), false),
+    new Field('string', new Utf8(), false),
+    new Field('struct', structType),
+    new Field('list', listType, false)
+  ])
+
+  const table = await tableCreationMethod(forwardRecords, reversedRecords, schema)
+  for (const [idx, field] of schema.fields.entries()) {
+    assert.isFalse(table.schema.fields[idx].nullable)
+    const expectedType = field.type.toString()
+    assert.equal(table.getChild(field.name)?.type.toString(), expectedType)
+    assert.equal(table.getChildAt(idx)?.type.toString(), expectedType)
+  }
+}
+
+describe('The function makeArrowTable', function () {
+  // Writes `table` to an IPC buffer, reads it back, and checks that three
+  // rows and exactly `expectedSchema` come out the other side.
+  async function expectThreeRowRoundTrip (table: Table, expectedSchema: Schema): Promise<void> {
+    const ipcBytes = await fromTableToBuffer(table)
+    assert.isAbove(ipcBytes.byteLength, 0)
+
+    const decoded = tableFromIPC(ipcBytes)
+    assert.equal(decoded.numRows, 3)
+    assert.deepEqual(decoded.schema, expectedSchema)
+  }
+
+  it('will use data types from a provided schema instead of inference', async function () {
+    const schema = new Schema([
+      new Field('a', new Int32()),
+      new Field('b', new Float32()),
+      new Field('c', new FixedSizeList(3, new Field('item', new Float16()))),
+      new Field('d', new Int64())
+    ])
+    const rows = [
+      { a: 1, b: 2, c: [1, 2, 3], d: 9 },
+      { a: 4, b: 5, c: [4, 5, 6], d: 10 },
+      { a: 7, b: 8, c: [7, 8, 9], d: null }
+    ]
+
+    await expectThreeRowRoundTrip(makeArrowTable(rows, { schema }), schema)
+  })
+
+  it('will assume the column `vector` is FixedSizeList<Float32> by default', async function () {
+    const vectorType = new FixedSizeList(3, new Field('item', new Float32(), true))
+    const expectedSchema = new Schema([
+      new Field('a', new Float64()),
+      new Field('b', new Float64()),
+      new Field('vector', vectorType)
+    ])
+    // No schema or options: the types above must come purely from inference
+    const rows = [
+      { a: 1, b: 2, vector: [1, 2, 3] },
+      { a: 4, b: 5, vector: [4, 5, 6] },
+      { a: 7, b: 8, vector: [7, 8, 9] }
+    ]
+
+    await expectThreeRowRoundTrip(makeArrowTable(rows), expectedSchema)
+  })
+
+  it('can support multiple vector columns', async function () {
+    const expectedSchema = new Schema([
+      new Field('a', new Float64()),
+      new Field('b', new Float64()),
+      new Field('vec1', new FixedSizeList(3, new Field('item', new Float16(), true))),
+      new Field('vec2', new FixedSizeList(3, new Field('item', new Float16(), true)))
+    ])
+    const rows = [
+      { a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] },
+      { a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] },
+      { a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] }
+    ]
+    const options = {
+      vectorColumns: {
+        vec1: { type: new Float16() },
+        vec2: { type: new Float16() }
+      }
+    }
+
+    await expectThreeRowRoundTrip(makeArrowTable(rows, options), expectedSchema)
+  })
+
+  it('will allow different vector column types', async function () {
+    const table = makeArrowTable(
+      [
+        { fp16: [1], fp32: [1], fp64: [1] }
+      ],
+      {
+        vectorColumns: {
+          fp16: { type: new Float16() },
+          fp32: { type: new Float32() },
+          fp64: { type: new Float64() }
+        }
+      }
+    )
+
+    // Each column's item type must be the one requested for it
+    const expectedElementTypes: Array<[string, DataType]> = [
+      ['fp16', new Float16()],
+      ['fp32', new Float32()],
+      ['fp64', new Float64()]
+    ]
+    for (const [column, elementType] of expectedElementTypes) {
+      assert.equal(table.getChild(column)?.type.children[0].type.toString(), elementType.toString())
+    }
+  })
+
+  it('will use dictionary encoded strings if asked', async function () {
+    const rows = (): Array<Record<string, any>> => [{ str: 'hello' }]
+
+    // Default: plain Utf8
+    const plain = makeArrowTable(rows())
+    assert.isTrue(DataType.isUtf8(plain.getChild('str')?.type))
+
+    // Opt in through the option flag
+    const viaOption = makeArrowTable(rows(), { dictionaryEncodeStrings: true })
+    assert.isTrue(DataType.isDictionary(viaOption.getChild('str')?.type))
+
+    // Opt in through an explicit schema
+    const schema = new Schema([
+      new Field('str', new Dictionary(new Utf8(), new Int32()))
+    ])
+    const viaSchema = makeArrowTable(rows(), { schema })
+    assert.isTrue(DataType.isDictionary(viaSchema.getChild('str')?.type))
+  })
+
+  it('will infer data types correctly', async function () {
+    await checkTableCreation(async (records) => makeArrowTable(records))
+  })
+
+  it('will allow a schema to be provided', async function () {
+    await checkTableCreation(async (records, _reversed, schema) => makeArrowTable(records, { schema }))
+  })
+
+  it('will use the field order of any provided schema', async function () {
+    await checkTableCreation(async (_records, recordsReversed, schema) => makeArrowTable(recordsReversed, { schema }))
+  })
+
+  it('will make an empty table', async function () {
+    await checkTableCreation(async (_records, _reversed, schema) => makeArrowTable([], { schema }))
+  })
+})
+
+/**
+ * Embedding stub for tests: reads the `string` column, declares a dimension
+ * of 2 and a Float16 element type, and maps every input to `[0, 0]`.
+ */
+class DummyEmbedding implements EmbeddingFunction<string> {
+  public readonly sourceColumn = 'string'
+  public readonly embeddingDimension = 2
+  public readonly embeddingDataType = new Float16()
+
+  async embed (data: string[]): Promise<number[][]> {
+    const vectors: number[][] = []
+    for (let i = 0; i < data.length; i++) {
+      vectors.push([0.0, 0.0])
+    }
+    return vectors
+  }
+}
+
+/**
+ * Embedding stub that deliberately declares neither `embeddingDimension` nor
+ * `embeddingDataType`; every input is mapped to `[0, 0]`.
+ */
+class DummyEmbeddingWithNoDimension implements EmbeddingFunction<string> {
+  public readonly sourceColumn = 'string'
+
+  async embed (data: string[]): Promise<number[][]> {
+    const vectors: number[][] = []
+    for (let i = 0; i < data.length; i++) {
+      vectors.push([0.0, 0.0])
+    }
+    return vectors
+  }
+}
+
+describe('convertToTable', function () {
+  // Fresh schema containing only the embedding source column
+  const sourceOnlySchema = (): Schema => new Schema([
+    new Field('string', new Utf8(), false)
+  ])
+
+  // Fresh schema with the source column plus a 2-wide Float16 `vector` column
+  const sourceAndVectorSchema = (): Schema => new Schema([
+    new Field('string', new Utf8(), false),
+    new Field('vector', new FixedSizeList(2, new Field('item', new Float16(), false)), false)
+  ])
+
+  // Asserts that `vector` is a FixedSizeList whose items are Float16
+  function expectFloat16Vector (table: Table): void {
+    const vectorType = table.getChild('vector')?.type
+    assert.isTrue(DataType.isFixedSizeList(vectorType))
+    assert.equal(vectorType?.children[0].type.toString(), new Float16().toString())
+  }
+
+  it('will infer data types correctly', async function () {
+    await checkTableCreation(async (records) => await convertToTable(records))
+  })
+
+  it('will allow a schema to be provided', async function () {
+    await checkTableCreation(async (records, _reversed, schema) => await convertToTable(records, undefined, { schema }))
+  })
+
+  it('will use the field order of any provided schema', async function () {
+    await checkTableCreation(async (_records, recordsReversed, schema) => await convertToTable(recordsReversed, undefined, { schema }))
+  })
+
+  it('will make an empty table', async function () {
+    await checkTableCreation(async (_records, _reversed, schema) => await convertToTable([], undefined, { schema }))
+  })
+
+  it('will apply embeddings', async function () {
+    const table = await convertToTable(sampleRecords(), new DummyEmbedding())
+    expectFloat16Vector(table)
+  })
+
+  it('will fail if missing the embedding source column', async function () {
+    const attempt = convertToTable([{ id: 1 }], new DummyEmbedding())
+    await expect(attempt).to.be.rejectedWith("'string' was not present")
+  })
+
+  it('use embeddingDimension if embedding missing from table', async function () {
+    // Simulate getting an empty Arrow table (minus embedding) from some other
+    // source, i.e. we are not starting from records
+    const table = makeEmptyTable(sourceOnlySchema())
+
+    // Fine when the embedding itself specifies the dimension
+    await fromTableToBuffer(table, new DummyEmbedding())
+
+    // Also fine when a schema carrying the vector column is supplied
+    await fromTableToBuffer(table, new DummyEmbeddingWithNoDimension(), sourceAndVectorSchema())
+
+    // With neither, an error is expected
+    const attempt = fromTableToBuffer(table, new DummyEmbeddingWithNoDimension())
+    await expect(attempt).to.be.rejectedWith('does not specify `embeddingDimension`')
+  })
+
+  it('will apply embeddings to an empty table', async function () {
+    const table = await convertToTable([], new DummyEmbedding(), { schema: sourceAndVectorSchema() })
+    expectFloat16Vector(table)
+  })
+
+  it('will complain if embeddings present but schema missing embedding column', async function () {
+    const attempt = convertToTable([], new DummyEmbedding(), { schema: sourceOnlySchema() })
+    await expect(attempt).to.be.rejectedWith('column vector was missing')
+  })
+
+  it('will provide a nice error if run twice', async function () {
+    const table = await convertToTable(sampleRecords(), new DummyEmbedding())
+    // fromTableToBuffer will try and apply the embeddings again
+    const secondPass = fromTableToBuffer(table, new DummyEmbedding())
+    await expect(secondPass).to.be.rejectedWith('already existed')
+  })
+})
+
+describe('makeEmptyTable', function () {
+  it('will make an empty table', async function () {
+    // Only the schema is used; both record fixtures are ignored
+    const fromSchemaOnly = async (_records: any, _recordsReversed: any, schema: Schema): Promise<Table> => makeEmptyTable(schema)
+    await checkTableCreation(fromSchemaOnly)
+  })
+})
+
+describe('when using two versions of arrow', function () {
+  it('can still import data', async function () {
+    // Every type below comes from the 'apache-arrow-old' package, so none of
+    // them are instances of the classes exported by 'apache-arrow'
+    const nestedFields = [
+      new OldField('nested', new OldDictionary(new OldUtf8(), new OldInt32(), 1, true)),
+      new OldField('ts_with_tz', new OldTimestampNanosecond('some_tz')),
+      new OldField('ts_no_tz', new OldTimestampNanosecond(null))
+    ]
+    const foreignSchema = new OldSchema([
+      new OldField('id', new OldInt32()),
+      new OldField('vector', new OldFixedSizeList(1024, new OldField('item', new OldFloat32(), true))),
+      new OldField('struct', new OldStruct(nestedFields))
+    ]) as any
+    // Arrow version 13 stands in for the "foreign arrow" and it has no
+    // metadataVersion. Nothing relies on that property, but without it
+    // deepEqual fails, so it is patched back in.
+    foreignSchema.metadataVersion = MetadataVersion.V5
+
+    const table = makeArrowTable([], { schema: foreignSchema })
+    const ipcBytes = await fromTableToBuffer(table)
+    assert.isAbove(ipcBytes.byteLength, 0)
+
+    const decoded = tableFromIPC(ipcBytes)
+    assert.deepEqual(decoded.schema, foreignSchema)
+  })
+})
--- a/node/src/test/embedding/openai.ts
+++ b/node/src/test/embedding/openai.ts
@@ -0,0 +1,55 @@
+// Copyright 2023 Lance Developers.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import { describe } from 'mocha'
+import { assert } from 'chai'
+
+import { OpenAIEmbeddingFunction } from '../../embedding/openai'
+import { isEmbeddingFunction } from '../../embedding/embedding_function'
+
+// eslint-disable-next-line @typescript-eslint/no-var-requires
+const OpenAIApi = require('openai')
+// eslint-disable-next-line @typescript-eslint/no-var-requires
+const { stub } = require('sinon')
+
+describe('OpenAPIEmbeddings', function () {
+  // Fake payload returned by the stubbed `Embeddings.create`: two
+  // 1536-element embeddings, one per input string used below
+  const stubValue = {
+    data: [
+      {
+        embedding: Array(1536).fill(1.0)
+      },
+      {
+        embedding: Array(1536).fill(2.0)
+      }
+    ]
+  }
+
+  describe('#embed', function () {
+    it('should create vector embeddings', async function () {
+      const openAIStub = stub(OpenAIApi.Embeddings.prototype, 'create').returns(stubValue)
+      try {
+        const f = new OpenAIEmbeddingFunction('text', 'sk-key')
+        const vectors = await f.embed(['abc', 'def'])
+        assert.isTrue(openAIStub.calledOnce)
+        assert.equal(vectors.length, 2)
+        assert.deepEqual(vectors[0], stubValue.data[0].embedding)
+        assert.deepEqual(vectors[1], stubValue.data[1].embedding)
+      } finally {
+        // The stub patches a shared prototype; put the real method back even
+        // when an assertion fails so later tests do not see the fake
+        openAIStub.restore()
+      }
+    })
+  })
+
+  describe('isEmbeddingFunction', function () {
+    it('should match the isEmbeddingFunction guard', function () {
+      assert.isTrue(isEmbeddingFunction(new OpenAIEmbeddingFunction('text', 'sk-key')))
+    })
+  })
+})
--- a/node/src/test/io.ts
+++ b/node/src/test/io.ts
@@ -0,0 +1,76 @@
+// Copyright 2023 Lance Developers.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// IO tests
+
+import { describe } from 'mocha'
+import { assert } from 'chai'
+
+import * as lancedb from '../index'
+import { type ConnectionOptions } from '../index'
+
+// Integration tests that talk to a real S3 endpoint. They are registered only
+// when the TEST_S3_* environment variables below are set; otherwise an empty
+// skipped suite is registered instead.
+describe('LanceDB S3 client', function () {
+  if (process.env.TEST_S3_BASE_URL != null) {
+    const baseUri = process.env.TEST_S3_BASE_URL
+    it('should have a valid url', async function () {
+      const opts = { uri: `${baseUri}/valid_url` }
+      const table = await createTestDB(opts, 2, 20)
+      const con = await lancedb.connect(opts)
+      assert.equal(con.uri, opts.uri)
+
+      const results = await table.search([0.1, 0.3]).limit(5).execute()
+      assert.equal(results.length, 5)
+    }).timeout(10_000)
+  } else {
+    describe.skip('Skip S3 test', function () {})
+  }
+
+  if (process.env.TEST_S3_BASE_URL != null && process.env.TEST_AWS_ACCESS_KEY_ID != null && process.env.TEST_AWS_SECRET_ACCESS_KEY != null) {
+    const baseUri = process.env.TEST_S3_BASE_URL
+    it('use custom credentials', async function () {
+      const opts: ConnectionOptions = {
+        uri: `${baseUri}/custom_credentials`,
+        awsCredentials: {
+          accessKeyId: process.env.TEST_AWS_ACCESS_KEY_ID as string,
+          secretKey: process.env.TEST_AWS_SECRET_ACCESS_KEY as string
+        }
+      }
+      // Neither the table nor the connection is printed here: both were
+      // created from `opts`, which carries the secret key, and test output
+      // usually ends up in CI logs
+      const table = await createTestDB(opts, 2, 20)
+      const con = await lancedb.connect(opts)
+      assert.equal(con.uri, opts.uri)
+
+      const results = await table.search([0.1, 0.3]).limit(5).execute()
+      assert.equal(results.length, 5)
+    }).timeout(10_000)
+  } else {
+    describe.skip('Skip S3 test', function () {})
+  }
+})
+
+/**
+ * Connect with `opts` and create the table `vectors_2`, filled with
+ * `numRows` generated rows whose `vector` column has `numDimensions` entries.
+ *
+ * Row `i` gets id `i + 1`, name `name_<i>`, price `i + 10`, an `is_active`
+ * flag that is true for even `i`, and a vector whose j-th entry is
+ * `i + j * 0.1`.
+ */
+async function createTestDB (opts: ConnectionOptions, numDimensions: number = 2, numRows: number = 2): Promise<lancedb.Table> {
+  const con = await lancedb.connect(opts)
+
+  const vectorForRow = (row: number): number[] => {
+    const components: number[] = []
+    for (let dim = 0; dim < numDimensions; dim++) {
+      components.push(row + (dim * 0.1))
+    }
+    return components
+  }
+
+  const rows = []
+  for (let row = 0; row < numRows; row++) {
+    rows.push({
+      id: row + 1,
+      name: `name_${row}`,
+      price: row + 10,
+      is_active: (row % 2 === 0),
+      vector: vectorForRow(row)
+    })
+  }
+
+  return await con.createTable('vectors_2', rows)
+}
--- a/node/src/test/test.ts
+++ b/node/src/test/test.ts
--- a/node/src/test/util.ts
+++ b/node/src/test/util.ts
@@ -0,0 +1,45 @@
+// Copyright 2023 LanceDB Developers.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import { toSQL } from '../util'
+import * as chai from 'chai'
+
+const expect = chai.expect
+
+describe('toSQL', function () {
+  // One entry per supported literal kind: the input and the SQL text expected
+  const cases: Array<{ kind: string, input: Parameters<typeof toSQL>[0], sql: string }> = [
+    { kind: 'string', input: 'foo', sql: "'foo'" },
+    { kind: 'number', input: 123, sql: '123' },
+    { kind: 'boolean', input: true, sql: 'TRUE' },
+    { kind: 'null', input: null, sql: 'NULL' },
+    { kind: 'Date', input: new Date('05 October 2011 14:48 UTC'), sql: "'2011-10-05T14:48:00.000Z'" },
+    { kind: 'array', input: ['foo', 'bar', true, 1], sql: "['foo', 'bar', TRUE, 1]" }
+  ]
+
+  for (const { kind, input, sql } of cases) {
+    it(`should turn ${kind} to SQL expression`, function () {
+      expect(toSQL(input)).to.equal(sql)
+    })
+  }
+})
--- a/node/src/util.ts
+++ b/node/src/util.ts
@@ -0,0 +1,77 @@
+// Copyright 2023 LanceDB Developers.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/** Values that `toSQL` knows how to render as a SQL literal. */
+export type Literal = string | number | boolean | null | Date | Literal[]
+
+/**
+ * Render a JavaScript value as SQL literal text.
+ *
+ * - strings are wrapped in single quotes, with embedded single quotes doubled
+ * - numbers use their default string form
+ * - booleans become `TRUE` / `FALSE`
+ * - `null` becomes `NULL`
+ * - dates become their quoted ISO-8601 string
+ * - arrays become `[a, b, ...]` with each element rendered recursively
+ *
+ * @param value the value to render
+ * @returns the SQL text for `value`
+ * @throws Error if `value` is none of the supported kinds
+ */
+export function toSQL (value: Literal): string {
+  if (typeof value === 'string') {
+    // A bare quote inside the value would terminate the literal early and let
+    // the rest of the string be parsed as SQL; doubling it keeps it as data
+    return `'${value.replace(/'/g, "''")}'`
+  }
+
+  if (typeof value === 'number') {
+    return value.toString()
+  }
+
+  if (typeof value === 'boolean') {
+    return value ? 'TRUE' : 'FALSE'
+  }
+
+  if (value === null) {
+    return 'NULL'
+  }
+
+  if (value instanceof Date) {
+    return `'${value.toISOString()}'`
+  }
+
+  if (Array.isArray(value)) {
+    return `[${value.map(toSQL).join(', ')}]`
+  }
+
+  // Only reachable when a caller bypasses the type checker
+  // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
+  throw new Error(`Unsupported value type: ${typeof value} value: (${value})`)
+}
+
+/**
+ * String-keyed in-memory cache whose entries stop being returned once they
+ * are older than a fixed time-to-live.
+ *
+ * Expired entries are evicted lazily: an entry is removed only when `get` is
+ * called for its key (or the key is overwritten or deleted).
+ */
+export class TTLCache {
+  private readonly cache = new Map<string, { value: any, expires: number }>()
+  private readonly ttl: number
+
+  /**
+   * @param ttl Time to live in milliseconds
+   */
+  constructor (ttl: number) {
+    this.ttl = ttl
+  }
+
+  /** Return the live value stored under `key`, or `undefined` if absent or expired. */
+  get (key: string): any | undefined {
+    const hit = this.cache.get(key)
+    if (hit === undefined) {
+      return undefined
+    }
+
+    const stillFresh = !(hit.expires < Date.now())
+    if (stillFresh) {
+      return hit.value
+    }
+
+    // Past its deadline: drop it so the map does not keep dead entries
+    this.cache.delete(key)
+    return undefined
+  }
+
+  /** Store `value` under `key`; it expires `ttl` milliseconds from now. */
+  set (key: string, value: any): void {
+    const expires = Date.now() + this.ttl
+    this.cache.set(key, { value, expires })
+  }
+
+  /** Remove `key` from the cache if present. */
+  delete (key: string): void {
+    this.cache.delete(key)
+  }
+}
--- a/node/tsconfig.json
+++ b/node/tsconfig.json
@@ -0,0 +1,14 @@
+{
+  "include": [
+    "src/**/*.ts",
+    "src/*.ts"
+  ],
+  "compilerOptions": {
+    "target": "ES2020",
+    "module": "commonjs",
+    "declaration": true,
+    "outDir": "./dist",
+    "strict": true,
+    "sourceMap": true,
+  }
+}
--- a/nodejs/CLAUDE.md
+++ b/nodejs/CLAUDE.md
@@ -1,13 +0,0 @@
-These are the typescript bindings of LanceDB.
-The core Rust library is in the `../rust/lancedb` directory, the rust binding
-code is in the `src/` directory and the typescript bindings are in
-the `lancedb/` directory.
-
-Whenever you change the Rust code, you will need to recompile: `npm run build`.
-
-Common commands:
-* Build: `npm run build`
-* Lint: `npm run lint`
-* Fix lints: `npm run lint-fix`
-* Test: `npm test`
-* Run single test file: `npm test __test__/arrow.test.ts`
--- a/nodejs/Cargo.toml
+++ b/nodejs/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "lancedb-nodejs"
 edition.workspace = true
-version = "0.21.2"
+version = "0.20.0-beta.2"
 license.workspace = true
 description.workspace = true
 repository.workspace = true
--- a/nodejs/test/arrow.test.ts
+++ b/nodejs/test/arrow.test.ts
@@ -1,16 +1,7 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors

-import {
-  Bool,
-  Field,
-  Int32,
-  List,
-  Schema,
-  Struct,
-  Uint8,
-  Utf8,
-} from "apache-arrow";
+import { Schema } from "apache-arrow";

 import * as arrow15 from "apache-arrow-15";
 import * as arrow16 from "apache-arrow-16";
@@ -20,12 +11,10 @@ import * as arrow18 from "apache-arrow-18";
 import {
  convertToTable,
  fromBufferToRecordBatch,
-  fromDataToBuffer,
  fromRecordBatchToBuffer,
  fromTableToBuffer,
  makeArrowTable,
  makeEmptyTable,
-  tableFromIPC,
 } from "../lancedb/arrow";
 import {
  EmbeddingFunction,
@@ -264,98 +253,6 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
        expect(actualSchema).toEqual(schema);
      });

-      it("will detect vector columns when name contains 'vector' or 'embedding'", async function () {
-        // Test various naming patterns that should be detected as vector columns
-        const floatVectorTable = makeArrowTable([
-          {
-            // Float vectors (use decimal values to ensure they're treated as floats)
-            // biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
-            user_vector: [1.1, 2.2],
-            // biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
-            text_embedding: [3.3, 4.4],
-            // biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
-            doc_embeddings: [5.5, 6.6],
-            // biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
-            my_vector_field: [7.7, 8.8],
-            // biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
-            embedding_model: [9.9, 10.1],
-            // biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
-            VECTOR_COL: [11.1, 12.2], // uppercase
-            // biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
-            Vector_Mixed: [13.3, 14.4], // mixed case
-          },
-        ]);
-
-        // Check that columns with 'vector' or 'embedding' in name are converted to FixedSizeList
-        const floatVectorColumns = [
-          "user_vector",
-          "text_embedding",
-          "doc_embeddings",
-          "my_vector_field",
-          "embedding_model",
-          "VECTOR_COL",
-          "Vector_Mixed",
-        ];
-
-        for (const columnName of floatVectorColumns) {
-          expect(
-            DataType.isFixedSizeList(
-              floatVectorTable.getChild(columnName)?.type,
-            ),
-          ).toBe(true);
-          // Check that float vectors use Float32 by default
-          expect(
-            floatVectorTable
-              .getChild(columnName)
-              ?.type.children[0].type.toString(),
-          ).toEqual(new Float32().toString());
-        }
-
-        // Test that regular integer arrays still get treated as float vectors
-        // (since JavaScript doesn't distinguish integers from floats at runtime)
-        const integerArrayTable = makeArrowTable([
-          {
-            // biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
-            vector_int: [1, 2], // Regular array with integers - should be Float32
-            // biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
-            embedding_int: [3, 4], // Regular array with integers - should be Float32
-          },
-        ]);
-
-        const integerArrayColumns = ["vector_int", "embedding_int"];
-
-        for (const columnName of integerArrayColumns) {
-          expect(
-            DataType.isFixedSizeList(
-              integerArrayTable.getChild(columnName)?.type,
-            ),
-          ).toBe(true);
-          // Regular integer arrays should use Float32 (avoiding false positives)
-          expect(
-            integerArrayTable
-              .getChild(columnName)
-              ?.type.children[0].type.toString(),
-          ).toEqual(new Float32().toString());
-        }
-
-        // Test normal list should NOT be converted to FixedSizeList
-        const normalListTable = makeArrowTable([
-          {
-            // biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
-            normal_list: [15.5, 16.6], // should NOT be detected as vector
-          },
-        ]);
-
-        expect(
-          DataType.isFixedSizeList(
-            normalListTable.getChild("normal_list")?.type,
-          ),
-        ).toBe(false);
-        expect(
-          DataType.isList(normalListTable.getChild("normal_list")?.type),
-        ).toBe(true);
-      });
-
      it("will allow different vector column types", async function () {
        const table = makeArrowTable([{ fp16: [1], fp32: [1], fp64: [1] }], {
          vectorColumns: {
@@ -478,221 +375,8 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
        expect(table2.schema).toEqual(schema);
      });

-      it("will handle missing columns in schema alignment when using embeddings", async function () {
-        const schema = new Schema(
-          [
-            new Field("domain", new Utf8(), true),
-            new Field("name", new Utf8(), true),
-            new Field("description", new Utf8(), true),
-          ],
-          new Map([["embedding_functions", JSON.stringify([])]]),
-        );
-
-        const data = [
-          { domain: "google.com", name: "Google" },
-          { domain: "facebook.com", name: "Facebook" },
-        ];
-
-        const table = await convertToTable(data, undefined, { schema });
-
-        expect(table.numCols).toBe(3);
-        expect(table.numRows).toBe(2);
-
-        const descriptionColumn = table.getChild("description");
-        expect(descriptionColumn).toBeDefined();
-        expect(descriptionColumn?.nullCount).toBe(2);
-        expect(descriptionColumn?.toArray()).toEqual([null, null]);
-
-        expect(table.getChild("domain")?.toArray()).toEqual([
-          "google.com",
-          "facebook.com",
-        ]);
-        expect(table.getChild("name")?.toArray()).toEqual([
-          "Google",
-          "Facebook",
-        ]);
-      });
-
-      it("will handle completely missing nested struct columns", async function () {
-        const schema = new Schema(
-          [
-            new Field("id", new Utf8(), true),
-            new Field("name", new Utf8(), true),
-            new Field(
-              "metadata",
-              new Struct([
-                new Field("version", new Int32(), true),
-                new Field("author", new Utf8(), true),
-                new Field(
-                  "tags",
-                  new List(new Field("item", new Utf8(), true)),
-                  true,
-                ),
-              ]),
-              true,
-            ),
-          ],
-          new Map([["embedding_functions", JSON.stringify([])]]),
-        );
-
-        const data = [
-          { id: "doc1", name: "Document 1" },
-          { id: "doc2", name: "Document 2" },
-        ];
-
-        const table = await convertToTable(data, undefined, { schema });
-
-        expect(table.numCols).toBe(3);
-        expect(table.numRows).toBe(2);
-
-        const buf = await fromTableToBuffer(table);
-        const retrievedTable = tableFromIPC(buf);
-
-        const rows = [];
-        for (let i = 0; i < retrievedTable.numRows; i++) {
-          rows.push(retrievedTable.get(i));
-        }
-
-        expect(rows[0].metadata.version).toBe(null);
-        expect(rows[0].metadata.author).toBe(null);
-        expect(rows[0].metadata.tags).toBe(null);
-        expect(rows[0].id).toBe("doc1");
-        expect(rows[0].name).toBe("Document 1");
-      });
-
-      it("will handle partially missing nested struct fields", async function () {
-        const schema = new Schema(
-          [
-            new Field("id", new Utf8(), true),
-            new Field(
-              "metadata",
-              new Struct([
-                new Field("version", new Int32(), true),
-                new Field("author", new Utf8(), true),
-                new Field("created_at", new Utf8(), true),
-              ]),
-              true,
-            ),
-          ],
-          new Map([["embedding_functions", JSON.stringify([])]]),
-        );
-
-        const data = [
-          { id: "doc1", metadata: { version: 1, author: "Alice" } },
-          { id: "doc2", metadata: { version: 2 } },
-        ];
-
-        const table = await convertToTable(data, undefined, { schema });
-
-        expect(table.numCols).toBe(2);
-        expect(table.numRows).toBe(2);
-
-        const metadataColumn = table.getChild("metadata");
-        expect(metadataColumn).toBeDefined();
-        expect(metadataColumn?.type.toString()).toBe(
-          "Struct<{version:Int32, author:Utf8, created_at:Utf8}>",
-        );
-      });
-
-      it("will handle multiple levels of nested structures", async function () {
-        const schema = new Schema(
-          [
-            new Field("id", new Utf8(), true),
-            new Field(
-              "config",
-              new Struct([
-                new Field("database", new Utf8(), true),
-                new Field(
-                  "connection",
-                  new Struct([
-                    new Field("host", new Utf8(), true),
-                    new Field("port", new Int32(), true),
-                    new Field(
-                      "ssl",
-                      new Struct([
-                        new Field("enabled", new Bool(), true),
-                        new Field("cert_path", new Utf8(), true),
-                      ]),
-                      true,
-                    ),
-                  ]),
-                  true,
-                ),
-              ]),
-              true,
-            ),
-          ],
-          new Map([["embedding_functions", JSON.stringify([])]]),
-        );
-
-        const data = [
-          {
-            id: "config1",
-            config: {
-              database: "postgres",
-              connection: { host: "localhost" },
-            },
-          },
-          {
-            id: "config2",
-            config: { database: "mysql" },
-          },
-          {
-            id: "config3",
-          },
-        ];
-
-        const table = await convertToTable(data, undefined, { schema });
-
-        expect(table.numCols).toBe(2);
-        expect(table.numRows).toBe(3);
-
-        const configColumn = table.getChild("config");
-        expect(configColumn).toBeDefined();
-        expect(configColumn?.type.toString()).toBe(
-          "Struct<{database:Utf8, connection:Struct<{host:Utf8, port:Int32, ssl:Struct<{enabled:Bool, cert_path:Utf8}>}>}>",
-        );
-      });
-
-      it("will handle missing columns in Arrow table input when using embeddings", async function () {
-        const incompleteTable = makeArrowTable([
-          { domain: "google.com", name: "Google" },
-          { domain: "facebook.com", name: "Facebook" },
-        ]);
-
-        const schema = new Schema(
-          [
-            new Field("domain", new Utf8(), true),
-            new Field("name", new Utf8(), true),
-            new Field("description", new Utf8(), true),
-          ],
-          new Map([["embedding_functions", JSON.stringify([])]]),
-        );
-
-        const buf = await fromDataToBuffer(incompleteTable, undefined, schema);
-
-        expect(buf.byteLength).toBeGreaterThan(0);
-
-        const retrievedTable = tableFromIPC(buf);
-        expect(retrievedTable.numCols).toBe(3);
-        expect(retrievedTable.numRows).toBe(2);
-
-        const descriptionColumn = retrievedTable.getChild("description");
-        expect(descriptionColumn).toBeDefined();
-        expect(descriptionColumn?.nullCount).toBe(2);
-        expect(descriptionColumn?.toArray()).toEqual([null, null]);
-
-        expect(retrievedTable.getChild("domain")?.toArray()).toEqual([
-          "google.com",
-          "facebook.com",
-        ]);
-        expect(retrievedTable.getChild("name")?.toArray()).toEqual([
-          "Google",
-          "Facebook",
-        ]);
-      });
-
      it("should correctly retain values in nested struct fields", async function () {
+        // Define test data with nested struct
        const testData = [
          {
            id: "doc1",
@@ -716,8 +400,10 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
          },
        ];

+        // Create Arrow table from the data
        const table = makeArrowTable(testData);

+        // Verify schema has the nested struct fields
        const metadataField = table.schema.fields.find(
          (f) => f.name === "metadata",
        );
@@ -731,17 +417,23 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
          "text",
        ]);

+        // Convert to buffer and back (simulating storage and retrieval)
        const buf = await fromTableToBuffer(table);
        const retrievedTable = tableFromIPC(buf);

+        // Verify the retrieved table has the same structure
        const rows = [];
        for (let i = 0; i < retrievedTable.numRows; i++) {
          rows.push(retrievedTable.get(i));
        }

+        // Check values in the first row
        const firstRow = rows[0];
        expect(firstRow.id).toBe("doc1");
        expect(firstRow.vector.toJSON()).toEqual([1, 2, 3]);
+
+        // Verify metadata values are preserved (this is where the bug is)
+        expect(firstRow.metadata).toBeDefined();
        expect(firstRow.metadata.filePath).toBe("/path/to/file1.ts");
        expect(firstRow.metadata.startLine).toBe(10);
        expect(firstRow.metadata.endLine).toBe(20);
@@ -900,14 +592,14 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
        ).rejects.toThrow("column vector was missing");
      });

-      it("will skip embedding application if already applied", async function () {
+      it("will provide a nice error if run twice", async function () {
        const records = sampleRecords();
        const table = await convertToTable(records, dummyEmbeddingConfig);

        // fromTableToBuffer will try and apply the embeddings again
-        // but should skip since the column already has non-null values
-        const result = await fromTableToBuffer(table, dummyEmbeddingConfig);
-        expect(result.byteLength).toBeGreaterThan(0);
+        await expect(
+          fromTableToBuffer(table, dummyEmbeddingConfig),
+        ).rejects.toThrow("already existed");
      });
    });

--- a/nodejs/test/remote.test.ts
+++ b/nodejs/test/remote.test.ts
@@ -42,28 +42,6 @@ describe("remote connection", () => {
    });
  });

-  it("should accept overall timeout configuration", async () => {
-    await connect("db://test", {
-      apiKey: "fake",
-      clientConfig: {
-        timeoutConfig: { timeout: 30 },
-      },
-    });
-
-    // Test with all timeout parameters
-    await connect("db://test", {
-      apiKey: "fake",
-      clientConfig: {
-        timeoutConfig: {
-          timeout: 60,
-          connectTimeout: 10,
-          readTimeout: 20,
-          poolIdleTimeout: 300,
-        },
-      },
-    });
-  });
-
  it("should pass down apiKey and userAgent", async () => {
    await withMockDatabase(
      (req, res) => {
--- a/nodejs/test/session.test.ts
+++ b/nodejs/test/session.test.ts
@@ -1,46 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The LanceDB Authors
-
-import * as tmp from "tmp";
-import { Session, connect } from "../lancedb";
-
-describe("Session", () => {
-  let tmpDir: tmp.DirResult;
-  beforeEach(() => {
-    tmpDir = tmp.dirSync({ unsafeCleanup: true });
-  });
-  afterEach(() => tmpDir.removeCallback());
-
-  it("should configure cache sizes and work with database operations", async () => {
-    // Create session with small cache limits for testing
-    const indexCacheSize = BigInt(1024 * 1024); // 1MB
-    const metadataCacheSize = BigInt(512 * 1024); // 512KB
-
-    const session = new Session(indexCacheSize, metadataCacheSize);
-
-    // Record initial cache state
-    const initialCacheSize = session.sizeBytes();
-    const initialCacheItems = session.approxNumItems();
-
-    // Test session works with database connection
-    const db = await connect({ uri: tmpDir.name, session: session });
-
-    // Create and use a table to exercise the session
-    const data = Array.from({ length: 100 }, (_, i) => ({
-      id: i,
-      text: `item ${i}`,
-    }));
-    const table = await db.createTable("test", data);
-    const results = await table.query().limit(5).toArray();
-
-    expect(results).toHaveLength(5);
-
-    // Verify cache usage increased after operations
-    const finalCacheSize = session.sizeBytes();
-    const finalCacheItems = session.approxNumItems();
-
-    expect(finalCacheSize).toBeGreaterThan(initialCacheSize); // Cache should have grown
-    expect(finalCacheItems).toBeGreaterThanOrEqual(initialCacheItems); // Items should not decrease
-    expect(initialCacheSize).toBeLessThan(indexCacheSize + metadataCacheSize); // Within limits
-  });
-});
--- a/nodejs/test/table.test.ts
+++ b/nodejs/test/table.test.ts
@@ -33,12 +33,7 @@ import {
  register,
 } from "../lancedb/embedding";
 import { Index } from "../lancedb/indices";
-import {
-  BooleanQuery,
-  Occur,
-  Operator,
-  instanceOfFullTextQuery,
-} from "../lancedb/query";
+import { instanceOfFullTextQuery } from "../lancedb/query";
 import exp = require("constants");

 describe.each([arrow15, arrow16, arrow17, arrow18])(
@@ -368,9 +363,9 @@ describe("merge insert", () => {
      { a: 4, b: "z" },
    ];

-    const result = (await table.toArrow()).toArray().sort((a, b) => a.a - b.a);
-
-    expect(result.map((row) => ({ ...row }))).toEqual(expected);
+    expect(
+      JSON.parse(JSON.stringify((await table.toArrow()).toArray())),
+    ).toEqual(expected);
  });
  test("conditional update", async () => {
    const newData = [
@@ -559,32 +554,6 @@ describe("When creating an index", () => {
    rst = await tbl.query().limit(2).offset(1).nearestTo(queryVec).toArrow();
    expect(rst.numRows).toBe(1);

-    // test nprobes
-    rst = await tbl.query().nearestTo(queryVec).limit(2).nprobes(50).toArrow();
-    expect(rst.numRows).toBe(2);
-    rst = await tbl
-      .query()
-      .nearestTo(queryVec)
-      .limit(2)
-      .minimumNprobes(15)
-      .toArrow();
-    expect(rst.numRows).toBe(2);
-    rst = await tbl
-      .query()
-      .nearestTo(queryVec)
-      .limit(2)
-      .minimumNprobes(10)
-      .maximumNprobes(20)
-      .toArrow();
-    expect(rst.numRows).toBe(2);
-
-    expect(() => tbl.query().nearestTo(queryVec).minimumNprobes(0)).toThrow(
-      "Invalid input, minimum_nprobes must be greater than 0",
-    );
-    expect(() => tbl.query().nearestTo(queryVec).maximumNprobes(5)).toThrow(
-      "Invalid input, maximum_nprobes must be greater than or equal to minimum_nprobes",
-    );
-
    await tbl.dropIndex("vec_idx");
    const indices2 = await tbl.listIndices();
    expect(indices2.length).toBe(0);
@@ -1562,18 +1531,6 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(

      const results = await table.search("hello").toArray();
      expect(results[0].text).toBe(data[0].text);
-
-      const results2 = await table
-        .search(new MatchQuery("hello world", "text"))
-        .toArray();
-      expect(results2.length).toBe(2);
-
-      const results3 = await table
-        .search(
-          new MatchQuery("hello world", "text", { operator: Operator.And }),
-        )
-        .toArray();
-      expect(results3.length).toBe(1);
    });

    test("full text search without lowercase", async () => {
@@ -1650,114 +1607,6 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
      expect(resultSet.has("fob")).toBe(true);
      expect(resultSet.has("fo")).toBe(true);
      expect(resultSet.has("food")).toBe(true);
-
-      const prefixResults = await table
-        .search(
-          new MatchQuery("foo", "text", { fuzziness: 3, prefixLength: 3 }),
-        )
-        .toArray();
-      expect(prefixResults.length).toBe(2);
-      const resultSet2 = new Set(prefixResults.map((r) => r.text));
-      expect(resultSet2.has("foo")).toBe(true);
-      expect(resultSet2.has("food")).toBe(true);
-    });
-
-    test("full text search boolean query", async () => {
-      const db = await connect(tmpDir.name);
-      const data = [
-        { text: "The cat and dog are playing" },
-        { text: "The cat is sleeping" },
-        { text: "The dog is barking" },
-        { text: "The dog chases the cat" },
-      ];
-      const table = await db.createTable("test", data);
-      await table.createIndex("text", {
-        config: Index.fts({ withPosition: false }),
-      });
-
-      const shouldResults = await table
-        .search(
-          new BooleanQuery([
-            [Occur.Should, new MatchQuery("cat", "text")],
-            [Occur.Should, new MatchQuery("dog", "text")],
-          ]),
-        )
-        .toArray();
-      expect(shouldResults.length).toBe(4);
-
-      const mustResults = await table
-        .search(
-          new BooleanQuery([
-            [Occur.Must, new MatchQuery("cat", "text")],
-            [Occur.Must, new MatchQuery("dog", "text")],
-          ]),
-        )
-        .toArray();
-      expect(mustResults.length).toBe(2);
-
-      const mustNotResults = await table
-        .search(
-          new BooleanQuery([
-            [Occur.Must, new MatchQuery("cat", "text")],
-            [Occur.MustNot, new MatchQuery("dog", "text")],
-          ]),
-        )
-        .toArray();
-      expect(mustNotResults.length).toBe(1);
-    });
-
-    test("full text search ngram", async () => {
-      const db = await connect(tmpDir.name);
-      const data = [
-        { text: "hello world", vector: [0.1, 0.2, 0.3] },
-        { text: "lance database", vector: [0.4, 0.5, 0.6] },
-        { text: "lance is cool", vector: [0.7, 0.8, 0.9] },
-      ];
-      const table = await db.createTable("test", data);
-      await table.createIndex("text", {
-        config: Index.fts({ baseTokenizer: "ngram" }),
-      });
-
-      const results = await table.search("lan").toArray();
-      expect(results.length).toBe(2);
-      const resultSet = new Set(results.map((r) => r.text));
-      expect(resultSet.has("lance database")).toBe(true);
-      expect(resultSet.has("lance is cool")).toBe(true);
-
-      const results2 = await table.search("nce").toArray(); // spellchecker:disable-line
-      expect(results2.length).toBe(2);
-      const resultSet2 = new Set(results2.map((r) => r.text));
-      expect(resultSet2.has("lance database")).toBe(true);
-      expect(resultSet2.has("lance is cool")).toBe(true);
-
-      // the default min_ngram_length is 3, so "la" should not match
-      const results3 = await table.search("la").toArray();
-      expect(results3.length).toBe(0);
-
-      // test setting min_ngram_length and prefix_only
-      await table.createIndex("text", {
-        config: Index.fts({
-          baseTokenizer: "ngram",
-          ngramMinLength: 2,
-          prefixOnly: true,
-        }),
-        replace: true,
-      });
-
-      const results4 = await table.search("lan").toArray();
-      expect(results4.length).toBe(2);
-      const resultSet4 = new Set(results4.map((r) => r.text));
-      expect(resultSet4.has("lance database")).toBe(true);
-      expect(resultSet4.has("lance is cool")).toBe(true);
-
-      const results5 = await table.search("nce").toArray(); // spellchecker:disable-line
-      expect(results5.length).toBe(0);
-
-      const results6 = await table.search("la").toArray();
-      expect(results6.length).toBe(2);
-      const resultSet6 = new Set(results6.map((r) => r.text));
-      expect(resultSet6.has("lance database")).toBe(true);
-      expect(resultSet6.has("lance is cool")).toBe(true);
    });

    test.each([
@@ -1863,43 +1712,4 @@ describe("column name options", () => {
    expect(results[0].query_index).toBe(0);
    expect(results[1].query_index).toBe(1);
  });
-
-  test("index and search multivectors", async () => {
-    const db = await connect(tmpDir.name);
-    const data = [];
-    // generate 512 random multivectors
-    for (let i = 0; i < 256; i++) {
-      data.push({
-        multivector: Array.from({ length: 10 }, () =>
-          Array(2).fill(Math.random()),
-        ),
-      });
-    }
-    const table = await db.createTable("multivectors", data, {
-      schema: new Schema([
-        new Field(
-          "multivector",
-          new List(
-            new Field(
-              "item",
-              new FixedSizeList(2, new Field("item", new Float32())),
-            ),
-          ),
-        ),
-      ]),
-    });
-
-    const results = await table.search(data[0].multivector).limit(10).toArray();
-    expect(results.length).toBe(10);
-
-    await table.createIndex("multivector", {
-      config: Index.ivfPq({ numPartitions: 2, distanceType: "cosine" }),
-    });
-
-    const results2 = await table
-      .search(data[0].multivector)
-      .limit(10)
-      .toArray();
-    expect(results2.length).toBe(10);
-  });
 });
--- a/nodejs/examples/package-lock.json
+++ b/nodejs/examples/package-lock.json
@@ -30,7 +30,7 @@
        "x64",
        "arm64"
      ],
-      "license": "Apache-2.0",
+      "license": "Apache 2.0",
      "os": [
        "darwin",
        "linux",
--- a/nodejs/lancedb/arrow.ts
+++ b/nodejs/lancedb/arrow.ts
@@ -34,7 +34,6 @@ import {
  Struct,
  Timestamp,
  Type,
-  Uint8,
  Utf8,
  Vector,
  makeVector as arrowMakeVector,
@@ -52,15 +51,6 @@ import {
  sanitizeTable,
  sanitizeType,
 } from "./sanitize";
-
-/**
- * Check if a field name indicates a vector column.
- */
-function nameSuggestsVectorColumn(fieldName: string): boolean {
-  const nameLower = fieldName.toLowerCase();
-  return nameLower.includes("vector") || nameLower.includes("embedding");
-}
-
 export * from "apache-arrow";
 export type SchemaLike =
  | Schema
@@ -117,20 +107,6 @@ export type IntoVector =
  | number[]
  | Promise<Float32Array | Float64Array | number[]>;

-export type MultiVector = IntoVector[];
-
-export function isMultiVector(value: unknown): value is MultiVector {
-  return Array.isArray(value) && isIntoVector(value[0]);
-}
-
-export function isIntoVector(value: unknown): value is IntoVector {
-  return (
-    value instanceof Float32Array ||
-    value instanceof Float64Array ||
-    (Array.isArray(value) && !Array.isArray(value[0]))
-  );
-}
-
 export function isArrowTable(value: object): value is TableLike {
  if (value instanceof ArrowTable) return true;
  return "schema" in value && "batches" in value;
@@ -441,9 +417,7 @@ function inferSchema(
        } else {
          const inferredType = inferType(value, path, opts);
          if (inferredType === undefined) {
-            throw new Error(`Failed to infer data type for field ${path.join(
-              ".",
-            )} at row ${rowI}. \
+            throw new Error(`Failed to infer data type for field ${path.join(".")} at row ${rowI}. \
                             Consider providing an explicit schema.`);
          }
          pathTree.set(path, inferredType);
@@ -601,17 +575,10 @@ function inferType(
      return undefined;
    }
    // Try to automatically detect embedding columns.
-    if (nameSuggestsVectorColumn(path[path.length - 1])) {
-      // Check if value is a Uint8Array for integer vector type determination
-      if (value instanceof Uint8Array) {
-        // For integer vectors, we default to Uint8 (matching Python implementation)
-        const child = new Field("item", new Uint8(), true);
-        return new FixedSizeList(value.length, child);
-      } else {
-        // For float vectors, we default to Float32
-        const child = new Field("item", new Float32(), true);
-        return new FixedSizeList(value.length, child);
-      }
+    if (valueType instanceof Float && path[path.length - 1] === "vector") {
+      // We default to Float32 for vectors.
+      const child = new Field("item", new Float32(), true);
+      return new FixedSizeList(value.length, child);
    } else {
      const child = new Field("item", valueType, true);
      return new List(child);
@@ -832,17 +799,11 @@ async function applyEmbeddingsFromMetadata(
        `Cannot apply embedding function because the source column '${functionEntry.sourceColumn}' was not present in the data`,
      );
    }
-
-    // Check if destination column exists and handle accordingly
    if (columns[destColumn] !== undefined) {
-      const existingColumn = columns[destColumn];
-      // If the column exists but is all null, we can fill it with embeddings
-      if (existingColumn.nullCount !== existingColumn.length) {
-        // Column has non-null values, skip embedding application
-        continue;
-      }
+      throw new Error(
+        `Attempt to apply embeddings to table failed because column ${destColumn} already existed`,
+      );
    }
-
    if (table.batches.length > 1) {
      throw new Error(
        "Internal error: `makeArrowTable` unexpectedly created a table with more than one batch",
@@ -870,15 +831,6 @@ async function applyEmbeddingsFromMetadata(
    const vector = makeVector(vectors, destType);
    columns[destColumn] = vector;
  }
-
-  // Add any missing columns from the schema as null vectors
-  for (const field of schema.fields) {
-    if (!(field.name in columns)) {
-      const nullValues = new Array(table.numRows).fill(null);
-      columns[field.name] = makeVector(nullValues, field.type);
-    }
-  }
-
  const newTable = new ArrowTable(columns);
  return alignTable(newTable, schema);
 }
@@ -951,23 +903,11 @@ async function applyEmbeddings<T>(
      );
    }
  } else {
-    // Check if destination column exists and handle accordingly
    if (Object.prototype.hasOwnProperty.call(newColumns, destColumn)) {
-      const existingColumn = newColumns[destColumn];
-      // If the column exists but is all null, we can fill it with embeddings
-      if (existingColumn.nullCount !== existingColumn.length) {
-        // Column has non-null values, skip embedding application and return table as-is
-        let newTable = new ArrowTable(newColumns);
-        if (schema != null) {
-          newTable = alignTable(newTable, schema as Schema);
-        }
-        return new ArrowTable(
-          new Schema(newTable.schema.fields, schemaMetadata),
-          newTable.batches,
-        );
-      }
+      throw new Error(
+        `Attempt to apply embeddings to table failed because column ${destColumn} already existed`,
+      );
    }
-
    if (table.batches.length > 1) {
      throw new Error(
        "Internal error: `makeArrowTable` unexpectedly created a table with more than one batch",
@@ -1027,21 +967,7 @@ export async function convertToTable(
  embeddings?: EmbeddingFunctionConfig,
  makeTableOptions?: Partial<MakeArrowTableOptions>,
 ): Promise<ArrowTable> {
-  let processedData = data;
-
-  // If we have a schema with embedding metadata, we need to preprocess the data
-  // to ensure all nested fields are present
-  if (
-    makeTableOptions?.schema &&
-    makeTableOptions.schema.metadata?.has("embedding_functions")
-  ) {
-    processedData = ensureNestedFieldsExist(
-      data,
-      makeTableOptions.schema as Schema,
-    );
-  }
-
-  const table = makeArrowTable(processedData, makeTableOptions);
+  const table = makeArrowTable(data, makeTableOptions);
  return await applyEmbeddings(table, embeddings, makeTableOptions?.schema);
 }

@@ -1134,16 +1060,7 @@ export async function fromDataToBuffer(
    schema = sanitizeSchema(schema);
  }
  if (isArrowTable(data)) {
-    const table = sanitizeTable(data);
-    // If we have a schema with embedding functions, we need to ensure all columns exist
-    // before applying embeddings, since applyEmbeddingsFromMetadata expects all columns
-    // to be present in the table
-    if (schema && schema.metadata?.has("embedding_functions")) {
-      const alignedTable = alignTableToSchema(table, schema);
-      return fromTableToBuffer(alignedTable, embeddings, schema);
-    } else {
-      return fromTableToBuffer(table, embeddings, schema);
-    }
+    return fromTableToBuffer(sanitizeTable(data), embeddings, schema);
  } else {
    const table = await convertToTable(data, embeddings, { schema });
    return fromTableToBuffer(table);
@@ -1212,7 +1129,7 @@ function alignBatch(batch: RecordBatch, schema: Schema): RecordBatch {
    type: new Struct(schema.fields),
    length: batch.numRows,
    nullCount: batch.nullCount,
-    children: alignedChildren as unknown as ArrowData<DataType>[],
+    children: alignedChildren,
  });
  return new RecordBatch(schema, newData);
 }
@@ -1284,79 +1201,6 @@ function validateSchemaEmbeddings(
  return new Schema(fields, schema.metadata);
 }

-/**
- * Ensures that all nested fields defined in the schema exist in the data,
- * filling missing fields with null values.
- */
-export function ensureNestedFieldsExist(
-  data: Array<Record<string, unknown>>,
-  schema: Schema,
-): Array<Record<string, unknown>> {
-  return data.map((row) => {
-    const completeRow: Record<string, unknown> = {};
-
-    for (const field of schema.fields) {
-      if (field.name in row) {
-        if (
-          field.type.constructor.name === "Struct" &&
-          row[field.name] !== null &&
-          row[field.name] !== undefined
-        ) {
-          // Handle nested struct
-          const nestedValue = row[field.name] as Record<string, unknown>;
-          completeRow[field.name] = ensureStructFieldsExist(
-            nestedValue,
-            field.type,
-          );
-        } else {
-          // Non-struct field or null struct value
-          completeRow[field.name] = row[field.name];
-        }
-      } else {
-        // Field is missing from the data - set to null
-        completeRow[field.name] = null;
-      }
-    }
-
-    return completeRow;
-  });
-}
-
-/**
- * Recursively ensures that all fields in a struct type exist in the data,
- * filling missing fields with null values.
- */
-function ensureStructFieldsExist(
-  data: Record<string, unknown>,
-  structType: Struct,
-): Record<string, unknown> {
-  const completeStruct: Record<string, unknown> = {};
-
-  for (const childField of structType.children) {
-    if (childField.name in data) {
-      if (
-        childField.type.constructor.name === "Struct" &&
-        data[childField.name] !== null &&
-        data[childField.name] !== undefined
-      ) {
-        // Recursively handle nested struct
-        completeStruct[childField.name] = ensureStructFieldsExist(
-          data[childField.name] as Record<string, unknown>,
-          childField.type,
-        );
-      } else {
-        // Non-struct field or null struct value
-        completeStruct[childField.name] = data[childField.name];
-      }
-    } else {
-      // Field is missing - set to null
-      completeStruct[childField.name] = null;
-    }
-  }
-
-  return completeStruct;
-}
-
 interface JsonDataType {
  type: string;
  fields?: JsonField[];
@@ -1490,64 +1334,3 @@ function fieldToJson(field: Field): JsonField {
    metadata: field.metadata,
  };
 }
-
-function alignTableToSchema(
-  table: ArrowTable,
-  targetSchema: Schema,
-): ArrowTable {
-  const existingColumns = new Map<string, Vector>();
-
-  // Map existing columns
-  for (const field of table.schema.fields) {
-    existingColumns.set(field.name, table.getChild(field.name)!);
-  }
-
-  // Create vectors for all fields in target schema
-  const alignedColumns: Record<string, Vector> = {};
-
-  for (const field of targetSchema.fields) {
-    if (existingColumns.has(field.name)) {
-      // Column exists, use it
-      alignedColumns[field.name] = existingColumns.get(field.name)!;
-    } else {
-      // Column missing, create null vector
-      alignedColumns[field.name] = createNullVector(field, table.numRows);
-    }
-  }
-
-  // Create new table with aligned schema and columns
-  return new ArrowTable(targetSchema, alignedColumns);
-}
-
-function createNullVector(field: Field, numRows: number): Vector {
-  if (field.type.constructor.name === "Struct") {
-    // For struct types, create a struct with null fields
-    const structType = field.type as Struct;
-    const childVectors = structType.children.map((childField) =>
-      createNullVector(childField, numRows),
-    );
-
-    // Create struct data
-    const structData = makeData({
-      type: structType,
-      length: numRows,
-      nullCount: 0,
-      children: childVectors.map((v) => v.data[0]),
-    });
-
-    return arrowMakeVector(structData);
-  } else {
-    // For other types, create a vector of nulls
-    const nullBitmap = new Uint8Array(Math.ceil(numRows / 8));
-    // All bits are 0, meaning all values are null
-
-    const data = makeData({
-      type: field.type,
-      length: numRows,
-      nullCount: numRows,
-      nullBitmap,
-    });
-
-    return arrowMakeVector(data);
-  }
-}
--- a/nodejs/lancedb/connection.ts
+++ b/nodejs/lancedb/connection.ts
@@ -85,9 +85,6 @@ export interface OpenTableOptions {
  /**
   * Set the size of the index cache, specified as a number of entries
   *
-   * @deprecated Use session-level cache configuration instead.
-   * Create a Session with custom cache sizes and pass it to the connect() function.
-   *
   * The exact meaning of an "entry" will depend on the type of index:
   * - IVF: there is one entry for each IVF partition
   * - BTREE: there is one entry for the entire index
--- a/nodejs/lancedb/index.ts
+++ b/nodejs/lancedb/index.ts
@@ -10,7 +10,6 @@ import {
 import {
  ConnectionOptions,
  Connection as LanceDbConnection,
-  Session,
 } from "./native.js";

 export {
@@ -52,8 +51,6 @@ export {
  OpenTableOptions,
 } from "./connection";

-export { Session } from "./native.js";
-
 export {
  ExecutableQuery,
  Query,
@@ -67,10 +64,7 @@ export {
  PhraseQuery,
  BoostQuery,
  MultiMatchQuery,
-  BooleanQuery,
  FullTextQueryType,
-  Operator,
-  Occur,
 } from "./query";

 export {
@@ -103,7 +97,6 @@ export {
  RecordBatchLike,
  DataLike,
  IntoVector,
-  MultiVector,
 } from "./arrow";
 export { IntoSql, packBits } from "./util";

@@ -134,7 +127,6 @@ export { IntoSql, packBits } from "./util";
 export async function connect(
  uri: string,
  options?: Partial<ConnectionOptions>,
-  session?: Session,
 ): Promise<Connection>;
 /**
 * Connect to a LanceDB instance at the given URI.
@@ -153,43 +145,31 @@ export async function connect(
 *   storageOptions: {timeout: "60s"}
 * });
 * ```
- *
- * @example
- * ```ts
- * const session = Session.default();
- * const conn = await connect({
- *   uri: "/path/to/database",
- *   session: session
- * });
- * ```
 */
 export async function connect(
  options: Partial<ConnectionOptions> & { uri: string },
 ): Promise<Connection>;
 export async function connect(
  uriOrOptions: string | (Partial<ConnectionOptions> & { uri: string }),
-  options?: Partial<ConnectionOptions>,
+  options: Partial<ConnectionOptions> = {},
 ): Promise<Connection> {
  let uri: string | undefined;
-  let finalOptions: Partial<ConnectionOptions> = {};
-
  if (typeof uriOrOptions !== "string") {
    const { uri: uri_, ...opts } = uriOrOptions;
    uri = uri_;
-    finalOptions = opts;
+    options = opts;
  } else {
    uri = uriOrOptions;
-    finalOptions = options || {};
  }

  if (!uri) {
    throw new Error("uri is required");
  }

-  finalOptions = (finalOptions as ConnectionOptions) ?? {};
-  (<ConnectionOptions>finalOptions).storageOptions = cleanseStorageOptions(
-    (<ConnectionOptions>finalOptions).storageOptions,
+  options = (options as ConnectionOptions) ?? {};
+  (<ConnectionOptions>options).storageOptions = cleanseStorageOptions(
+    (<ConnectionOptions>options).storageOptions,
  );
-  const nativeConn = await LanceDbConnection.new(uri, finalOptions);
+  const nativeConn = await LanceDbConnection.new(uri, options);
  return new LocalConnection(nativeConn);
 }
--- a/nodejs/lancedb/indices.ts
+++ b/nodejs/lancedb/indices.ts
@@ -439,7 +439,7 @@ export interface FtsOptions {
   *
   * "raw" - Raw tokenizer. This tokenizer does not split the text into tokens and indexes the entire text as a single token.
   */
-  baseTokenizer?: "simple" | "whitespace" | "raw" | "ngram";
+  baseTokenizer?: "simple" | "whitespace" | "raw";

  /**
   * language for stemming and stop words
@@ -472,21 +472,6 @@ export interface FtsOptions {
   * whether to remove punctuation
   */
  asciiFolding?: boolean;
-
-  /**
-   * ngram min length
-   */
-  ngramMinLength?: number;
-
-  /**
-   * ngram max length
-   */
-  ngramMaxLength?: number;
-
-  /**
-   * whether to only index the prefix of the token for ngram tokenizer
-   */
-  prefixOnly?: boolean;
 }

 export class Index {
@@ -623,9 +608,6 @@ export class Index {
        options?.stem,
        options?.removeStopWords,
        options?.asciiFolding,
-        options?.ngramMinLength,
-        options?.ngramMaxLength,
-        options?.prefixOnly,
      ),
    );
  }
--- a/nodejs/lancedb/query.ts
+++ b/nodejs/lancedb/query.ts
@@ -448,10 +448,6 @@ export class VectorQuery extends QueryBase<NativeVectorQuery> {
   * For best results we recommend tuning this parameter with a benchmark against
   * your actual data to find the smallest possible value that will still give
   * you the desired recall.
-   *
-   * For more fine grained control over behavior when you have a very narrow filter
-   * you can use `minimumNprobes` and `maximumNprobes`.  This method sets both
-   * the minimum and maximum to the same value.
   */
  nprobes(nprobes: number): VectorQuery {
    super.doCall((inner) => inner.nprobes(nprobes));
@@ -459,33 +455,6 @@ export class VectorQuery extends QueryBase<NativeVectorQuery> {
    return this;
  }

-  /**
-   * Set the minimum number of probes used.
-   *
-   * This controls the minimum number of partitions that will be searched.  This
-   * parameter will impact every query against a vector index, regardless of the
-   * filter.  See `nprobes` for more details.  Higher values will increase recall
-   * but will also increase latency.
-   */
-  minimumNprobes(minimumNprobes: number): VectorQuery {
-    super.doCall((inner) => inner.minimumNprobes(minimumNprobes));
-    return this;
-  }
-
-  /**
-   * Set the maximum number of probes used.
-   *
-   * This controls the maximum number of partitions that will be searched.  If this
-   * number is greater than minimumNprobes then the excess partitions will _only_ be
-   * searched if we have not found enough results.  This can be useful when there is
-   * a narrow filter to allow these queries to spend more time searching and avoid
-   * potential false negatives.
-   */
-  maximumNprobes(maximumNprobes: number): VectorQuery {
-    super.doCall((inner) => inner.maximumNprobes(maximumNprobes));
-    return this;
-  }
-
  /*
   * Set the distance range to use
   *
@@ -793,31 +762,6 @@ export enum FullTextQueryType {
  MatchPhrase = "match_phrase",
  Boost = "boost",
  MultiMatch = "multi_match",
-  Boolean = "boolean",
-}
-
-/**
- * Enum representing the logical operators used in full-text queries.
- *
- * - `And`: All terms must match.
- * - `Or`: At least one term must match.
- */
-export enum Operator {
-  And = "AND",
-  Or = "OR",
-}
-
-/**
- * Enum representing the occurrence of terms in full-text queries.
- *
- * - `Must`: The term must be present in the document.
- * - `Should`: The term should contribute to the document score, but is not required.
- * - `MustNot`: The term must not be present in the document.
- */
-export enum Occur {
-  Should = "SHOULD",
-  Must = "MUST",
-  MustNot = "MUST_NOT",
 }

 /**
@@ -847,7 +791,6 @@ export function instanceOfFullTextQuery(obj: any): obj is FullTextQuery {
 export class MatchQuery implements FullTextQuery {
  /** @ignore */
  public readonly inner: JsFullTextQuery;
-
  /**
   * Creates an instance of MatchQuery.
   *
@@ -857,8 +800,6 @@ export class MatchQuery implements FullTextQuery {
   *   - `boost`: The boost factor for the query (default is 1.0).
   *   - `fuzziness`: The fuzziness level for the query (default is 0).
   *   - `maxExpansions`: The maximum number of terms to consider for fuzzy matching (default is 50).
-   *   - `operator`: The logical operator to use for combining terms in the query (default is "OR").
-   *   - `prefixLength`: The number of beginning characters being unchanged for fuzzy matching.
   */
  constructor(
    query: string,
@@ -867,8 +808,6 @@ export class MatchQuery implements FullTextQuery {
      boost?: number;
      fuzziness?: number;
      maxExpansions?: number;
-      operator?: Operator;
-      prefixLength?: number;
    },
  ) {
    let fuzziness = options?.fuzziness;
@@ -881,8 +820,6 @@ export class MatchQuery implements FullTextQuery {
      options?.boost ?? 1.0,
      fuzziness,
      options?.maxExpansions ?? 50,
-      options?.operator ?? Operator.Or,
-      options?.prefixLength ?? 0,
    );
  }

@@ -899,11 +836,9 @@ export class PhraseQuery implements FullTextQuery {
   *
   * @param query - The phrase to search for in the specified column.
   * @param column - The name of the column to search within.
-   * @param options - Optional parameters for the phrase query.
-   *   - `slop`: The maximum number of intervening unmatched positions allowed between words in the phrase (default is 0).
   */
-  constructor(query: string, column: string, options?: { slop?: number }) {
-    this.inner = JsFullTextQuery.phraseQuery(query, column, options?.slop ?? 0);
+  constructor(query: string, column: string) {
+    this.inner = JsFullTextQuery.phraseQuery(query, column);
  }

  queryType(): FullTextQueryType {
@@ -954,21 +889,18 @@ export class MultiMatchQuery implements FullTextQuery {
   * @param columns - An array of column names to search within.
   * @param options - Optional parameters for the multi-match query.
   *  - `boosts`: An array of boost factors for each column (default is 1.0 for all).
-   *  - `operator`: The logical operator to use for combining terms in the query (default is "OR").
   */
  constructor(
    query: string,
    columns: string[],
    options?: {
      boosts?: number[];
-      operator?: Operator;
    },
  ) {
    this.inner = JsFullTextQuery.multiMatchQuery(
      query,
      columns,
      options?.boosts,
-      options?.operator ?? Operator.Or,
    );
  }

@@ -976,23 +908,3 @@ export class MultiMatchQuery implements FullTextQuery {
    return FullTextQueryType.MultiMatch;
  }
 }
-
-export class BooleanQuery implements FullTextQuery {
-  /** @ignore */
-  public readonly inner: JsFullTextQuery;
-  /**
-   * Creates an instance of BooleanQuery.
-   *
-   * @param queries - An array of (Occur, FullTextQuery objects) to combine.
-   * Occur specifies whether the query must match, or should match.
-   */
-  constructor(queries: [Occur, FullTextQuery][]) {
-    this.inner = JsFullTextQuery.booleanQuery(
-      queries.map(([occur, query]) => [occur, query.inner]),
-    );
-  }
-
-  queryType(): FullTextQueryType {
-    return FullTextQueryType.Boolean;
-  }
-}
--- a/nodejs/lancedb/table.ts
+++ b/nodejs/lancedb/table.ts
@@ -6,11 +6,9 @@ import {
  Data,
  DataType,
  IntoVector,
-  MultiVector,
  Schema,
  dataTypeToJson,
  fromDataToBuffer,
-  isMultiVector,
  tableFromIPC,
 } from "./arrow";

@@ -77,10 +75,10 @@ export interface OptimizeOptions {
   * // Delete all versions older than 1 day
   * const olderThan = new Date();
   * olderThan.setDate(olderThan.getDate() - 1);
-   * tbl.optimize({cleanupOlderThan: olderThan});
+   * tbl.cleanupOlderVersions(olderThan);
   *
   * // Delete all versions except the current version
-   * tbl.optimize({cleanupOlderThan: new Date()});
+   * tbl.cleanupOlderVersions(new Date());
   */
  cleanupOlderThan: Date;
  deleteUnverified: boolean;
@@ -348,7 +346,7 @@ export abstract class Table {
   * if the query is a string and no embedding function is defined, it will be treated as a full text search query
   */
  abstract search(
-    query: string | IntoVector | MultiVector | FullTextQuery,
+    query: string | IntoVector | FullTextQuery,
    queryType?: string,
    ftsColumns?: string | string[],
  ): VectorQuery | Query;
@@ -359,7 +357,7 @@ export abstract class Table {
   * is the same thing as calling `nearestTo` on the builder returned
   * by `query`.  @see {@link Query#nearestTo} for more details.
   */
-  abstract vectorSearch(vector: IntoVector | MultiVector): VectorQuery;
+  abstract vectorSearch(vector: IntoVector): VectorQuery;
  /**
   * Add new columns with defined values.
   * @param {AddColumnsSql[]} newColumnTransforms pairs of column names and
@@ -670,7 +668,7 @@ export class LocalTable extends Table {
  }

  search(
-    query: string | IntoVector | MultiVector | FullTextQuery,
+    query: string | IntoVector | FullTextQuery,
    queryType: string = "auto",
    ftsColumns?: string | string[],
  ): VectorQuery | Query {
@@ -717,15 +715,7 @@ export class LocalTable extends Table {
    return this.query().nearestTo(queryPromise);
  }

-  vectorSearch(vector: IntoVector | MultiVector): VectorQuery {
-    if (isMultiVector(vector)) {
-      const query = this.query().nearestTo(vector[0]);
-      for (const v of vector.slice(1)) {
-        query.addQueryVector(v);
-      }
-      return query;
-    }
-
+  vectorSearch(vector: IntoVector): VectorQuery {
    return this.query().nearestTo(vector);
  }

--- a/nodejs/npm/darwin-arm64/package.json
+++ b/nodejs/npm/darwin-arm64/package.json
@@ -1,11 +1,11 @@
 {
 	"name": "@lancedb/lancedb-darwin-arm64",
-	"version": "0.21.2",
+	"version": "0.20.0-beta.2",
 	"os": ["darwin"],
 	"cpu": ["arm64"],
 	"main": "lancedb.darwin-arm64.node",
 	"files": ["lancedb.darwin-arm64.node"],
-	"license": "Apache-2.0",
+	"license": "Apache 2.0",
 	"engines": {
 		"node": ">= 18"
 	}
--- a/Show More
+++ b/Show More