Bump version: 0.25.1-beta.2 → 0.25.1-beta.3

chore: increase pypi publish timeout and use warp runner for arm64 (#2670)
Fix failures like https://github.com/lancedb/lancedb/actions/runs/17840462235/job/50748940233 : the ARM64 build cannot succeed within 1 hour, and the x86-64 build sometimes cannot succeed within 1 hour either.
2025-12-23 05:19:58 +00:00 · 2025-09-22 04:47:42 +00:00 · 2025-09-21 21:42:44 -07:00 · 2025-09-21 21:28:40 -07:00 · 2025-09-19 09:17:28 -07:00 · 2025-09-18 13:29:47 -07:00
225 changed files with 15052 additions and 16904 deletions
--- a/.bumpversion.toml
+++ b/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.20.1-beta.2"
+current_version = "0.22.1-beta.2"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
@@ -50,11 +50,6 @@ pre_commit_hooks = [
 optional_value = "final"
 values = ["beta", "final"]
 [[tool.bumpversion.files]]
 filename = "node/package.json"
 replace = "\"version\": \"{new_version}\","
 search = "\"version\": \"{current_version}\","
 [[tool.bumpversion.files]]
 filename = "nodejs/package.json"
 replace = "\"version\": \"{new_version}\","
@@ -66,39 +61,8 @@ glob = "nodejs/npm/*/package.json"
 replace = "\"version\": \"{new_version}\","
 search = "\"version\": \"{current_version}\","
 # vectordb node binary packages
 [[tool.bumpversion.files]]
 glob = "node/package.json"
 replace = "\"@lancedb/vectordb-darwin-arm64\": \"{new_version}\""
 search = "\"@lancedb/vectordb-darwin-arm64\": \"{current_version}\""
 [[tool.bumpversion.files]]
 glob = "node/package.json"
 replace = "\"@lancedb/vectordb-darwin-x64\": \"{new_version}\""
 search = "\"@lancedb/vectordb-darwin-x64\": \"{current_version}\""
 [[tool.bumpversion.files]]
 glob = "node/package.json"
 replace = "\"@lancedb/vectordb-linux-arm64-gnu\": \"{new_version}\""
 search = "\"@lancedb/vectordb-linux-arm64-gnu\": \"{current_version}\""
 [[tool.bumpversion.files]]
 glob = "node/package.json"
 replace = "\"@lancedb/vectordb-linux-x64-gnu\": \"{new_version}\""
 search = "\"@lancedb/vectordb-linux-x64-gnu\": \"{current_version}\""
 [[tool.bumpversion.files]]
 glob = "node/package.json"
 replace = "\"@lancedb/vectordb-win32-x64-msvc\": \"{new_version}\""
 search = "\"@lancedb/vectordb-win32-x64-msvc\": \"{current_version}\""
 # Cargo files
 # ------------
 [[tool.bumpversion.files]]
 filename = "rust/ffi/node/Cargo.toml"
 replace = "\nversion = \"{new_version}\""
 search = "\nversion = \"{current_version}\""
 [[tool.bumpversion.files]]
 filename = "rust/lancedb/Cargo.toml"
 replace = "\nversion = \"{new_version}\""
--- a/.github/workflows/cargo-publish.yml
+++ b/.github/workflows/cargo-publish.yml
@@ -5,8 +5,8 @@ on:
    tags-ignore:
      # We don't publish pre-releases for Rust. Crates.io is just a source
      # distribution, so we don't need to publish pre-releases.
-      - 'v*-beta*'
+      - "v*-beta*"
-      - '*-v*' # for example, python-vX.Y.Z
+      - "*-v*" # for example, python-vX.Y.Z
 env:
  # This env var is used by Swatinem/rust-cache@v2 for the cache
@@ -19,6 +19,8 @@ env:
 jobs:
  build:
    runs-on: ubuntu-22.04
    permissions:
      id-token: write
    timeout-minutes: 30
    # Only runs on tags that match the make-release action
    if: startsWith(github.ref, 'refs/tags/v')
@@ -31,6 +33,8 @@ jobs:
        run: |
          sudo apt update
          sudo apt install -y protobuf-compiler libssl-dev
      - uses: rust-lang/crates-io-auth-action@v1
        id: auth
      - name: Publish the package
        run: |
-          cargo publish -p lancedb --all-features --token ${{ secrets.CARGO_REGISTRY_TOKEN }}
+          cargo publish -p lancedb --all-features --token ${{ steps.auth.outputs.token }}
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -56,22 +56,11 @@ jobs:
        with:
          node-version: 20
          cache: 'npm'
          cache-dependency-path: node/package-lock.json
      - name: Install node dependencies
        working-directory: node
        run: |
          sudo apt update
          sudo apt install -y protobuf-compiler libssl-dev
      - name: Build node
        working-directory: node
        run: |
          npm ci
          npm run build
          npm run tsc
      - name: Create markdown files
        working-directory: node
        run: |
          npx typedoc --plugin typedoc-plugin-markdown --out ../docs/src/javascript src/index.ts
      - name: Build docs
        working-directory: docs
        run: |
--- a/.github/workflows/docs_test.yml
+++ b/.github/workflows/docs_test.yml
@@ -24,7 +24,8 @@ env:
 jobs:
  test-python:
    name: Test doc python code
-    runs-on: ubuntu-24.04
+    runs-on: warp-ubuntu-2204-x64-8x
    timeout-minutes: 60
    steps:
    - name: Checkout
      uses: actions/checkout@v4
@@ -58,51 +59,3 @@ jobs:
      run: |
        cd docs/test/python
        for d in *; do cd "$d"; echo "$d".py; python "$d".py; cd ..; done
  test-node:
    name: Test doc nodejs code
    runs-on: ubuntu-24.04
    timeout-minutes: 60
    strategy:
      fail-fast: false
    steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        fetch-depth: 0
        lfs: true
    - name: Print CPU capabilities
      run: cat /proc/cpuinfo
    - name: Set up Node
      uses: actions/setup-node@v4
      with:
        node-version: 20
    - name: Install protobuf
      run: |
        sudo apt update
        sudo apt install -y protobuf-compiler
    - name: Install dependecies needed for ubuntu
      run: |
        sudo apt install -y libssl-dev
        rustup update && rustup default
    - name: Rust cache
      uses: swatinem/rust-cache@v2
    - name: Install node dependencies
      run: |
        sudo swapoff -a
        sudo fallocate -l 8G /swapfile
        sudo chmod 600 /swapfile
        sudo mkswap /swapfile
        sudo swapon /swapfile
        sudo swapon --show
        cd node
        npm ci
        npm run build-release
        cd ../docs
        npm install
    - name: Test
      env:
        LANCEDB_URI: ${{ secrets.LANCEDB_URI }}
        LANCEDB_DEV_API_KEY: ${{ secrets.LANCEDB_DEV_API_KEY }}
      run: |
        cd docs
        npm t
--- a/.github/workflows/node.yml
+++ b/.github/workflows/node.yml
@@ -1,147 +0,0 @@
 name: Node
 on:
  push:
    branches:
      - main
  pull_request:
    paths:
      - node/**
      - rust/ffi/node/**
      - .github/workflows/node.yml
      - docker-compose.yml
 concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
 env:
  # Disable full debug symbol generation to speed up CI build and keep memory down
  # "1" means line tables only, which is useful for panic tracebacks.
  #
  # Use native CPU to accelerate tests if possible, especially for f16
  # target-cpu=haswell fixes failing ci build
  RUSTFLAGS: "-C debuginfo=1 -C target-cpu=haswell -C target-feature=+f16c,+avx2,+fma"
  RUST_BACKTRACE: "1"
 jobs:
  linux:
    name: Linux (Node ${{ matrix.node-version }})
    timeout-minutes: 30
    strategy:
      matrix:
        node-version: [ "18", "20" ]
    runs-on: "ubuntu-22.04"
    defaults:
      run:
        shell: bash
        working-directory: node
    steps:
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0
        lfs: true
    - uses: actions/setup-node@v3
      with:
        node-version: ${{ matrix.node-version }}
        cache: 'npm'
        cache-dependency-path: node/package-lock.json
    - uses: Swatinem/rust-cache@v2
    - name: Install dependencies
      run: |
        sudo apt update
        sudo apt install -y protobuf-compiler libssl-dev
    - name: Build
      run: |
        npm ci
        npm run build
        npm run pack-build
        npm install --no-save ./dist/lancedb-vectordb-*.tgz
        # Remove index.node to test with dependency installed
        rm index.node
    - name: Test
      run: npm run test
  macos:
    timeout-minutes: 30
    runs-on: "macos-13"
    defaults:
      run:
        shell: bash
        working-directory: node
    steps:
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0
        lfs: true
    - uses: actions/setup-node@v3
      with:
        node-version: 20
        cache: 'npm'
        cache-dependency-path: node/package-lock.json
    - uses: Swatinem/rust-cache@v2
    - name: Install dependencies
      run: brew install protobuf
    - name: Build
      run: |
        npm ci
        npm run build
        npm run pack-build
        npm install --no-save ./dist/lancedb-vectordb-*.tgz
        # Remove index.node to test with dependency installed
        rm index.node
    - name: Test
      run: |
        npm run test
  aws-integtest:
    timeout-minutes: 45
    runs-on: "ubuntu-22.04"
    defaults:
      run:
        shell: bash
        working-directory: node
    env:
      AWS_ACCESS_KEY_ID: ACCESSKEY
      AWS_SECRET_ACCESS_KEY: SECRETKEY
      AWS_DEFAULT_REGION: us-west-2
      # this one is for s3
      AWS_ENDPOINT: http://localhost:4566
      # this one is for dynamodb
      DYNAMODB_ENDPOINT: http://localhost:4566
      ALLOW_HTTP: true
    steps:
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0
        lfs: true
    - uses: actions/setup-node@v3
      with:
        node-version: 20
        cache: 'npm'
        cache-dependency-path: node/package-lock.json
    - name: start local stack
      run: docker compose -f ../docker-compose.yml up -d --wait
    - name: create s3
      run: aws s3 mb s3://lancedb-integtest --endpoint $AWS_ENDPOINT
    - name: create ddb
      run: |
        aws dynamodb create-table \
          --table-name lancedb-integtest \
          --attribute-definitions '[{"AttributeName": "base_uri", "AttributeType": "S"}, {"AttributeName": "version", "AttributeType": "N"}]' \
          --key-schema '[{"AttributeName": "base_uri", "KeyType": "HASH"}, {"AttributeName": "version", "KeyType": "RANGE"}]' \
          --provisioned-throughput '{"ReadCapacityUnits": 10, "WriteCapacityUnits": 10}' \
          --endpoint-url $DYNAMODB_ENDPOINT
    - uses: Swatinem/rust-cache@v2
    - name: Install dependencies
      run: |
        sudo apt update
        sudo apt install -y protobuf-compiler libssl-dev
    - name: Build
      run: |
        npm ci
        npm run build
        npm run pack-build
        npm install --no-save ./dist/lancedb-vectordb-*.tgz
        # Remove index.node to test with dependency installed
        rm index.node
    - name: Test
      run: npm run integration-test
--- a/.github/workflows/nodejs.yml
+++ b/.github/workflows/nodejs.yml
@@ -79,7 +79,7 @@ jobs:
      with:
        node-version: ${{ matrix.node-version }}
        cache: 'npm'
-        cache-dependency-path: node/package-lock.json
+        cache-dependency-path: nodejs/package-lock.json
    - uses: Swatinem/rust-cache@v2
    - name: Install dependencies
      run: |
@@ -137,7 +137,7 @@ jobs:
      with:
        node-version: 20
        cache: 'npm'
-        cache-dependency-path: node/package-lock.json
+        cache-dependency-path: nodejs/package-lock.json
    - uses: Swatinem/rust-cache@v2
    - name: Install dependencies
      run: |
--- a/.github/workflows/npm-publish.yml
+++ b/.github/workflows/npm-publish.yml
@@ -365,200 +365,3 @@ jobs:
            ARGS="$ARGS --tag preview"
          fi
          npm publish $ARGS
  # ----------------------------------------------------------------------------
  # vectordb release (legacy)
  # ----------------------------------------------------------------------------
  # TODO: delete this when we drop vectordb
  node:
    name: vectordb Typescript
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash
        working-directory: node
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - uses: actions/setup-node@v3
        with:
          node-version: 20
          cache: "npm"
          cache-dependency-path: node/package-lock.json
      - name: Install dependencies
        run: |
          sudo apt update
          sudo apt install -y protobuf-compiler libssl-dev
      - name: Build
        run: |
          npm ci
          npm run tsc
          npm pack
      - name: Upload Linux Artifacts
        uses: actions/upload-artifact@v4
        with:
          name: node-package
          path: |
            node/vectordb-*.tgz
  node-macos:
    name: vectordb ${{ matrix.config.arch }}
    strategy:
      matrix:
        config:
          - arch: x86_64-apple-darwin
            runner: macos-13
          - arch: aarch64-apple-darwin
            # xlarge is implicitly arm64.
            runner: macos-14
    runs-on: ${{ matrix.config.runner }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Install system dependencies
        run: brew install protobuf
      - name: Install npm dependencies
        run: |
          cd node
          npm ci
      - name: Build MacOS native node modules
        run: bash ci/build_macos_artifacts.sh ${{ matrix.config.arch }}
      - name: Upload Darwin Artifacts
        uses: actions/upload-artifact@v4
        with:
          name: node-native-darwin-${{ matrix.config.arch }}
          path: |
            node/dist/lancedb-vectordb-darwin*.tgz
  node-linux-gnu:
    name: vectordb (${{ matrix.config.arch}}-unknown-linux-gnu)
    runs-on: ${{ matrix.config.runner }}
    strategy:
      fail-fast: false
      matrix:
        config:
          - arch: x86_64
            runner: ubuntu-latest
          - arch: aarch64
            # For successful fat LTO builds, we need a large runner to avoid OOM errors.
            runner: warp-ubuntu-latest-arm64-4x
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      # To avoid OOM errors on ARM, we create a swap file.
      # Adds a 16G swap file on the aarch64 runner only; the x86_64 matrix
      # entry skips this step.
      - name: Configure aarch64 build
        if: ${{ matrix.config.arch == 'aarch64' }}
        run: |
          free -h
          sudo fallocate -l 16G /swapfile
          sudo chmod 600 /swapfile
          sudo mkswap /swapfile
          sudo swapon /swapfile
          # A plain `>>` redirect is performed by the unprivileged shell (and
          # `>> sudo /etc/fstab` would append to a file literally named
          # "sudo"), so pipe through `sudo tee -a` to append to /etc/fstab.
          echo "/swapfile swap swap defaults 0 0" | sudo tee -a /etc/fstab > /dev/null
          # print info
          swapon --show
          free -h
      - name: Build Linux Artifacts
        run: |
          bash ci/build_linux_artifacts.sh ${{ matrix.config.arch }} ${{ matrix.config.arch }}-unknown-linux-gnu
      - name: Upload Linux Artifacts
        uses: actions/upload-artifact@v4
        with:
          name: node-native-linux-${{ matrix.config.arch }}-gnu
          path: |
            node/dist/lancedb-vectordb-linux*.tgz
  node-windows:
    name: vectordb ${{ matrix.target }}
    runs-on: windows-2022
    strategy:
      fail-fast: false
      matrix:
        target: [x86_64-pc-windows-msvc]
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Install Protoc v21.12
        working-directory: C:\
        run: |
          New-Item -Path 'C:\protoc' -ItemType Directory
          Set-Location C:\protoc
          Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
          7z x protoc.zip
          Add-Content $env:GITHUB_PATH "C:\protoc\bin"
        shell: powershell
      - name: Install npm dependencies
        run: |
          cd node
          npm ci
      - name: Build Windows native node modules
        run: .\ci\build_windows_artifacts.ps1 ${{ matrix.target }}
      - name: Upload Windows Artifacts
        uses: actions/upload-artifact@v4
        with:
          name: node-native-windows
          path: |
            node/dist/lancedb-vectordb-win32*.tgz
  release:
    name: vectordb NPM Publish
    needs: [node, node-macos, node-linux-gnu, node-windows]
    runs-on: ubuntu-latest
    permissions:
      contents: write
    # Only runs on tags that match the make-release action
    if: startsWith(github.ref, 'refs/tags/v')
    steps:
      - uses: actions/download-artifact@v4
        with:
          pattern: node-*
      - name: Display structure of downloaded files
        run: ls -R
      - uses: actions/setup-node@v3
        with:
          node-version: 20
          registry-url: "https://registry.npmjs.org"
      - name: Publish to NPM
        env:
          NODE_AUTH_TOKEN: ${{ secrets.LANCEDB_NPM_REGISTRY_TOKEN }}
        run: |
          # Tag beta as "preview" instead of default "latest". See lancedb
          # npm publish step for more info.
          if [[ $GITHUB_REF =~ refs/tags/v(.*)-beta.* ]]; then
            PUBLISH_ARGS="--tag preview"
          fi
          mv */*.tgz .
          for filename in *.tgz; do
            npm publish $PUBLISH_ARGS $filename
          done
      - name: Deprecate
        env:
          NODE_AUTH_TOKEN: ${{ secrets.LANCEDB_NPM_REGISTRY_TOKEN }}
        # We need to deprecate the old package to avoid confusion.
        # Each time we publish a new version, it gets undeprecated.
        run: npm deprecate vectordb "Use @lancedb/lancedb instead."
      - name: Checkout
        uses: actions/checkout@v4
        with:
          ref: main
      - name: Update package-lock.json
        run: |
          git config user.name 'Lance Release'
          git config user.email 'lance-dev@lancedb.com'
          bash ci/update_lockfiles.sh
      - name: Push new commit
        uses: ad-m/github-push-action@master
        with:
          github_token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}
          branch: main
      - name: Notify Slack Action
        uses: ravsamhq/notify-slack-action@2.3.0
        if: ${{ always() }}
        with:
          status: ${{ job.status }}
          notify_when: "failure"
          notification_title: "{workflow} is failing"
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.ACTION_MONITORING_SLACK }}
--- a/.github/workflows/pypi-publish.yml
+++ b/.github/workflows/pypi-publish.yml
@@ -56,7 +56,7 @@ jobs:
          pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
          fury_token: ${{ secrets.FURY_TOKEN }}
  mac:
-    timeout-minutes: 60
+    timeout-minutes: 90
    runs-on: ${{ matrix.config.runner }}
    strategy:
      matrix:
@@ -64,7 +64,7 @@ jobs:
          - target: x86_64-apple-darwin
            runner: macos-13
          - target: aarch64-apple-darwin
-            runner: macos-14
+            runner: warp-macos-14-arm64-6x
    env:
      MACOSX_DEPLOYMENT_TARGET: 10.15
    steps:
--- a/.gitignore
+++ b/.gitignore
@@ -31,9 +31,6 @@ python/dist
 *.node
 **/node_modules
 **/.DS_Store
 node/dist
 node/examples/**/package-lock.json
 node/examples/**/dist
 nodejs/lancedb/native*
 dist
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -0,0 +1,80 @@
 LanceDB is a database designed for retrieval, including vector, full-text, and hybrid search.
 It is a wrapper around Lance. There are two backends: local (in-process like SQLite) and
 remote (against LanceDB Cloud).
 The core of LanceDB is written in Rust. There are bindings in Python, TypeScript, and Java.
 Project layout:
 * `rust/lancedb`: The LanceDB core Rust implementation.
 * `python`: The Python bindings, using PyO3.
 * `nodejs`: The TypeScript bindings, using napi-rs
 * `java`: The Java bindings
 Common commands:
 * Check for compiler errors: `cargo check --quiet --features remote --tests --examples`
 * Run tests: `cargo test --quiet --features remote --tests`
 * Run specific test: `cargo test --quiet --features remote -p <package_name> --test <test_name>`
 * Lint: `cargo clippy --quiet --features remote --tests --examples`
 * Format: `cargo fmt --all`
 Before committing changes, run formatting.
 ## Coding tips
 * When writing Rust doctests for things that require a connection or table reference,
  write them as a function instead of a fully executable test. This allows type checking
  to run but avoids needing a full test environment. For example:
    ```rust
    /// ```
    /// use lance_index::scalar::FullTextSearchQuery;
    /// use lancedb::query::{QueryBase, ExecutableQuery};
    ///
    /// # use lancedb::Table;
    /// # async fn query(table: &Table) -> Result<(), Box<dyn std::error::Error>> {
    /// let results = table.query()
    ///     .full_text_search(FullTextSearchQuery::new("hello world".into()))
    ///     .execute()
    ///     .await?;
    /// # Ok(())
    /// # }
    /// ```
    ```
 ## Example plan: adding a new method on Table
 Adding a new method involves first adding it to the Rust core, then exposing it
 in the Python and TypeScript bindings. There are both local and remote tables.
 Remote tables are implemented via an HTTP API and require the `remote` cargo
 feature flag to be enabled. Python has both sync and async methods.
 Rust core changes:
 1. Add method on `Table` struct in `rust/lancedb/src/table.rs` (calls `BaseTable` trait).
 2. Add method to `BaseTable` trait in `rust/lancedb/src/table.rs`.
 3. Implement new trait method on `NativeTable` in `rust/lancedb/src/table.rs`.
    * Test with unit test in `rust/lancedb/src/table.rs`.
 4. Implement new trait method on `RemoteTable` in `rust/lancedb/src/remote/table.rs`.
    * Test with unit test in `rust/lancedb/src/remote/table.rs` against mocked endpoint.
 Python bindings changes:
 1. Add PyO3 method binding in `python/src/table.rs`. Run `make develop` to compile bindings.
 2. Add types for PyO3 method in `python/python/lancedb/_lancedb.pyi`.
 3. Add method to `AsyncTable` class in `python/python/lancedb/table.py`.
 4. Add abstract method to `Table` abstract base class in `python/python/lancedb/table.py`.
 5. Add concrete sync method to `LanceTable` class in `python/python/lancedb/table.py`.
    * Should use `LOOP.run()` to call the corresponding `AsyncTable` method.
 6. Add concrete sync method to `RemoteTable` class in `python/python/lancedb/remote/table.py`.
 7. Add unit test in `python/tests/test_table.py`.
 TypeScript bindings changes:
 1. Add napi-rs method binding on `Table` in `nodejs/src/table.rs`.
 2. Run `npm run build` to generate TypeScript definitions.
 3. Add TypeScript method on abstract class `Table` in `nodejs/src/table.ts`.
 4. Add concrete method on `LocalTable` class in `nodejs/src/native_table.ts`.
    * Note: despite the name, this class is also used for remote tables.
 5. Add test in `nodejs/__test__/table.test.ts`.
 6. Run `npm run docs` to generate TypeScript documentation.
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,11 +1,5 @@
 [workspace]
-members = [
+members = ["rust/lancedb", "nodejs", "python", "java/core/lancedb-jni"]
    "rust/ffi/node",
    "rust/lancedb",
    "nodejs",
    "python",
    "java/core/lancedb-jni",
 ]
 # Python package needs to be built by maturin.
 exclude = ["python"]
 resolver = "2"
@@ -21,14 +15,14 @@ categories = ["database-implementations"]
 rust-version = "1.78.0"
 [workspace.dependencies]
-lance = { "version" = "=0.30.0", "features" = ["dynamodb"] }
+lance = { "version" = "=0.35.0", default-features = false, "features" = ["dynamodb"], "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-io = "=0.30.0"
+lance-io = { "version" = "=0.35.0", default-features = false, "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-index = "=0.30.0"
+lance-index = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-linalg = "=0.30.0"
+lance-linalg = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-table = "=0.30.0"
+lance-table = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-testing = "=0.30.0"
+lance-testing = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-datafusion = "=0.30.0"
+lance-datafusion = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-encoding = "=0.30.0"
+lance-encoding = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
 # Note that this one does not include pyarrow
 arrow = { version = "55.1", optional = false }
 arrow-array = "55.1"
@@ -39,20 +33,20 @@ arrow-schema = "55.1"
 arrow-arith = "55.1"
 arrow-cast = "55.1"
 async-trait = "0"
-datafusion = { version = "47.0", default-features = false }
+datafusion = { version = "49.0", default-features = false }
-datafusion-catalog = "47.0"
+datafusion-catalog = "49.0"
-datafusion-common = { version = "47.0", default-features = false }
+datafusion-common = { version = "49.0", default-features = false }
-datafusion-execution = "47.0"
+datafusion-execution = "49.0"
-datafusion-expr = "47.0"
+datafusion-expr = "49.0"
-datafusion-physical-plan = "47.0"
+datafusion-physical-plan = "49.0"
 env_logger = "0.11"
-half = { "version" = "=2.5.0", default-features = false, features = [
+half = { "version" = "2.6.0", default-features = false, features = [
    "num-traits",
 ] }
 futures = "0"
 log = "0.4"
 moka = { version = "0.12", features = ["future"] }
-object_store = "0.11.0"
+object_store = "0.12.0"
 pin-project = "1.0.7"
 snafu = "0.8"
 url = "2"
@@ -61,12 +55,11 @@ rand = "0.9"
 regex = "1.10"
 lazy_static = "1"
 semver = "1.0.25"
 crunchy = "0.2.4"
 # Temporary pins to work around downstream issues
 # https://github.com/apache/arrow-rs/commit/2fddf85afcd20110ce783ed5b4cdeb82293da30b
 chrono = "=0.4.41"
 # https://github.com/RustCrypto/formats/issues/1684
 base64ct = "=1.6.0"
 # Workaround for: https://github.com/eira-fransham/crunchy/issues/13
 crunchy = "=0.2.2"
 # Workaround for: https://github.com/Lokathor/bytemuck/issues/306
 bytemuck_derive = ">=1.8.1, <1.9.0"
--- a/ci/build_linux_artifacts.sh
+++ b/ci/build_linux_artifacts.sh
@@ -1,22 +0,0 @@
 #!/bin/bash
 set -e
 ARCH=${1:-x86_64}
 TARGET_TRIPLE=${2:-x86_64-unknown-linux-gnu}
 # We pass down the current user so that when we later mount the local files
 # into the container, the files are accessible by the current user.
 pushd ci/manylinux_node
 docker build \
    -t lancedb-node-manylinux \
    --build-arg="ARCH=$ARCH" \
    --build-arg="DOCKER_USER=$(id -u)" \
    --progress=plain \
    .
 popd
 # We turn on memory swap to avoid OOM killer
 docker run \
    -v $(pwd):/io -w /io \
    --memory-swap=-1 \
    lancedb-node-manylinux \
    bash ci/manylinux_node/build_vectordb.sh $ARCH $TARGET_TRIPLE
--- a/ci/build_macos_artifacts.sh
+++ b/ci/build_macos_artifacts.sh
@@ -1,34 +0,0 @@
 # Builds the macOS artifacts (node binaries).
 # Usage: ./ci/build_macos_artifacts.sh [target]
 # Targets supported: x86_64-apple-darwin aarch64-apple-darwin
 set -e
 # Builds the Rust crate in rust/ffi/node in release mode.
 #   $1 - Rust target triple passed to `cargo build --target`.
 prebuild_rust() {
    # Building here for the sake of easier debugging.
    pushd rust/ffi/node
    echo "Building rust library for $1"
    export RUST_BACKTRACE=1
    # Quote the target so it is always passed as exactly one argument
    # (no word splitting or glob expansion).
    cargo build --release --target "$1"
    popd
 }
 # Runs the `build-release` and `pack-build` npm scripts inside node/.
 #   $1 - target triple forwarded to both npm scripts via `--target`.
 build_node_binaries() {
    pushd node
    echo "Building node library for $1"
    # Quote the target so it is always passed as exactly one argument
    # (no word splitting or glob expansion).
    npm run build-release -- --target "$1"
    npm run pack-build -- --target "$1"
    popd
 }
 if [ -n "$1" ]; then
    targets=$1
 else
    targets="x86_64-apple-darwin aarch64-apple-darwin"
 fi
 echo "Building artifacts for targets: $targets"
 for target in $targets
    do
    prebuild_rust $target
    build_node_binaries $target
 done
--- a/ci/build_windows_artifacts.ps1
+++ b/ci/build_windows_artifacts.ps1
@@ -1,42 +0,0 @@
 # Builds the Windows artifacts (node binaries).
 # Usage:  .\ci\build_windows_artifacts.ps1 [target]
 # Targets supported:
 # - x86_64-pc-windows-msvc
 # - i686-pc-windows-msvc
 # - aarch64-pc-windows-msvc
 # Compiles the Rust crate in rust/ffi/node in release mode for one target triple.
 function Prebuild-Rust([string]$target) {
    # Kept as its own step so that Rust build failures are easier to debug.
    Push-Location "rust/ffi/node"
    Write-Host "Building rust library for $target"
    $env:RUST_BACKTRACE = 1
    & cargo build --release --target $target
    Pop-Location
 }
 # Runs the build-release and pack-build npm scripts in node/ for one target triple.
 function Build-NodeBinaries([string]$target) {
    Push-Location "node"
    Write-Host "Building node library for $target"
    # Both npm scripts receive the same `--target` argument, in this order.
    foreach ($npmScript in "build-release", "pack-build") {
        npm run $npmScript -- --target $target
    }
    Pop-Location
 }
 $targets = $args[0]
 if (-not $targets) {
    $targets = "x86_64-pc-windows-msvc", "aarch64-pc-windows-msvc"
 }
 Write-Host "Building artifacts for targets: $targets"
 foreach ($target in $targets) {
    Prebuild-Rust $target
    Build-NodeBinaries $target
 }
--- a/ci/build_windows_artifacts_nodejs.ps1
+++ b/ci/build_windows_artifacts_nodejs.ps1
@@ -1,42 +0,0 @@
 # Builds the Windows artifacts (nodejs binaries).
 # Usage:  .\ci\build_windows_artifacts_nodejs.ps1 [target]
 # Targets supported:
 # - x86_64-pc-windows-msvc
 # - i686-pc-windows-msvc
 # - aarch64-pc-windows-msvc
 # Compiles the Rust crate in rust/lancedb in release mode for one target triple.
 function Prebuild-Rust([string]$target) {
    # Kept as its own step so that Rust build failures are easier to debug.
    Push-Location "rust/lancedb"
    Write-Host "Building rust library for $target"
    $env:RUST_BACKTRACE = 1
    & cargo build --release --target $target
    Pop-Location
 }
 # Runs the build-release npm script in nodejs/ for one target triple.
 function Build-NodeBinaries([string]$target) {
    Push-Location "nodejs"
    Write-Host "Building nodejs library for $target"
    # The target is handed to the npm script through the RUST_TARGET
    # environment variable rather than a command-line flag.
    $env:RUST_TARGET = $target
    & npm run build-release
    Pop-Location
 }
 $targets = $args[0]
 if (-not $targets) {
    $targets = "x86_64-pc-windows-msvc", "aarch64-pc-windows-msvc"
 }
 Write-Host "Building artifacts for targets: $targets"
 foreach ($target in $targets) {
    Prebuild-Rust $target
    Build-NodeBinaries $target
 }
--- a/ci/manylinux_node/Dockerfile
+++ b/ci/manylinux_node/Dockerfile
@@ -1,27 +0,0 @@
 # Manylinux Dockerfile with Rust, Node, and Lance dependencies installed.
 # This container allows building the node modules native libraries in an
 # environment with a very old glibc, so that we are compatible with a wide
 # range of linux distributions.
 ARG ARCH=x86_64
 FROM quay.io/pypa/manylinux_2_28_${ARCH}
 ARG ARCH=x86_64
 ARG DOCKER_USER=default_user
 # Protobuf is also installed as root.
 COPY install_protobuf.sh install_protobuf.sh
 RUN ./install_protobuf.sh ${ARCH}
 ENV DOCKER_USER=${DOCKER_USER}
 # Create a group and user, but only if it doesn't exist
 RUN echo ${ARCH} && id -u ${DOCKER_USER} >/dev/null 2>&1 || adduser --user-group --create-home --uid ${DOCKER_USER} build_user
 # We switch to the user to install Rust and Node, since those like to be
 # installed at the user level.
 USER ${DOCKER_USER}
 COPY prepare_manylinux_node.sh prepare_manylinux_node.sh
 RUN cp /prepare_manylinux_node.sh $HOME/ && \
    cd $HOME && \
    ./prepare_manylinux_node.sh ${ARCH}
--- a/ci/manylinux_node/build_vectordb.sh
+++ b/ci/manylinux_node/build_vectordb.sh
@@ -1,13 +0,0 @@
 #!/bin/bash
 # Builds the node module for manylinux. Invoked by ci/build_linux_artifacts.sh.
 set -e
 ARCH=${1:-x86_64}
 TARGET_TRIPLE=${2:-x86_64-unknown-linux-gnu}
 #Alpine doesn't have .bashrc
 FILE=$HOME/.bashrc && test -f $FILE && source $FILE
 cd node
 npm ci
 npm run build-release
 npm run pack-build -- -t $TARGET_TRIPLE
--- a/ci/manylinux_node/install_protobuf.sh
+++ b/ci/manylinux_node/install_protobuf.sh
@@ -1,15 +0,0 @@
 #!/bin/bash
 # Installs protobuf compiler. Should be run as root.
 set -e
 if [[ $1 == x86_64* ]]; then
    ARCH=x86_64
 else
    # gnu target
    ARCH=aarch_64
 fi
 PB_REL=https://github.com/protocolbuffers/protobuf/releases
 PB_VERSION=23.1
 curl -LO $PB_REL/download/v$PB_VERSION/protoc-$PB_VERSION-linux-$ARCH.zip
 unzip protoc-$PB_VERSION-linux-$ARCH.zip -d /usr/local
--- a/ci/manylinux_node/prepare_manylinux_node.sh
+++ b/ci/manylinux_node/prepare_manylinux_node.sh
@@ -1,21 +0,0 @@
 #!/bin/bash
 set -e
 # Installs nvm via its install script, then uses nvm to install Node 18.
 install_node() {
    local nvm_install_url="https://raw.githubusercontent.com/nvm-sh/nvm/v0.34.0/install.sh"
    echo "Installing node..."
    curl -o- "$nvm_install_url" | bash
    # Re-read .bashrc so that the `nvm` command is available in this shell.
    . "$HOME/.bashrc"
    nvm install --no-progress 18
 }
 # Installs the Rust toolchain with rustup (non-interactive) and adds cargo's
 # bin directory to PATH for the remainder of this script.
 install_rust() {
    echo "Installing rust..."
    curl https://sh.rustup.rs -sSf | bash -s -- -y
    # rustup installs into $CARGO_HOME (default: $HOME/.cargo), which is only
    # /root/.cargo when running as root. Resolve it from the environment so
    # the path is also correct for a non-root user.
    export PATH="$PATH:${CARGO_HOME:-$HOME/.cargo}/bin"
 }
 install_node
 install_rust
--- a/ci/set_lance_version.py
+++ b/ci/set_lance_version.py
@@ -47,13 +47,59 @@ def extract_features(line: str) -> list:
    """
    import re
-    match = re.search(r'"features"\s*=\s*\[(.*?)\]', line)
+    match = re.search(r'"features"\s*=\s*\[\s*(.*?)\s*\]', line, re.DOTALL)
    if match:
        features_str = match.group(1)
-        return [f.strip('"') for f in features_str.split(",")]
+        return [f.strip('"') for f in features_str.split(",") if len(f) > 0]
    return []
 def extract_default_features(line: str) -> bool:
    """
    Reports whether a Cargo.toml dependency entry disables default features.
    The key is recognized both bare (default-features = false) and as a quoted
    TOML key ("default-features" = false or 'default-features' = false). TOML
    treats these spellings as the same key, so all of them must be detected or
    the setting would be silently dropped when the entry is rewritten.
    Example: 'lance = { "version" = "=0.29.0", default-features = false, "features" = ["dynamodb"] }'
    Args:
        line: Text of a dependency entry from Cargo.toml.
    Returns: True if default-features = false is present, False otherwise
    """
    import re
    # The optional ["'] consumes the closing quote of a quoted key, which the
    # bare-key pattern could not skip over.
    match = re.search(r'default-features["\']?\s*=\s*false', line)
    return match is not None
 def dict_to_toml_line(package_name: str, config: dict) -> str:
    """
    Renders one dependency entry for Cargo.toml from a configuration dict.
    Fields are emitted in the dict's own iteration order (insertion order on
    Python 3.7+), so the caller decides how the output is laid out.
    Args:
        package_name: Dependency name, e.g. "lance" or "lance-io".
        config: Mapping with keys such as "version", "path", "git", "tag",
                "features" and "default-features".
    Returns:
        The TOML line, terminated by a newline. A config holding nothing but
        "version" is written in the short form (name = "version"); anything
        else is written as an inline table.
    """
    # Short form: the version string is the whole entry.
    if "version" in config and len(config) == 1:
        return f'{package_name} = "{config["version"]}"\n'
    def render_field(key, value) -> str:
        # A falsy default-features is the one field written with a bare key.
        if key == "default-features" and not value:
            return "default-features = false"
        # Feature lists are serialized as a JSON array.
        if key == "features":
            return f'"features" = {json.dumps(value)}'
        if isinstance(value, str):
            return f'"{key}" = "{value}"'
        # Fallback for non-string values; not expected with current callers.
        return f'"{key}" = {json.dumps(value)}'
    rendered = ", ".join(render_field(key, value) for key, value in config.items())
    return f'{package_name} = {{ {rendered} }}\n'
 def update_cargo_toml(line_updater):
    """
    Updates the Cargo.toml file by applying the line_updater function to each line.
@@ -63,10 +109,31 @@ def update_cargo_toml(line_updater):
        lines = f.readlines()
    new_lines = []
    lance_line = ""
    is_parsing_lance_line = False
    for line in lines:
        if line.startswith("lance"):
-            # Update the line using the provided function
+            # Check if this is a single-line or multi-line entry
-            new_lines.append(line_updater(line))
+            # Single-line entries either:
            # 1. End with } (complete inline table)
            # 2. End with " (simple version string)
            # Multi-line entries start with { but don't end with }
            if line.strip().endswith("}") or line.strip().endswith('"'):
                # Single-line entry - process immediately
                new_lines.append(line_updater(line))
            elif "{" in line and not line.strip().endswith("}"):
                # Multi-line entry - start accumulating
                lance_line = line
                is_parsing_lance_line = True
            else:
                # Single-line entry without quotes or braces (shouldn't happen but handle it)
                new_lines.append(line_updater(line))
        elif is_parsing_lance_line:
            lance_line += line
            if line.strip().endswith("}"):
                new_lines.append(line_updater(lance_line))
                lance_line = ""
                is_parsing_lance_line = False
        else:
            # Keep the line unchanged
            new_lines.append(line)
@@ -78,18 +145,25 @@ def update_cargo_toml(line_updater):
 def set_stable_version(version: str):
    """
    Sets lines to
-    lance = { "version" = "=0.29.0", "features" = ["dynamodb"] }
+    lance = { "version" = "=0.29.0", default-features = false, "features" = ["dynamodb"] }
-    lance-io = "=0.29.0"
+    lance-io = { "version" = "=0.29.0", default-features = false }
    ...
    """
    def line_updater(line: str) -> str:
        package_name = line.split("=", maxsplit=1)[0].strip()
        # Build config in desired order: version, default-features, features
        config = {"version": f"={version}"}
        if extract_default_features(line):
            config["default-features"] = False
        features = extract_features(line)
        if features:
-            return f'{package_name} = {{ "version" = "={version}", "features" = {json.dumps(features)} }}\n'
+            config["features"] = features
-        else:
+
-            return f'{package_name} = "={version}"\n'
+        return dict_to_toml_line(package_name, config)
    update_cargo_toml(line_updater)
@@ -97,19 +171,29 @@ def set_stable_version(version: str):
 def set_preview_version(version: str):
    """
    Sets lines to
-    lance = { "version" = "=0.29.0", "features" = ["dynamodb"], tag = "v0.29.0-beta.2", git="https://github.com/lancedb/lance.git" }
+    lance = { "version" = "=0.29.0", default-features = false, "features" = ["dynamodb"], "tag" = "v0.29.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
-    lance-io = { version = "=0.29.0", tag = "v0.29.0-beta.2", git="https://github.com/lancedb/lance.git" }
+    lance-io = { "version" = "=0.29.0", default-features = false, "tag" = "v0.29.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
    ...
    """
    def line_updater(line: str) -> str:
        package_name = line.split("=", maxsplit=1)[0].strip()
        features = extract_features(line)
        base_version = version.split("-")[0]  # Get the base version without beta suffix
        # Build config in desired order: version, default-features, features, tag, git
        config = {"version": f"={base_version}"}
        if extract_default_features(line):
            config["default-features"] = False
        features = extract_features(line)
        if features:
-            return f'{package_name} = {{ "version" = "={base_version}", "features" = {json.dumps(features)}, "tag" = "v{version}", "git" = "https://github.com/lancedb/lance.git" }}\n'
+            config["features"] = features
-        else:
+
-            return f'{package_name} = {{ "version" = "={base_version}", "tag" = "v{version}", "git" = "https://github.com/lancedb/lance.git" }}\n'
+        config["tag"] = f"v{version}"
        config["git"] = "https://github.com/lancedb/lance.git"
        return dict_to_toml_line(package_name, config)
    update_cargo_toml(line_updater)
@@ -117,18 +201,25 @@ def set_preview_version(version: str):
 def set_local_version():
    """
    Sets lines to
-    lance = { path = "../lance/rust/lance", features = ["dynamodb"] }
+    lance = { "path" = "../lance/rust/lance", default-features = false, "features" = ["dynamodb"] }
-    lance-io = { path = "../lance/rust/lance-io" }
+    lance-io = { "path" = "../lance/rust/lance-io", default-features = false }
    ...
    """
    def line_updater(line: str) -> str:
        package_name = line.split("=", maxsplit=1)[0].strip()
        # Build config in desired order: path, default-features, features
        config = {"path": f"../lance/rust/{package_name}"}
        if extract_default_features(line):
            config["default-features"] = False
        features = extract_features(line)
        if features:
-            return f'{package_name} = {{ "path" = "../lance/rust/{package_name}", "features" = {json.dumps(features)} }}\n'
+            config["features"] = features
-        else:
+
-            return f'{package_name} = {{ "path" = "../lance/rust/{package_name}" }}\n'
+        return dict_to_toml_line(package_name, config)
    update_cargo_toml(line_updater)
--- a/ci/update_lockfiles.sh
+++ b/ci/update_lockfiles.sh
@@ -15,16 +15,13 @@ cargo metadata --quiet > /dev/null
 pushd nodejs || exit 1
 npm install --package-lock-only --silent
 popd
 pushd node || exit 1
 npm install --package-lock-only --silent
 popd
 if git diff --quiet --exit-code; then
  echo "No lockfile changes to commit; skipping amend."
 elif $AMEND; then
-  git add Cargo.lock nodejs/package-lock.json node/package-lock.json
+  git add Cargo.lock nodejs/package-lock.json
  git commit --amend --no-edit
 else
-  git add Cargo.lock nodejs/package-lock.json node/package-lock.json
+  git add Cargo.lock nodejs/package-lock.json
  git commit -m "Update lockfiles"
 fi
--- a/docs/package-lock.json
+++ b/docs/package-lock.json
@@ -19,7 +19,7 @@
    },
    "../node": {
      "name": "vectordb",
-      "version": "0.12.0",
+      "version": "0.21.2-beta.0",
      "cpu": [
        "x64",
        "arm64"
@@ -65,11 +65,11 @@
        "uuid": "^9.0.0"
      },
      "optionalDependencies": {
-        "@lancedb/vectordb-darwin-arm64": "0.12.0",
+        "@lancedb/vectordb-darwin-arm64": "0.21.2-beta.0",
-        "@lancedb/vectordb-darwin-x64": "0.12.0",
+        "@lancedb/vectordb-darwin-x64": "0.21.2-beta.0",
-        "@lancedb/vectordb-linux-arm64-gnu": "0.12.0",
+        "@lancedb/vectordb-linux-arm64-gnu": "0.21.2-beta.0",
-        "@lancedb/vectordb-linux-x64-gnu": "0.12.0",
+        "@lancedb/vectordb-linux-x64-gnu": "0.21.2-beta.0",
-        "@lancedb/vectordb-win32-x64-msvc": "0.12.0"
+        "@lancedb/vectordb-win32-x64-msvc": "0.21.2-beta.0"
      },
      "peerDependencies": {
        "@apache-arrow/ts": "^14.0.2",
--- a/docs/src/concepts/data_management.md
+++ b/docs/src/concepts/data_management.md
@@ -13,7 +13,7 @@ The following concepts are important to keep in mind:
 - Data is versioned, with each insert operation creating a new version of the dataset and an update to the manifest that tracks versions via metadata
 !!! note
-    1. First, each version contains metadata and just the new/updated data in your transaction. So if you have 100 versions, they aren't 100 duplicates of the same data. However, they do have 100x the metadata overhead of a single version, which can result in slower queries.  
+    1. First, each version contains metadata and just the new/updated data in your transaction. So if you have 100 versions, they aren't 100 duplicates of the same data. However, they do have 100x the metadata overhead of a single version, which can result in slower queries.
    2. Second, these versions exist to keep LanceDB scalable and consistent. We do not immediately blow away old versions when creating new ones because other clients might be in the middle of querying the old version. It's important to retain older versions for as long as they might be queried.
 ## What are fragments?
@@ -37,6 +37,10 @@ Depending on the use case and dataset, optimal compaction will have different re
 - It’s always better to use *batch* inserts rather than adding 1 row at a time (to avoid too small fragments). If single-row inserts are unavoidable, run compaction on a regular basis to merge them into larger fragments.
 - Keep the number of fragments under 100, which is suitable for most use cases (for *really* large datasets of >500M rows, more fragments might be needed)
 !!! note
    LanceDB Cloud/Enterprise supports [auto-compaction](https://docs.lancedb.com/enterprise/architecture/architecture#write-path) which automatically optimizes fragments in the background as data changes.
 ## Deletion
 Although Lance allows you to delete rows from a dataset, it does not actually delete the data immediately. It simply marks the row as deleted in the `DataFile` that represents a fragment. For a given version of the dataset, each fragment can have up to one deletion file (if no rows were ever deleted from that fragment, it will not have a deletion file). This is important to keep in mind because it means that the data is still there, and can be recovered if needed, as long as that version still exists based on your backup policy.
@@ -50,13 +54,9 @@ Reindexing is the process of updating the index to account for new data, keeping
 Both LanceDB OSS and Cloud support reindexing, but the process (at least for now) is different for each, depending on the type of index.
-When a reindex job is triggered in the background, the entire data is reindexed, but in the interim as new queries come in, LanceDB will combine results from the existing index with exhaustive kNN search on the new data. This is done to ensure that you're still searching on all your data, but it does come at a performance cost. The more data that you add without reindexing, the impact on latency (due to exhaustive search) can be noticeable.
+In LanceDB OSS, re-indexing happens synchronously when you call either `create_index` or `optimize` on a table. In LanceDB Cloud, re-indexing happens asynchronously as you add and update data in your table.
-### Vector reindex
+By default, queries will search new data even if it has yet to be indexed. This is done using brute-force methods, such as kNN for vector search, and combined with the fast index search results. This is done to ensure that you're always searching over all your data, but it does come at a performance cost. Without reindexing, adding more data to a table will make queries slower and more expensive. This behavior can be disabled by setting the [fast_search](https://lancedb.github.io/lancedb/python/python/#lancedb.query.AsyncQuery.fast_search) parameter which will instruct the query to ignore un-indexed data.
-* LanceDB Cloud supports incremental reindexing, where a background process will trigger a new index build for you automatically when new data is added to a dataset
+* LanceDB Cloud/Enterprise supports [automatic incremental reindexing](https://docs.lancedb.com/core#vector-index) for vector, scalar, and FTS indices, where a background process will trigger a new index build for you automatically when new data is added or modified in a dataset
 * LanceDB OSS requires you to manually trigger a reindex operation -- we are working on adding incremental reindexing to LanceDB OSS as well
 ### FTS reindex
 FTS reindexing is supported in both LanceDB OSS and Cloud, but the index must be rebuilt manually once a significant amount of new data has been added. We [updated](https://github.com/lancedb/lancedb/pull/762) Tantivy's default heap size from 128MB to 1GB in LanceDB, which makes reindexing up to 10x faster than with the default settings.
--- a/docs/src/guides/sql_querying.md
+++ b/docs/src/guides/sql_querying.md
@@ -1,7 +1,9 @@
 # SQL Querying
 You can use DuckDB and Apache Datafusion to query your LanceDB tables using SQL.
 This guide will show how to query Lance tables using both.
-We will re-use the dataset [created previously](./pandas_and_pyarrow.md):
+We will re-use the dataset [created previously](./tables.md):
 ```python
 import lancedb
@@ -27,15 +29,10 @@ arrow_table = table.to_lance()
 duckdb.query("SELECT * FROM arrow_table")
 ```
-```
+| vector      | item | price |
-┌─────────────┬─────────┬────────┐
+| ----------- | ---- | ----- |
-│   vector    │  item   │ price  │
+| [3.1, 4.1]  | foo  | 10.0  |
-│   float[]   │ varchar │ double │
+| [5.9, 26.5] | bar  | 20.0  |
 ├─────────────┼─────────┼────────┤
 │ [3.1, 4.1]  │ foo     │   10.0 │
 │ [5.9, 26.5] │ bar     │   20.0 │
 └─────────────┴─────────┴────────┘
 ```
 ## Querying a LanceDB Table with Apache Datafusion
@@ -57,12 +54,7 @@ Register the table created with the Datafusion session context.
    --8<-- "python/python/tests/docs/test_guide_tables.py:lance_sql_basic"
    ```
-```
+| vector      | item | price |
-┌─────────────┬─────────┬────────┐
+| ----------- | ---- | ----- |
-│   vector    │  item   │ price  │
+| [3.1, 4.1]  | foo  | 10.0  |
-│   float[]   │ varchar │ double │
+| [5.9, 26.5] | bar  | 20.0  |
 ├─────────────┼─────────┼────────┤
 │ [3.1, 4.1]  │ foo     │   10.0 │
 │ [5.9, 26.5] │ bar     │   20.0 │
 └─────────────┴─────────┴────────┘
 ```
--- a/docs/src/js/classes/Connection.md
+++ b/docs/src/js/classes/Connection.md
@@ -45,6 +45,8 @@ Any attempt to use the connection after it is closed will result in an error.
 ### createEmptyTable()
 #### createEmptyTable(name, schema, options)
 ```ts
 abstract createEmptyTable(
   name,
@@ -54,7 +56,7 @@ abstract createEmptyTable(
 Creates a new empty Table
-#### Parameters
+##### Parameters
 * **name**: `string`
    The name of the table.
@@ -63,8 +65,39 @@ Creates a new empty Table
    The schema of the table
 * **options?**: `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
    Additional options (backwards compatibility)
-#### Returns
+##### Returns
 `Promise`&lt;[`Table`](Table.md)&gt;
 #### createEmptyTable(name, schema, namespace, options)
 ```ts
 abstract createEmptyTable(
   name,
   schema,
   namespace?,
   options?): Promise<Table>
 ```
 Creates a new empty Table
 ##### Parameters
 * **name**: `string`
    The name of the table.
 * **schema**: [`SchemaLike`](../type-aliases/SchemaLike.md)
    The schema of the table
 * **namespace?**: `string`[]
    The namespace to create the table in (defaults to root namespace)
 * **options?**: `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
    Additional options
 ##### Returns
 `Promise`&lt;[`Table`](Table.md)&gt;
@@ -72,10 +105,10 @@ Creates a new empty Table
 ### createTable()
-#### createTable(options)
+#### createTable(options, namespace)
 ```ts
-abstract createTable(options): Promise<Table>
+abstract createTable(options, namespace?): Promise<Table>
 ```
 Creates a new Table and initialize it with new data.
@@ -85,6 +118,9 @@ Creates a new Table and initialize it with new data.
 * **options**: `object` & `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
    The options object.
 * **namespace?**: `string`[]
    The namespace to create the table in (defaults to root namespace)
 ##### Returns
 `Promise`&lt;[`Table`](Table.md)&gt;
@@ -110,6 +146,38 @@ Creates a new Table and initialize it with new data.
    to be inserted into the table
 * **options?**: `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
    Additional options (backwards compatibility)
 ##### Returns
 `Promise`&lt;[`Table`](Table.md)&gt;
 #### createTable(name, data, namespace, options)
 ```ts
 abstract createTable(
   name,
   data,
   namespace?,
   options?): Promise<Table>
 ```
 Creates a new Table and initialize it with new data.
 ##### Parameters
 * **name**: `string`
    The name of the table.
 * **data**: [`TableLike`](../type-aliases/TableLike.md) \| `Record`&lt;`string`, `unknown`&gt;[]
    Non-empty Array of Records
    to be inserted into the table
 * **namespace?**: `string`[]
    The namespace to create the table in (defaults to root namespace)
 * **options?**: `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
    Additional options
 ##### Returns
@@ -134,11 +202,16 @@ Return a brief description of the connection
 ### dropAllTables()
 ```ts
-abstract dropAllTables(): Promise<void>
+abstract dropAllTables(namespace?): Promise<void>
 ```
 Drop all tables in the database.
 #### Parameters
 * **namespace?**: `string`[]
    The namespace to drop tables from (defaults to root namespace).
 #### Returns
 `Promise`&lt;`void`&gt;
@@ -148,7 +221,7 @@ Drop all tables in the database.
 ### dropTable()
 ```ts
-abstract dropTable(name): Promise<void>
+abstract dropTable(name, namespace?): Promise<void>
 ```
 Drop an existing table.
@@ -158,6 +231,9 @@ Drop an existing table.
 * **name**: `string`
    The name of the table to drop.
 * **namespace?**: `string`[]
    The namespace of the table (defaults to root namespace).
 #### Returns
 `Promise`&lt;`void`&gt;
@@ -181,7 +257,10 @@ Return true if the connection has not been closed
 ### openTable()
 ```ts
-abstract openTable(name, options?): Promise<Table>
+abstract openTable(
   name,
   namespace?,
   options?): Promise<Table>
 ```
 Open a table in the database.
@@ -191,7 +270,11 @@ Open a table in the database.
 * **name**: `string`
    The name of the table
 * **namespace?**: `string`[]
    The namespace of the table (defaults to root namespace)
 * **options?**: `Partial`&lt;[`OpenTableOptions`](../interfaces/OpenTableOptions.md)&gt;
    Additional options
 #### Returns
@@ -201,6 +284,8 @@ Open a table in the database.
 ### tableNames()
 #### tableNames(options)
 ```ts
 abstract tableNames(options?): Promise<string[]>
 ```
@@ -209,12 +294,35 @@ List all the table names in this database.
 Tables will be returned in lexicographical order.
-#### Parameters
+##### Parameters
 * **options?**: `Partial`&lt;[`TableNamesOptions`](../interfaces/TableNamesOptions.md)&gt;
    options to control the
    paging / start point (backwards compatibility)
 ##### Returns
 `Promise`&lt;`string`[]&gt;
 #### tableNames(namespace, options)
 ```ts
 abstract tableNames(namespace?, options?): Promise<string[]>
 ```
 List all the table names in this database.
 Tables will be returned in lexicographical order.
 ##### Parameters
 * **namespace?**: `string`[]
    The namespace to list tables from (defaults to root namespace)
 * **options?**: `Partial`&lt;[`TableNamesOptions`](../interfaces/TableNamesOptions.md)&gt;
    options to control the
    paging / start point
-#### Returns
+##### Returns
 `Promise`&lt;`string`[]&gt;
--- a/docs/src/js/classes/HeaderProvider.md
+++ b/docs/src/js/classes/HeaderProvider.md
@@ -0,0 +1,85 @@
 [**@lancedb/lancedb**](../README.md) • **Docs**
 ***
 [@lancedb/lancedb](../globals.md) / HeaderProvider
 # Class: `abstract` HeaderProvider
 Abstract base class for providing custom headers for each request.
 Users can implement this interface to provide dynamic headers for various purposes
 such as authentication (OAuth tokens, API keys), request tracking (correlation IDs),
 custom metadata, or any other header-based requirements. The provider is called
 before each request to ensure fresh header values are always used.
 ## Examples
 Simple JWT token provider:
 ```typescript
 class JWTProvider extends HeaderProvider {
  constructor(private token: string) {
    super();
  }
  getHeaders(): Record<string, string> {
    return { authorization: `Bearer ${this.token}` };
  }
 }
 ```
 Provider with request tracking:
 ```typescript
 class RequestTrackingProvider extends HeaderProvider {
  constructor(private sessionId: string) {
    super();
  }
  getHeaders(): Record<string, string> {
    return {
      "X-Session-Id": this.sessionId,
      "X-Request-Id": `req-${Date.now()}`
    };
  }
 }
 ```
 ## Extended by
 - [`StaticHeaderProvider`](StaticHeaderProvider.md)
 - [`OAuthHeaderProvider`](OAuthHeaderProvider.md)
 ## Constructors
 ### new HeaderProvider()
 ```ts
 new HeaderProvider(): HeaderProvider
 ```
 #### Returns
 [`HeaderProvider`](HeaderProvider.md)
 ## Methods
 ### getHeaders()
 ```ts
 abstract getHeaders(): Record<string, string>
 ```
 Get the latest headers to be added to requests.
 This method is called before each request to the remote LanceDB server.
 Implementations should return headers that will be merged with existing headers.
 #### Returns
 `Record`&lt;`string`, `string`&gt;
 Dictionary of header names to values to add to the request.
 #### Throws
 If unable to fetch headers, the exception will be propagated and the request will fail.
--- a/docs/src/js/classes/MatchQuery.md
+++ b/docs/src/js/classes/MatchQuery.md
@@ -41,6 +41,7 @@ Creates an instance of MatchQuery.
    - `fuzziness`: The fuzziness level for the query (default is 0).
    - `maxExpansions`: The maximum number of terms to consider for fuzzy matching (default is 50).
    - `operator`: The logical operator to use for combining terms in the query (default is "OR").
    - `prefixLength`: The number of leading characters that must remain unchanged for fuzzy matching.
 * **options.boost?**: `number`
@@ -50,6 +51,8 @@ Creates an instance of MatchQuery.
 * **options.operator?**: [`Operator`](../enumerations/Operator.md)
 * **options.prefixLength?**: `number`
 #### Returns
 [`MatchQuery`](MatchQuery.md)
--- a/docs/src/js/classes/NativeJsHeaderProvider.md
+++ b/docs/src/js/classes/NativeJsHeaderProvider.md
@@ -0,0 +1,29 @@
 [**@lancedb/lancedb**](../README.md) • **Docs**
 ***
 [@lancedb/lancedb](../globals.md) / NativeJsHeaderProvider
 # Class: NativeJsHeaderProvider
 JavaScript HeaderProvider implementation that wraps a JavaScript callback.
 This is the only native header provider - all header provider implementations
 should provide a JavaScript function that returns headers.
 ## Constructors
 ### new NativeJsHeaderProvider()
 ```ts
 new NativeJsHeaderProvider(getHeadersCallback): NativeJsHeaderProvider
 ```
 Create a new JsHeaderProvider from a JavaScript callback
 #### Parameters
 * **getHeadersCallback**
 #### Returns
 [`NativeJsHeaderProvider`](NativeJsHeaderProvider.md)
--- a/docs/src/js/classes/OAuthHeaderProvider.md
+++ b/docs/src/js/classes/OAuthHeaderProvider.md
@@ -0,0 +1,108 @@
 [**@lancedb/lancedb**](../README.md) • **Docs**
 ***
 [@lancedb/lancedb](../globals.md) / OAuthHeaderProvider
 # Class: OAuthHeaderProvider
 Example implementation: OAuth token provider with automatic refresh.
 This is an example implementation showing how to manage OAuth tokens
 with automatic refresh when they expire.
 ## Example
 ```typescript
 async function fetchToken(): Promise<TokenResponse> {
  const response = await fetch("https://oauth.example.com/token", {
    method: "POST",
    body: JSON.stringify({
      grant_type: "client_credentials",
      client_id: "your-client-id",
      client_secret: "your-client-secret"
    }),
    headers: { "Content-Type": "application/json" }
  });
  const data = await response.json();
  return {
    accessToken: data.access_token,
    expiresIn: data.expires_in
  };
 }
 const provider = new OAuthHeaderProvider(fetchToken);
 const headers = provider.getHeaders();
 // Returns: {"authorization": "Bearer <your-token>"}
 ```
 ## Extends
 - [`HeaderProvider`](HeaderProvider.md)
 ## Constructors
 ### new OAuthHeaderProvider()
 ```ts
 new OAuthHeaderProvider(tokenFetcher, refreshBufferSeconds): OAuthHeaderProvider
 ```
 Initialize the OAuth provider.
 #### Parameters
 * **tokenFetcher**
    Function to fetch new tokens. Should return object with 'accessToken' and optionally 'expiresIn'.
 * **refreshBufferSeconds**: `number` = `300`
    Seconds before expiry to refresh token. Default 300 (5 minutes).
 #### Returns
 [`OAuthHeaderProvider`](OAuthHeaderProvider.md)
 #### Overrides
 [`HeaderProvider`](HeaderProvider.md).[`constructor`](HeaderProvider.md#constructors)
 ## Methods
 ### getHeaders()
 ```ts
 getHeaders(): Record<string, string>
 ```
 Get OAuth headers, refreshing token if needed.
 Note: This is synchronous for now as the Rust implementation expects sync.
 In a real implementation, this would need to handle async properly.
 #### Returns
 `Record`&lt;`string`, `string`&gt;
 Headers with Bearer token authorization.
 #### Throws
 If unable to fetch or refresh token.
 #### Overrides
 [`HeaderProvider`](HeaderProvider.md).[`getHeaders`](HeaderProvider.md#getheaders)
 ***
 ### refreshToken()
 ```ts
 refreshToken(): Promise<void>
 ```
 Manually refresh the token.
 Call this before using getHeaders() to ensure token is available.
 #### Returns
 `Promise`&lt;`void`&gt;
--- a/docs/src/js/classes/Query.md
+++ b/docs/src/js/classes/Query.md
@@ -14,7 +14,7 @@ A builder for LanceDB queries.
 ## Extends
- [`QueryBase`](QueryBase.md)&lt;`NativeQuery`&gt;
+- `StandardQueryBase`&lt;`NativeQuery`&gt;
 ## Properties
@@ -26,7 +26,7 @@ protected inner: Query | Promise<Query>;
 #### Inherited from
-[`QueryBase`](QueryBase.md).[`inner`](QueryBase.md#inner)
+`StandardQueryBase.inner`
 ## Methods
@@ -73,7 +73,7 @@ AnalyzeExec verbose=true, metrics=[]
 #### Inherited from
-[`QueryBase`](QueryBase.md).[`analyzePlan`](QueryBase.md#analyzeplan)
+`StandardQueryBase.analyzePlan`
 ***
@@ -107,7 +107,7 @@ single query)
 #### Inherited from
-[`QueryBase`](QueryBase.md).[`execute`](QueryBase.md#execute)
+`StandardQueryBase.execute`
 ***
@@ -143,7 +143,7 @@ const plan = await table.query().nearestTo([0.5, 0.2]).explainPlan();
 #### Inherited from
-[`QueryBase`](QueryBase.md).[`explainPlan`](QueryBase.md#explainplan)
+`StandardQueryBase.explainPlan`
 ***
@@ -164,7 +164,7 @@ Use [Table#optimize](Table.md#optimize) to index all un-indexed data.
 #### Inherited from
-[`QueryBase`](QueryBase.md).[`fastSearch`](QueryBase.md#fastsearch)
+`StandardQueryBase.fastSearch`
 ***
@@ -194,7 +194,7 @@ Use `where` instead
 #### Inherited from
-[`QueryBase`](QueryBase.md).[`filter`](QueryBase.md#filter)
+`StandardQueryBase.filter`
 ***
@@ -216,7 +216,7 @@ fullTextSearch(query, options?): this
 #### Inherited from
-[`QueryBase`](QueryBase.md).[`fullTextSearch`](QueryBase.md#fulltextsearch)
+`StandardQueryBase.fullTextSearch`
 ***
@@ -241,7 +241,7 @@ called then every valid row from the table will be returned.
 #### Inherited from
-[`QueryBase`](QueryBase.md).[`limit`](QueryBase.md#limit)
+`StandardQueryBase.limit`
 ***
@@ -325,6 +325,10 @@ nearestToText(query, columns?): Query
 offset(offset): this
 ```
 Set the number of rows to skip before returning results.
 This is useful for pagination.
 #### Parameters
 * **offset**: `number`
@@ -335,7 +339,7 @@ offset(offset): this
 #### Inherited from
-[`QueryBase`](QueryBase.md).[`offset`](QueryBase.md#offset)
+`StandardQueryBase.offset`
 ***
@@ -388,7 +392,7 @@ object insertion order is easy to get wrong and `Map` is more foolproof.
 #### Inherited from
-[`QueryBase`](QueryBase.md).[`select`](QueryBase.md#select)
+`StandardQueryBase.select`
 ***
@@ -410,7 +414,7 @@ Collect the results as an array of objects.
 #### Inherited from
-[`QueryBase`](QueryBase.md).[`toArray`](QueryBase.md#toarray)
+`StandardQueryBase.toArray`
 ***
@@ -436,7 +440,7 @@ ArrowTable.
 #### Inherited from
-[`QueryBase`](QueryBase.md).[`toArrow`](QueryBase.md#toarrow)
+`StandardQueryBase.toArrow`
 ***
@@ -471,7 +475,7 @@ on the filter column(s).
 #### Inherited from
-[`QueryBase`](QueryBase.md).[`where`](QueryBase.md#where)
+`StandardQueryBase.where`
 ***
@@ -493,4 +497,4 @@ order to perform hybrid search.
 #### Inherited from
-[`QueryBase`](QueryBase.md).[`withRowId`](QueryBase.md#withrowid)
+`StandardQueryBase.withRowId`
--- a/docs/src/js/classes/QueryBase.md
+++ b/docs/src/js/classes/QueryBase.md
@@ -15,12 +15,11 @@ Common methods supported by all query types
 ## Extended by
- [`Query`](Query.md)
+- [`TakeQuery`](TakeQuery.md)
 - [`VectorQuery`](VectorQuery.md)
 ## Type Parameters
-• **NativeQueryType** *extends* `NativeQuery` \| `NativeVectorQuery`
+• **NativeQueryType** *extends* `NativeQuery` \| `NativeVectorQuery` \| `NativeTakeQuery`
 ## Implements
@@ -141,104 +140,6 @@ const plan = await table.query().nearestTo([0.5, 0.2]).explainPlan();
 ***
 ### fastSearch()
 ```ts
 fastSearch(): this
 ```
 Skip searching un-indexed data. This can make search faster, but will miss
 any data that is not yet indexed.
 Use [Table#optimize](Table.md#optimize) to index all un-indexed data.
 #### Returns
 `this`
 ***
 ### ~~filter()~~
 ```ts
 filter(predicate): this
 ```
 A filter statement to be applied to this query.
 #### Parameters
 * **predicate**: `string`
 #### Returns
 `this`
 #### See
 where
 #### Deprecated
 Use `where` instead
 ***
 ### fullTextSearch()
 ```ts
 fullTextSearch(query, options?): this
 ```
 #### Parameters
 * **query**: `string` \| [`FullTextQuery`](../interfaces/FullTextQuery.md)
 * **options?**: `Partial`&lt;[`FullTextSearchOptions`](../interfaces/FullTextSearchOptions.md)&gt;
 #### Returns
 `this`
 ***
 ### limit()
 ```ts
 limit(limit): this
 ```
 Set the maximum number of results to return.
 By default, a plain search has no limit.  If this method is not
 called then every valid row from the table will be returned.
 #### Parameters
 * **limit**: `number`
 #### Returns
 `this`
 ***
 ### offset()
 ```ts
 offset(offset): this
 ```
 #### Parameters
 * **offset**: `number`
 #### Returns
 `this`
 ***
 ### select()
 ```ts
@@ -328,37 +229,6 @@ ArrowTable.
 ***
 ### where()
 ```ts
 where(predicate): this
 ```
 A filter statement to be applied to this query.
 The filter should be supplied as an SQL query string.  For example:
 #### Parameters
 * **predicate**: `string`
 #### Returns
 `this`
 #### Example
 ```ts
 x > 10
 y > 0 AND y < 100
 x > 5 OR y = 'test'
 Filtering performance can often be improved by creating a scalar index
 on the filter column(s).
 ```
 ***
 ### withRowId()
 ```ts
--- a/docs/src/js/classes/Session.md
+++ b/docs/src/js/classes/Session.md
@@ -0,0 +1,88 @@
 [**@lancedb/lancedb**](../README.md) • **Docs**
 ***
 [@lancedb/lancedb](../globals.md) / Session
 # Class: Session
 A session for managing caches and object stores across LanceDB operations.
 Sessions allow you to configure cache sizes for index and metadata caches,
 which can significantly impact memory use and performance. They can
 also be re-used across multiple connections to share the same cache state.
 ## Constructors
 ### new Session()
 ```ts
 new Session(indexCacheSizeBytes?, metadataCacheSizeBytes?): Session
 ```
 Create a new session with custom cache sizes.
 **Cache size arguments**
 - `indexCacheSizeBytes`: The size of the index cache in bytes.
  Index data is stored in memory in this cache to speed up queries.
  Defaults to 6GB if not specified.
 - `metadataCacheSizeBytes`: The size of the metadata cache in bytes.
  The metadata cache stores file metadata and schema information in memory.
  This cache improves scan and write performance.
  Defaults to 1GB if not specified.
 #### Parameters
 * **indexCacheSizeBytes?**: `null` \| `bigint`
 * **metadataCacheSizeBytes?**: `null` \| `bigint`
 #### Returns
 [`Session`](Session.md)
 ## Methods
 ### approxNumItems()
 ```ts
 approxNumItems(): number
 ```
 Get the approximate number of items cached in the session.
 #### Returns
 `number`
 ***
 ### sizeBytes()
 ```ts
 sizeBytes(): bigint
 ```
 Get the current size of the session caches in bytes.
 #### Returns
 `bigint`
 ***
 ### default()
 ```ts
 static default(): Session
 ```
 Create a session with default cache sizes.
 This is equivalent to creating a session with 6GB index cache
 and 1GB metadata cache.
 #### Returns
 [`Session`](Session.md)
--- a/docs/src/js/classes/StaticHeaderProvider.md
+++ b/docs/src/js/classes/StaticHeaderProvider.md
@@ -0,0 +1,70 @@
 [**@lancedb/lancedb**](../README.md) • **Docs**
 ***
 [@lancedb/lancedb](../globals.md) / StaticHeaderProvider
 # Class: StaticHeaderProvider
 Example implementation: A simple header provider that returns static headers.
 This is an example implementation showing how to create a HeaderProvider
 for cases where headers don't change during the session.
 ## Example
 ```typescript
 const provider = new StaticHeaderProvider({
  authorization: "Bearer my-token",
  "X-Custom-Header": "custom-value"
 });
 const headers = provider.getHeaders();
 // Returns: {authorization: 'Bearer my-token', 'X-Custom-Header': 'custom-value'}
 ```
 ## Extends
 - [`HeaderProvider`](HeaderProvider.md)
 ## Constructors
 ### new StaticHeaderProvider()
 ```ts
 new StaticHeaderProvider(headers): StaticHeaderProvider
 ```
 Initialize with static headers.
 #### Parameters
 * **headers**: `Record`&lt;`string`, `string`&gt;
    Headers to return for every request.
 #### Returns
 [`StaticHeaderProvider`](StaticHeaderProvider.md)
 #### Overrides
 [`HeaderProvider`](HeaderProvider.md).[`constructor`](HeaderProvider.md#constructors)
 ## Methods
 ### getHeaders()
 ```ts
 getHeaders(): Record<string, string>
 ```
 Return the static headers.
 #### Returns
 `Record`&lt;`string`, `string`&gt;
 Copy of the static headers.
 #### Overrides
 [`HeaderProvider`](HeaderProvider.md).[`getHeaders`](HeaderProvider.md#getheaders)
--- a/docs/src/js/classes/Table.md
+++ b/docs/src/js/classes/Table.md
@@ -612,7 +612,7 @@ of the given query
 #### Parameters
-* **query**: `string` \| [`IntoVector`](../type-aliases/IntoVector.md) \| [`FullTextQuery`](../interfaces/FullTextQuery.md)
+* **query**: `string` \| [`IntoVector`](../type-aliases/IntoVector.md) \| [`MultiVector`](../type-aliases/MultiVector.md) \| [`FullTextQuery`](../interfaces/FullTextQuery.md)
    the query, a vector or string
 * **queryType?**: `string`
@@ -674,6 +674,48 @@ console.log(tags); // { "v1": { version: 1, manifestSize: ... } }
 ***
 ### takeOffsets()
 ```ts
 abstract takeOffsets(offsets): TakeQuery
 ```
 Create a query that returns a subset of the rows in the table.
 #### Parameters
 * **offsets**: `number`[]
    The offsets of the rows to return.
 #### Returns
 [`TakeQuery`](TakeQuery.md)
 A builder that can be used to parameterize the query.
 ***
 ### takeRowIds()
 ```ts
 abstract takeRowIds(rowIds): TakeQuery
 ```
 Create a query that returns a subset of the rows in the table.
 #### Parameters
 * **rowIds**: `number`[]
    The row ids of the rows to return.
 #### Returns
 [`TakeQuery`](TakeQuery.md)
 A builder that can be used to parameterize the query.
 ***
 ### toArrow()
 ```ts
@@ -799,7 +841,7 @@ by `query`.
 #### Parameters
-* **vector**: [`IntoVector`](../type-aliases/IntoVector.md)
+* **vector**: [`IntoVector`](../type-aliases/IntoVector.md) \| [`MultiVector`](../type-aliases/MultiVector.md)
 #### Returns
--- a/docs/src/js/classes/TakeQuery.md
+++ b/docs/src/js/classes/TakeQuery.md
@@ -0,0 +1,265 @@
 [**@lancedb/lancedb**](../README.md) • **Docs**
 ***
 [@lancedb/lancedb](../globals.md) / TakeQuery
 # Class: TakeQuery
 A query that returns a subset of the rows in the table.
 ## Extends
 - [`QueryBase`](QueryBase.md)&lt;`NativeTakeQuery`&gt;
 ## Properties
 ### inner
 ```ts
 protected inner: TakeQuery | Promise<TakeQuery>;
 ```
 #### Inherited from
 [`QueryBase`](QueryBase.md).[`inner`](QueryBase.md#inner)
 ## Methods
 ### analyzePlan()
 ```ts
 analyzePlan(): Promise<string>
 ```
 Executes the query and returns the physical query plan annotated with runtime metrics.
 This is useful for debugging and performance analysis, as it shows how the query was executed
 and includes metrics such as elapsed time, rows processed, and I/O statistics.
 #### Returns
 `Promise`&lt;`string`&gt;
 A query execution plan with runtime metrics for each step.
 #### Example
 ```ts
 import * as lancedb from "@lancedb/lancedb"
 const db = await lancedb.connect("./.lancedb");
 const table = await db.createTable("my_table", [
  { vector: [1.1, 0.9], id: "1" },
 ]);
 const plan = await table.query().nearestTo([0.5, 0.2]).analyzePlan();
 Example output (with runtime metrics inlined):
 AnalyzeExec verbose=true, metrics=[]
 ProjectionExec: expr=[id@3 as id, vector@0 as vector, _distance@2 as _distance], metrics=[output_rows=1, elapsed_compute=3.292µs]
  Take: columns="vector, _rowid, _distance, (id)", metrics=[output_rows=1, elapsed_compute=66.001µs, batches_processed=1, bytes_read=8, iops=1, requests=1]
   CoalesceBatchesExec: target_batch_size=1024, metrics=[output_rows=1, elapsed_compute=3.333µs]
    GlobalLimitExec: skip=0, fetch=10, metrics=[output_rows=1, elapsed_compute=167ns]
     FilterExec: _distance@2 IS NOT NULL, metrics=[output_rows=1, elapsed_compute=8.542µs]
      SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], metrics=[output_rows=1, elapsed_compute=63.25µs, row_replacements=1]
       KNNVectorDistance: metric=l2, metrics=[output_rows=1, elapsed_compute=114.333µs, output_batches=1]
        LanceScan: uri=/path/to/data, projection=[vector], row_id=true, row_addr=false, ordered=false, metrics=[output_rows=1, elapsed_compute=103.626µs, bytes_read=549, iops=2, requests=2]
 ```
 #### Inherited from
 [`QueryBase`](QueryBase.md).[`analyzePlan`](QueryBase.md#analyzeplan)
 ***
 ### execute()
 ```ts
 protected execute(options?): RecordBatchIterator
 ```
 Execute the query and return the results as an `AsyncIterator` of `RecordBatch`.
 #### Parameters
 * **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;
 #### Returns
 [`RecordBatchIterator`](RecordBatchIterator.md)
 #### See
 - AsyncIterator
 of
 - RecordBatch.
 By default, LanceDb will use many threads to calculate results and, when
 the result set is large, multiple batches will be processed at one time.
 This readahead is limited however and backpressure will be applied if this
 stream is consumed slowly (this constrains the maximum memory used by a
 single query)
 #### Inherited from
 [`QueryBase`](QueryBase.md).[`execute`](QueryBase.md#execute)
 ***
 ### explainPlan()
 ```ts
 explainPlan(verbose): Promise<string>
 ```
 Generates an explanation of the query execution plan.
 #### Parameters
 * **verbose**: `boolean` = `false`
    If true, provides a more detailed explanation. Defaults to false.
 #### Returns
 `Promise`&lt;`string`&gt;
 A Promise that resolves to a string containing the query execution plan explanation.
 #### Example
 ```ts
 import * as lancedb from "@lancedb/lancedb"
 const db = await lancedb.connect("./.lancedb");
 const table = await db.createTable("my_table", [
  { vector: [1.1, 0.9], id: "1" },
 ]);
 const plan = await table.query().nearestTo([0.5, 0.2]).explainPlan();
 ```
 #### Inherited from
 [`QueryBase`](QueryBase.md).[`explainPlan`](QueryBase.md#explainplan)
 ***
 ### select()
 ```ts
 select(columns): this
 ```
 Return only the specified columns.
 By default a query will return all columns from the table.  However, this can have
 a very significant impact on latency.  LanceDb stores data in a columnar fashion.  This
 means we can finely tune our I/O to select exactly the columns we need.
 As a best practice you should always limit queries to the columns that you need.  If you
 pass in an array of column names then only those columns will be returned.
 You can also use this method to create new "dynamic" columns based on your existing columns.
 For example, you may not care about "a" or "b" but instead simply want "a + b".  This is often
 seen in the SELECT clause of an SQL query (e.g. `SELECT a+b FROM my_table`).
 To create dynamic columns you can pass in a Map<string, string>.  A column will be returned
 for each entry in the map.  The key provides the name of the column.  The value is
 an SQL string used to specify how the column is calculated.
 For example, an SQL query might state `SELECT a + b AS combined, c`.  The equivalent
 input to this method would be:
 #### Parameters
 * **columns**: `string` \| `string`[] \| `Record`&lt;`string`, `string`&gt; \| `Map`&lt;`string`, `string`&gt;
 #### Returns
 `this`
 #### Example
 ```ts
 new Map([["combined", "a + b"], ["c", "c"]])
 ```
 Columns will always be returned in the order given, even if that order is different than
 the order used when adding the data.
 Note that you can pass in a `Record<string, string>` (e.g. an object literal). This method
 uses `Object.entries` which should preserve the insertion order of the object.  However,
 object insertion order is easy to get wrong and `Map` is more foolproof.
 #### Inherited from
 [`QueryBase`](QueryBase.md).[`select`](QueryBase.md#select)
 ***
 ### toArray()
 ```ts
 toArray(options?): Promise<any[]>
 ```
 Collect the results as an array of objects.
 #### Parameters
 * **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;
 #### Returns
 `Promise`&lt;`any`[]&gt;
 #### Inherited from
 [`QueryBase`](QueryBase.md).[`toArray`](QueryBase.md#toarray)
 ***
 ### toArrow()
 ```ts
 toArrow(options?): Promise<Table<any>>
 ```
 Collect the results as an Arrow `Table`.
 #### Parameters
 * **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;
 #### Returns
 `Promise`&lt;`Table`&lt;`any`&gt;&gt;
 #### See
 ArrowTable.
 #### Inherited from
 [`QueryBase`](QueryBase.md).[`toArrow`](QueryBase.md#toarrow)
 ***
 ### withRowId()
 ```ts
 withRowId(): this
 ```
 Whether to return the row id in the results.
 This column can be used to match results between different queries. For
 example, to match results from a full text search and a vector search in
 order to perform hybrid search.
 #### Returns
 `this`
 #### Inherited from
 [`QueryBase`](QueryBase.md).[`withRowId`](QueryBase.md#withrowid)
--- a/docs/src/js/classes/VectorQuery.md
+++ b/docs/src/js/classes/VectorQuery.md
@@ -16,7 +16,7 @@ This builder can be reused to execute the query many times.
 ## Extends
- [`QueryBase`](QueryBase.md)&lt;`NativeVectorQuery`&gt;
+- `StandardQueryBase`&lt;`NativeVectorQuery`&gt;
 ## Properties
@@ -28,7 +28,7 @@ protected inner: VectorQuery | Promise<VectorQuery>;
 #### Inherited from
-[`QueryBase`](QueryBase.md).[`inner`](QueryBase.md#inner)
+`StandardQueryBase.inner`
 ## Methods
@@ -91,7 +91,7 @@ AnalyzeExec verbose=true, metrics=[]
 #### Inherited from
-[`QueryBase`](QueryBase.md).[`analyzePlan`](QueryBase.md#analyzeplan)
+`StandardQueryBase.analyzePlan`
 ***
@@ -248,7 +248,7 @@ single query)
 #### Inherited from
-[`QueryBase`](QueryBase.md).[`execute`](QueryBase.md#execute)
+`StandardQueryBase.execute`
 ***
@@ -284,7 +284,7 @@ const plan = await table.query().nearestTo([0.5, 0.2]).explainPlan();
 #### Inherited from
-[`QueryBase`](QueryBase.md).[`explainPlan`](QueryBase.md#explainplan)
+`StandardQueryBase.explainPlan`
 ***
@@ -305,7 +305,7 @@ Use [Table#optimize](Table.md#optimize) to index all un-indexed data.
 #### Inherited from
-[`QueryBase`](QueryBase.md).[`fastSearch`](QueryBase.md#fastsearch)
+`StandardQueryBase.fastSearch`
 ***
@@ -335,7 +335,7 @@ Use `where` instead
 #### Inherited from
-[`QueryBase`](QueryBase.md).[`filter`](QueryBase.md#filter)
+`StandardQueryBase.filter`
 ***
@@ -357,7 +357,7 @@ fullTextSearch(query, options?): this
 #### Inherited from
-[`QueryBase`](QueryBase.md).[`fullTextSearch`](QueryBase.md#fulltextsearch)
+`StandardQueryBase.fullTextSearch`
 ***
@@ -382,7 +382,54 @@ called then every valid row from the table will be returned.
 #### Inherited from
-[`QueryBase`](QueryBase.md).[`limit`](QueryBase.md#limit)
+`StandardQueryBase.limit`
 ***
 ### maximumNprobes()
 ```ts
 maximumNprobes(maximumNprobes): VectorQuery
 ```
 Set the maximum number of probes used.
 This controls the maximum number of partitions that will be searched.  If this
 number is greater than minimumNprobes then the excess partitions will _only_ be
 searched if we have not found enough results.  This can be useful when there is
 a narrow filter to allow these queries to spend more time searching and avoid
 potential false negatives.
 #### Parameters
 * **maximumNprobes**: `number`
 #### Returns
 [`VectorQuery`](VectorQuery.md)
 ***
 ### minimumNprobes()
 ```ts
 minimumNprobes(minimumNprobes): VectorQuery
 ```
 Set the minimum number of probes used.
 This controls the minimum number of partitions that will be searched.  This
 parameter will impact every query against a vector index, regardless of the
 filter.  See `nprobes` for more details.  Higher values will increase recall
 but will also increase latency.
 #### Parameters
 * **minimumNprobes**: `number`
 #### Returns
 [`VectorQuery`](VectorQuery.md)
 ***
@@ -413,6 +460,10 @@ For best results we recommend tuning this parameter with a benchmark against
 your actual data to find the smallest possible value that will still give
 you the desired recall.
 For more fine grained control over behavior when you have a very narrow filter
 you can use `minimumNprobes` and `maximumNprobes`.  This method sets both
 the minimum and maximum to the same value.
 #### Parameters
 * **nprobes**: `number`
@@ -429,6 +480,10 @@ you the desired recall.
 offset(offset): this
 ```
 Set the number of rows to skip before returning results.
 This is useful for pagination.
 #### Parameters
 * **offset**: `number`
@@ -439,7 +494,7 @@ offset(offset): this
 #### Inherited from
-[`QueryBase`](QueryBase.md).[`offset`](QueryBase.md#offset)
+`StandardQueryBase.offset`
 ***
@@ -586,7 +641,7 @@ object insertion order is easy to get wrong and `Map` is more foolproof.
 #### Inherited from
-[`QueryBase`](QueryBase.md).[`select`](QueryBase.md#select)
+`StandardQueryBase.select`
 ***
@@ -608,7 +663,7 @@ Collect the results as an array of objects.
 #### Inherited from
-[`QueryBase`](QueryBase.md).[`toArray`](QueryBase.md#toarray)
+`StandardQueryBase.toArray`
 ***
@@ -634,7 +689,7 @@ ArrowTable.
 #### Inherited from
-[`QueryBase`](QueryBase.md).[`toArrow`](QueryBase.md#toarrow)
+`StandardQueryBase.toArrow`
 ***
@@ -669,7 +724,7 @@ on the filter column(s).
 #### Inherited from
-[`QueryBase`](QueryBase.md).[`where`](QueryBase.md#where)
+`StandardQueryBase.where`
 ***
@@ -691,4 +746,4 @@ order to perform hybrid search.
 #### Inherited from
-[`QueryBase`](QueryBase.md).[`withRowId`](QueryBase.md#withrowid)
+`StandardQueryBase.withRowId`
--- a/docs/src/js/enumerations/Occur.md
+++ b/docs/src/js/enumerations/Occur.md
@@ -10,6 +10,7 @@ Enum representing the occurrence of terms in full-text queries.
 - `Must`: The term must be present in the document.
 - `Should`: The term should contribute to the document score, but is not required.
 - `MustNot`: The term must not be present in the document.
 ## Enumeration Members
@@ -21,6 +22,14 @@ Must: "MUST";
 ***
 ### MustNot
 ```ts
 MustNot: "MUST_NOT";
 ```
 ***
 ### Should
 ```ts
--- a/docs/src/js/functions/connect.md
+++ b/docs/src/js/functions/connect.md
@@ -6,10 +6,14 @@
 # Function: connect()
-## connect(uri, options)
+## connect(uri, options, session, headerProvider)
 ```ts
-function connect(uri, options?): Promise<Connection>
+function connect(
   uri,
   options?,
   session?,
   headerProvider?): Promise<Connection>
 ```
 Connect to a LanceDB instance at the given URI.
@@ -29,6 +33,10 @@ Accepted formats:
 * **options?**: `Partial`&lt;[`ConnectionOptions`](../interfaces/ConnectionOptions.md)&gt;
    The options to use when connecting to the database
 * **session?**: [`Session`](../classes/Session.md)
 * **headerProvider?**: [`HeaderProvider`](../classes/HeaderProvider.md) \| () => `Record`&lt;`string`, `string`&gt; \| () => `Promise`&lt;`Record`&lt;`string`, `string`&gt;&gt;
 ### Returns
 `Promise`&lt;[`Connection`](../classes/Connection.md)&gt;
@@ -50,6 +58,18 @@ const conn = await connect(
 });
 ```
 Using with a header provider for per-request authentication:
 ```ts
 const provider = new StaticHeaderProvider({
  "X-API-Key": "my-key"
 });
 const conn = await connect(
  "db://host:port",
  options,
  undefined, // session
  provider
 );
 ```
 ## connect(options)
 ```ts
@@ -77,7 +97,7 @@ Accepted formats:
 [ConnectionOptions](../interfaces/ConnectionOptions.md) for more details on the URI format.
-### Example
+### Examples
 ```ts
 const conn = await connect({
@@ -85,3 +105,11 @@ const conn = await connect({
  storageOptions: {timeout: "60s"}
 });
 ```
 ```ts
 const session = Session.default();
 const conn = await connect({
  uri: "/path/to/database",
  session: session
 });
 ```
--- a/docs/src/js/globals.md
+++ b/docs/src/js/globals.md
@@ -20,18 +20,24 @@
 - [BooleanQuery](classes/BooleanQuery.md)
 - [BoostQuery](classes/BoostQuery.md)
 - [Connection](classes/Connection.md)
 - [HeaderProvider](classes/HeaderProvider.md)
 - [Index](classes/Index.md)
 - [MakeArrowTableOptions](classes/MakeArrowTableOptions.md)
 - [MatchQuery](classes/MatchQuery.md)
 - [MergeInsertBuilder](classes/MergeInsertBuilder.md)
 - [MultiMatchQuery](classes/MultiMatchQuery.md)
 - [NativeJsHeaderProvider](classes/NativeJsHeaderProvider.md)
 - [OAuthHeaderProvider](classes/OAuthHeaderProvider.md)
 - [PhraseQuery](classes/PhraseQuery.md)
 - [Query](classes/Query.md)
 - [QueryBase](classes/QueryBase.md)
 - [RecordBatchIterator](classes/RecordBatchIterator.md)
 - [Session](classes/Session.md)
 - [StaticHeaderProvider](classes/StaticHeaderProvider.md)
 - [Table](classes/Table.md)
 - [TagContents](classes/TagContents.md)
 - [Tags](classes/Tags.md)
 - [TakeQuery](classes/TakeQuery.md)
 - [VectorColumnOptions](classes/VectorColumnOptions.md)
 - [VectorQuery](classes/VectorQuery.md)
@@ -72,6 +78,7 @@
 - [TableNamesOptions](interfaces/TableNamesOptions.md)
 - [TableStatistics](interfaces/TableStatistics.md)
 - [TimeoutConfig](interfaces/TimeoutConfig.md)
 - [TokenResponse](interfaces/TokenResponse.md)
 - [UpdateOptions](interfaces/UpdateOptions.md)
 - [UpdateResult](interfaces/UpdateResult.md)
 - [Version](interfaces/Version.md)
@@ -84,6 +91,7 @@
 - [FieldLike](type-aliases/FieldLike.md)
 - [IntoSql](type-aliases/IntoSql.md)
 - [IntoVector](type-aliases/IntoVector.md)
 - [MultiVector](type-aliases/MultiVector.md)
 - [RecordBatchLike](type-aliases/RecordBatchLike.md)
 - [SchemaLike](type-aliases/SchemaLike.md)
 - [TableLike](type-aliases/TableLike.md)
--- a/docs/src/js/interfaces/ClientConfig.md
+++ b/docs/src/js/interfaces/ClientConfig.md
@@ -16,6 +16,14 @@ optional extraHeaders: Record<string, string>;
 ***
 ### idDelimiter?
 ```ts
 optional idDelimiter: string;
 ```
 ***
 ### retryConfig?
 ```ts
--- a/docs/src/js/interfaces/ConnectionOptions.md
+++ b/docs/src/js/interfaces/ConnectionOptions.md
@@ -70,6 +70,17 @@ Defaults to 'us-east-1'.
 ***
 ### session?
 ```ts
 optional session: Session;
 ```
 (For LanceDB OSS only): the session to use for this connection. Holds
 shared caches and other session-specific state.
 ***
 ### storageOptions?
 ```ts
--- a/docs/src/js/interfaces/FtsOptions.md
+++ b/docs/src/js/interfaces/FtsOptions.md
@@ -23,7 +23,7 @@ whether to remove punctuation
 ### baseTokenizer?
 ```ts
-optional baseTokenizer: "raw" | "simple" | "whitespace";
+optional baseTokenizer: "raw" | "simple" | "whitespace" | "ngram";
 ```
 The tokenizer to use when building the index.
@@ -71,6 +71,36 @@ tokens longer than this length will be ignored
 ***
 ### ngramMaxLength?
 ```ts
 optional ngramMaxLength: number;
 ```
 ngram max length
 ***
 ### ngramMinLength?
 ```ts
 optional ngramMinLength: number;
 ```
 ngram min length
 ***
 ### prefixOnly?
 ```ts
 optional prefixOnly: boolean;
 ```
 whether to only index the prefix of the token for ngram tokenizer
 ***
 ### removeStopWords?
 ```ts
--- a/docs/src/js/interfaces/IndexOptions.md
+++ b/docs/src/js/interfaces/IndexOptions.md
@@ -26,6 +26,18 @@ will be used to determine the most useful kind of index to create.
 ***
 ### name?
 ```ts
 optional name: string;
 ```
 Optional custom name for the index.
 If not provided, a default name will be generated based on the column name.
 ***
 ### replace?
 ```ts
@@ -42,8 +54,27 @@ The default is true
 ***
 ### train?
 ```ts
 optional train: boolean;
 ```
 Whether to train the index with existing data.
 If true (default), the index will be trained with existing data in the table.
 If false, the index will be created empty and populated as new data is added.
 Note: This option is only supported for scalar indices. Vector indices always train.
 ***
 ### waitTimeoutSeconds?
 ```ts
 optional waitTimeoutSeconds: number;
 ```
 Timeout in seconds to wait for index creation to complete.
 If not specified, the method will return immediately after starting the index creation.
--- a/docs/src/js/interfaces/OpenTableOptions.md
+++ b/docs/src/js/interfaces/OpenTableOptions.md
@@ -8,7 +8,7 @@
 ## Properties
-### indexCacheSize?
+### ~~indexCacheSize?~~
 ```ts
 optional indexCacheSize: number;
@@ -16,6 +16,11 @@ optional indexCacheSize: number;
 Set the size of the index cache, specified as a number of entries
 #### Deprecated
 Use session-level cache configuration instead.
 Create a Session with custom cache sizes and pass it to the connect() function.
 The exact meaning of an "entry" will depend on the type of index:
 - IVF: there is one entry for each IVF partition
 - BTREE: there is one entry for the entire index
--- a/docs/src/js/interfaces/OptimizeOptions.md
+++ b/docs/src/js/interfaces/OptimizeOptions.md
@@ -24,10 +24,10 @@ The default is 7 days
 // Delete all versions older than 1 day
 const olderThan = new Date();
 olderThan.setDate(olderThan.getDate() - 1);
-tbl.cleanupOlderVersions(olderThan);
+tbl.optimize({cleanupOlderThan: olderThan});
 // Delete all versions except the current version
-tbl.cleanupOlderVersions(new Date());
+tbl.optimize({cleanupOlderThan: new Date()});
 ```
 ***
--- a/docs/src/js/interfaces/TimeoutConfig.md
+++ b/docs/src/js/interfaces/TimeoutConfig.md
@@ -44,3 +44,17 @@ optional readTimeout: number;
 The timeout for reading data from the server in seconds. Default is 300
 seconds (5 minutes). This can also be set via the environment variable
 `LANCE_CLIENT_READ_TIMEOUT`, as an integer number of seconds.
 ***
 ### timeout?
 ```ts
 optional timeout: number;
 ```
 The overall timeout for the entire request in seconds. This includes
 connection, send, and read time. If the entire request doesn't complete
 within this time, it will fail. By default there is no overall timeout.
 This can also be set via the environment variable `LANCE_CLIENT_TIMEOUT`,
 as an integer number of seconds.
--- a/docs/src/js/interfaces/TokenResponse.md
+++ b/docs/src/js/interfaces/TokenResponse.md
@@ -0,0 +1,25 @@
 [**@lancedb/lancedb**](../README.md) • **Docs**
 ***
 [@lancedb/lancedb](../globals.md) / TokenResponse
 # Interface: TokenResponse
 Token response from OAuth provider.
 ## Properties
 ### accessToken
 ```ts
 accessToken: string;
 ```
 ***
 ### expiresIn?
 ```ts
 optional expiresIn: number;
 ```
--- a/docs/src/js/type-aliases/MultiVector.md
+++ b/docs/src/js/type-aliases/MultiVector.md
@@ -0,0 +1,11 @@
 [**@lancedb/lancedb**](../README.md) • **Docs**
 ***
 [@lancedb/lancedb](../globals.md) / MultiVector
 # Type Alias: MultiVector
 ```ts
 type MultiVector: IntoVector[];
 ```
--- a/docs/src/notebooks/Multivector_on_LanceDB.ipynb
+++ b/docs/src/notebooks/Multivector_on_LanceDB.ipynb
@@ -428,7 +428,7 @@
        "\n",
        "**Why?**  \n",
        "Embedding the UFO dataset and ingesting it into LanceDB takes **~2 hours on a T4 GPU**. To save time:  \n",
-        "- **Use the pre-prepared table with index created ** (provided below) to proceed directly to step7: search.  \n",
+        "- **Use the pre-prepared table with index created** (provided below) to proceed directly to **Step 7**: search.  \n",
        "- **Step 5a** contains the full ingestion code for reference (run it only if necessary).  \n",
        "- **Step 6** contains the details on creating the index on the multivector column"
      ]
--- a/docs/test/md_testing.py
+++ b/docs/test/md_testing.py
@@ -30,7 +30,8 @@ excluded_globs = [
    "../src/rag/advanced_techniques/*.md",
    "../src/guides/scalar_index.md",
    "../src/guides/storage.md",
-    "../src/search.md"
+    "../src/search.md",
    "../src/guides/sql_querying.md",
 ]
 python_prefix = "py"
--- a/java/.mvn/wrapper/maven-wrapper.properties
+++ b/java/.mvn/wrapper/maven-wrapper.properties
@@ -0,0 +1,19 @@
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 wrapperVersion=3.3.2
 distributionType=only-script
 distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.9/apache-maven-3.9.9-bin.zip
--- a/java/README.md
+++ b/java/README.md
@@ -0,0 +1,37 @@
 # LanceDB Java SDK
 ## Configuration and Initialization
 ### LanceDB Cloud
 For LanceDB Cloud, use the simplified builder API:
 ```java
 import com.lancedb.lance.namespace.LanceRestNamespace;
 import com.lancedb.lancedb.LanceDbRestNamespaces;
 // If your DB url is db://example-db, then your database here is example-db
 LanceRestNamespace namespace = LanceDbRestNamespaces.builder()
    .apiKey("your_lancedb_cloud_api_key")
    .database("your_database_name")
    .build();
 ```
 ### LanceDB Enterprise
 For Enterprise deployments, use your VPC endpoint:
 ```java
 LanceRestNamespace namespace = LanceDbRestNamespaces.builder()
    .apiKey("your_lancedb_enterprise_api_key")
    .database("your-top-dir") // Your top level folder under your cloud bucket, e.g. s3://your-bucket/your-top-dir/
    .hostOverride("http://<vpc_endpoint_dns_name>:80")
    .build();
 ```
 ## Development
 Build:
 ```shell
 ./mvnw install
 ```
--- a/java/core/lancedb-jni/Cargo.toml
+++ b/java/core/lancedb-jni/Cargo.toml
@@ -15,13 +15,16 @@ publish = false
 crate-type = ["cdylib"]
 [dependencies]
-lancedb = { path = "../../../rust/lancedb" }
+lancedb = { path = "../../../rust/lancedb", default-features = false }
 lance = { workspace = true }
 arrow = { workspace = true, features = ["ffi"] }
 arrow-schema.workspace = true
-tokio = "1.23"
+tokio = "1.46"
 jni = "0.21.1"
 snafu.workspace = true
 lazy_static.workspace = true
 serde = { version = "^1" }
 serde_json = { version = "1" }
 [features]
 default = ["lancedb/default"]
--- a/java/core/lancedb-jni/src/ffi.rs
+++ b/java/core/lancedb-jni/src/ffi.rs
@@ -16,6 +16,7 @@ pub trait JNIEnvExt {
    fn get_integers(&mut self, obj: &JObject) -> Result<Vec<i32>>;
    /// Get strings from Java List<String> object.
    #[allow(dead_code)]
    fn get_strings(&mut self, obj: &JObject) -> Result<Vec<String>>;
    /// Get strings from Java String[] object.
--- a/java/core/lancedb-jni/src/traits.rs
+++ b/java/core/lancedb-jni/src/traits.rs
@@ -6,6 +6,7 @@ use jni::JNIEnv;
 use crate::Result;
 #[allow(dead_code)]
 pub trait FromJObject<T> {
    fn extract(&self) -> Result<T>;
 }
@@ -39,6 +40,7 @@ impl FromJObject<f64> for JObject<'_> {
    }
 }
 #[allow(dead_code)]
 pub trait FromJString {
    fn extract(&self, env: &mut JNIEnv) -> Result<String>;
 }
@@ -66,6 +68,7 @@ pub trait JMapExt {
    fn get_f64(&self, env: &mut JNIEnv, key: &str) -> Result<Option<f64>>;
 }
 #[allow(dead_code)]
 fn get_map_value<T>(env: &mut JNIEnv, map: &JMap, key: &str) -> Result<Option<T>>
 where
    for<'a> JObject<'a>: FromJObject<T>,
--- a/java/core/pom.xml
+++ b/java/core/pom.xml
@@ -8,18 +8,24 @@
    <parent>
        <groupId>com.lancedb</groupId>
        <artifactId>lancedb-parent</artifactId>
-        <version>0.20.1-beta.2</version>
+        <version>0.22.1-beta.2</version>
        <relativePath>../pom.xml</relativePath>
    </parent>
    <artifactId>lancedb-core</artifactId>
-    <name>LanceDB Core</name>
+    <name>${project.artifactId}</name>
    <description>LanceDB Core</description>
    <packaging>jar</packaging>
    <properties>
        <rust.release.build>false</rust.release.build>
    </properties>
    <dependencies>
        <dependency>
            <groupId>com.lancedb</groupId>
            <artifactId>lance-namespace-core</artifactId>
            <version>0.0.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.arrow</groupId>
            <artifactId>arrow-vector</artifactId>
--- a/java/lance-namespace/pom.xml
+++ b/java/lance-namespace/pom.xml
@@ -0,0 +1,26 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project xmlns="http://maven.apache.org/POM/4.0.0"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>com.lancedb</groupId>
        <artifactId>lancedb-parent</artifactId>
        <version>0.22.1-beta.2</version>
        <relativePath>../pom.xml</relativePath>
    </parent>
    <artifactId>lancedb-lance-namespace</artifactId>
    <name>${project.artifactId}</name>
    <description>LanceDB Java Integration with Lance Namespace</description>
    <packaging>jar</packaging>
    <dependencies>
        <dependency>
            <groupId>com.lancedb</groupId>
            <artifactId>lance-namespace-core</artifactId>
        </dependency>
    </dependencies>
 </project>
--- a/java/lance-namespace/src/main/java/com/lancedb/lancedb/LanceDbRestNamespaces.java
+++ b/java/lance-namespace/src/main/java/com/lancedb/lancedb/LanceDbRestNamespaces.java
@@ -0,0 +1,146 @@
 /*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package com.lancedb.lancedb;
 import com.lancedb.lance.namespace.LanceRestNamespace;
 import com.lancedb.lance.namespace.client.apache.ApiClient;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.Optional;
 /**
 * Fluent helper that assembles a {@link LanceRestNamespace} configured for LanceDB.
 *
 * <p>Obtain an instance with {@link #builder()}, supply at least an API key and a database name,
 * then call {@link #build()}.
 */
 public class LanceDbRestNamespaces {
  private static final String DEFAULT_REGION = "us-east-1";
  private static final String CLOUD_URL_PATTERN = "https://%s.%s.api.lancedb.com";
  private String apiKey;
  private String database;
  private Optional<String> hostOverride = Optional.empty();
  private Optional<String> region = Optional.empty();
  private Map<String, String> additionalConfig = new HashMap<>();
  private LanceDbRestNamespaces() {}
  /** Returns true when the given text is null or contains nothing but whitespace. */
  private static boolean isBlank(String text) {
    return text == null || text.trim().isEmpty();
  }
  /**
   * Start a fresh builder with no values set.
   *
   * @return A new, empty {@code LanceDbRestNamespaces} builder
   */
  public static LanceDbRestNamespaces builder() {
    return new LanceDbRestNamespaces();
  }
  /**
   * Provide the LanceDB API key. This value is mandatory.
   *
   * @param apiKey The LanceDB API key
   * @return This builder
   * @throws IllegalArgumentException if the key is null or blank
   */
  public LanceDbRestNamespaces apiKey(String apiKey) {
    if (isBlank(apiKey)) {
      throw new IllegalArgumentException("API key cannot be null or empty");
    }
    this.apiKey = apiKey;
    return this;
  }
  /**
   * Provide the database name. This value is mandatory.
   *
   * @param database The database name
   * @return This builder
   * @throws IllegalArgumentException if the name is null or blank
   */
  public LanceDbRestNamespaces database(String database) {
    if (isBlank(database)) {
      throw new IllegalArgumentException("Database cannot be null or empty");
    }
    this.database = database;
    return this;
  }
  /**
   * Provide a complete base URL to use instead of the LanceDB Cloud URL that would otherwise be
   * derived from the database and region (optional). Intended for LanceDB Enterprise deployments.
   * Passing null clears any previously set override.
   *
   * @param hostOverride The complete base URL (e.g., "http://your-vpc-endpoint:80")
   * @return This builder
   */
  public LanceDbRestNamespaces hostOverride(String hostOverride) {
    this.hostOverride = hostOverride == null ? Optional.empty() : Optional.of(hostOverride);
    return this;
  }
  /**
   * Choose the LanceDB Cloud region (optional). When unset or null, "us-east-1" is used. The
   * region plays no part in the URL when a host override has been supplied.
   *
   * @param region The AWS region (e.g., "us-east-1", "eu-west-1")
   * @return This builder
   */
  public LanceDbRestNamespaces region(String region) {
    this.region = region == null ? Optional.empty() : Optional.of(region);
    return this;
  }
  /**
   * Register one extra configuration entry. Entries written by {@link #build()} itself are applied
   * afterwards and therefore take precedence over an entry registered here under the same key.
   *
   * @param key The configuration key
   * @param value The configuration value
   * @return This builder
   */
  public LanceDbRestNamespaces config(String key, String value) {
    this.additionalConfig.put(key, value);
    return this;
  }
  /**
   * Create the namespace from the values collected so far.
   *
   * @return A configured LanceRestNamespace
   * @throws IllegalStateException if the API key or the database has not been set
   */
  public LanceRestNamespace build() {
    // Both mandatory values must have been supplied before anything is constructed.
    if (apiKey == null) {
      throw new IllegalStateException("API key is required");
    }
    if (database == null) {
      throw new IllegalStateException("Database is required");
    }
    // Start from a copy of the user-supplied entries so the builder itself is left untouched.
    Map<String, String> settings = new HashMap<>(additionalConfig);
    settings.put("headers.x-lancedb-database", database);
    settings.put("headers.x-api-key", apiKey);
    // An explicit host wins; otherwise the cloud URL is derived from database and region.
    final String endpoint;
    if (!hostOverride.isPresent()) {
      String cloudRegion = region.isPresent() ? region.get() : DEFAULT_REGION;
      settings.put("region", cloudRegion);
      endpoint = String.format(CLOUD_URL_PATTERN, database, cloudRegion);
    } else {
      endpoint = hostOverride.get();
      settings.put("host_override", endpoint);
    }
    ApiClient client = new ApiClient();
    client.setBasePath(endpoint);
    return new LanceRestNamespace(client, settings);
  }
 }
--- a/java/mvnw
+++ b/java/mvnw
@@ -0,0 +1,259 @@
 #!/bin/sh
 # ----------------------------------------------------------------------------
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 # ----------------------------------------------------------------------------
 # ----------------------------------------------------------------------------
 # Apache Maven Wrapper startup batch script, version 3.3.2
 #
 # Optional ENV vars
 # -----------------
 #   JAVA_HOME - location of a JDK home dir, required when download maven via java source
 #   MVNW_REPOURL - repo url base for downloading maven distribution
 #   MVNW_USERNAME/MVNW_PASSWORD - user and password for downloading maven
 #   MVNW_VERBOSE - true: enable verbose log; debug: trace the mvnw script; others: silence the output
 # ----------------------------------------------------------------------------
 set -euf
 [ "${MVNW_VERBOSE-}" != debug ] || set -x
 # OS specific support.
 native_path() { printf %s\\n "$1"; }
 case "$(uname)" in
 CYGWIN* | MINGW*)
  [ -z "${JAVA_HOME-}" ] || JAVA_HOME="$(cygpath --unix "$JAVA_HOME")"
  native_path() { cygpath --path --windows "$1"; }
  ;;
 esac
 # Resolve the java and javac executables into JAVACMD and JAVACCMD.
 # When JAVA_HOME is set, the executables are taken from it and the function
 # returns 1 if bin/java or bin/javac is missing or not executable. Otherwise
 # both commands are looked up on PATH and the function returns 1 if either
 # lookup does not yield an executable.
 set_java_home() {
  # For Cygwin and MinGW, ensure paths are in Unix format before anything is touched
  if [ -n "${JAVA_HOME-}" ]; then
    if [ -x "$JAVA_HOME/jre/sh/java" ]; then
      # IBM's JDK on AIX uses strange locations for the executables
      JAVACMD="$JAVA_HOME/jre/sh/java"
      JAVACCMD="$JAVA_HOME/jre/sh/javac"
    else
      JAVACMD="$JAVA_HOME/bin/java"
      JAVACCMD="$JAVA_HOME/bin/javac"
      if [ ! -x "$JAVACMD" ] || [ ! -x "$JAVACCMD" ]; then
        echo "The JAVA_HOME environment variable is not defined correctly, so mvnw cannot run." >&2
        echo "JAVA_HOME is set to \"$JAVA_HOME\", but \"\$JAVA_HOME/bin/java\" or \"\$JAVA_HOME/bin/javac\" does not exist." >&2
        return 1
      fi
    fi
  else
    # The lookups run in a subshell with errexit turned off and any shell function
    # named `command` removed; a failed lookup is tolerated here (`|| :`) and is
    # reported by the executable check further down instead.
    JAVACMD="$(
      'set' +e
      'unset' -f command 2>/dev/null
      'command' -v java
    )" || :
    JAVACCMD="$(
      'set' +e
      'unset' -f command 2>/dev/null
      'command' -v javac
    )" || :
    if [ ! -x "${JAVACMD-}" ] || [ ! -x "${JAVACCMD-}" ]; then
      echo "The java/javac command does not exist in PATH nor is JAVA_HOME set, so mvnw cannot run." >&2
      return 1
    fi
  fi
 }
 # hash string like Java String::hashCode
 # Prints the hash of $1 in lowercase hexadecimal followed by a newline.
 # The accumulator is kept modulo 2^32 (4294967296) at every step.
 # Note: str, h and char are plain (non-local) shell variables.
 hash_string() {
  str="${1:-}" h=0
  while [ -n "$str" ]; do
    # "${str#?}" is str without its first character; stripping that as a suffix
    # leaves only the first character.
    char="${str%"${str#?}"}"
    # printf %d "'X" yields the numeric code of X; LC_CTYPE=C is set for that call.
    h=$(((h * 31 + $(LC_CTYPE=C printf %d "'$char")) % 4294967296))
    # Drop the character that was just consumed.
    str="${str#?}"
  done
  printf %x\\n $h
 }
 # No-op logger; redefined on the next line to print $1 when MVNW_VERBOSE is "true".
 verbose() { :; }
 [ "${MVNW_VERBOSE-}" != true ] || verbose() { printf %s\\n "${1-}"; }
 # Print the message in $1 to stderr and exit the script with status 1.
 die() {
  printf %s\\n "$1" >&2
  exit 1
 }
 # Print $1 with all whitespace characters removed.
 trim() {
  # MWRAPPER-139:
  #   Removes whitespace, carriage returns, tabs, and linefeeds from the value.
  #   Note that `tr -d '[:space:]'` deletes whitespace anywhere in the string,
  #   not only at its leading and trailing ends.
  #   Needed for removing poorly interpreted newline sequences when running in more
  #   exotic environments such as mingw bash on Windows.
  printf "%s" "${1}" | tr -d '[:space:]'
 }
 # parse distributionUrl and optional distributionSha256Sum, requires .mvn/wrapper/maven-wrapper.properties
 while IFS="=" read -r key value; do
  case "${key-}" in
  distributionUrl) distributionUrl=$(trim "${value-}") ;;
  distributionSha256Sum) distributionSha256Sum=$(trim "${value-}") ;;
  esac
 done <"${0%/*}/.mvn/wrapper/maven-wrapper.properties"
 [ -n "${distributionUrl-}" ] || die "cannot read distributionUrl property in ${0%/*}/.mvn/wrapper/maven-wrapper.properties"
 case "${distributionUrl##*/}" in
 maven-mvnd-*bin.*)
  MVN_CMD=mvnd.sh _MVNW_REPO_PATTERN=/maven/mvnd/
  case "${PROCESSOR_ARCHITECTURE-}${PROCESSOR_ARCHITEW6432-}:$(uname -a)" in
  *AMD64:CYGWIN* | *AMD64:MINGW*) distributionPlatform=windows-amd64 ;;
  :Darwin*x86_64) distributionPlatform=darwin-amd64 ;;
  :Darwin*arm64) distributionPlatform=darwin-aarch64 ;;
  :Linux*x86_64*) distributionPlatform=linux-amd64 ;;
  *)
    echo "Cannot detect native platform for mvnd on $(uname)-$(uname -m), use pure java version" >&2
    distributionPlatform=linux-amd64
    ;;
  esac
  distributionUrl="${distributionUrl%-bin.*}-$distributionPlatform.zip"
  ;;
 maven-mvnd-*) MVN_CMD=mvnd.sh _MVNW_REPO_PATTERN=/maven/mvnd/ ;;
 *) MVN_CMD="mvn${0##*/mvnw}" _MVNW_REPO_PATTERN=/org/apache/maven/ ;;
 esac
 # apply MVNW_REPOURL and calculate MAVEN_HOME
 # maven home pattern: ~/.m2/wrapper/dists/{apache-maven-<version>,maven-mvnd-<version>-<platform>}/<hash>
 [ -z "${MVNW_REPOURL-}" ] || distributionUrl="$MVNW_REPOURL$_MVNW_REPO_PATTERN${distributionUrl#*"$_MVNW_REPO_PATTERN"}"
 distributionUrlName="${distributionUrl##*/}"
 distributionUrlNameMain="${distributionUrlName%.*}"
 distributionUrlNameMain="${distributionUrlNameMain%-bin}"
 MAVEN_USER_HOME="${MAVEN_USER_HOME:-${HOME}/.m2}"
 MAVEN_HOME="${MAVEN_USER_HOME}/wrapper/dists/${distributionUrlNameMain-}/$(hash_string "$distributionUrl")"
 # Replace the current shell with the launcher at $MAVEN_HOME/bin/$MVN_CMD, passing
 # all arguments through. The wrapper-only MVNW_* variables are unset beforehand
 # (a failure to unset is ignored); if the exec itself fails, die reports it.
 exec_maven() {
  unset MVNW_VERBOSE MVNW_USERNAME MVNW_PASSWORD MVNW_REPOURL || :
  exec "$MAVEN_HOME/bin/$MVN_CMD" "$@" || die "cannot exec $MAVEN_HOME/bin/$MVN_CMD"
 }
 if [ -d "$MAVEN_HOME" ]; then
  verbose "found existing MAVEN_HOME at $MAVEN_HOME"
  exec_maven "$@"
 fi
 case "${distributionUrl-}" in
 *?-bin.zip | *?maven-mvnd-?*-?*.zip) ;;
 *) die "distributionUrl is not valid, must match *-bin.zip or maven-mvnd-*.zip, but found '${distributionUrl-}'" ;;
 esac
 # prepare tmp dir
 if TMP_DOWNLOAD_DIR="$(mktemp -d)" && [ -d "$TMP_DOWNLOAD_DIR" ]; then
  clean() { rm -rf -- "$TMP_DOWNLOAD_DIR"; }
  trap clean HUP INT TERM EXIT
 else
  die "cannot create temp dir"
 fi
 mkdir -p -- "${MAVEN_HOME%/*}"
 # Download and Install Apache Maven
 verbose "Couldn't find MAVEN_HOME, downloading and installing it ..."
 verbose "Downloading from: $distributionUrl"
 verbose "Downloading to: $TMP_DOWNLOAD_DIR/$distributionUrlName"
 # select .zip or .tar.gz
 if ! command -v unzip >/dev/null; then
  distributionUrl="${distributionUrl%.zip}.tar.gz"
  distributionUrlName="${distributionUrl##*/}"
 fi
 # verbose opt
 __MVNW_QUIET_WGET=--quiet __MVNW_QUIET_CURL=--silent __MVNW_QUIET_UNZIP=-q __MVNW_QUIET_TAR=''
 [ "${MVNW_VERBOSE-}" != true ] || __MVNW_QUIET_WGET='' __MVNW_QUIET_CURL='' __MVNW_QUIET_UNZIP='' __MVNW_QUIET_TAR=v
 # normalize http auth
 case "${MVNW_PASSWORD:+has-password}" in
 '') MVNW_USERNAME='' MVNW_PASSWORD='' ;;
 has-password) [ -n "${MVNW_USERNAME-}" ] || MVNW_USERNAME='' MVNW_PASSWORD='' ;;
 esac
 if [ -z "${MVNW_USERNAME-}" ] && command -v wget >/dev/null; then
  verbose "Found wget ... using wget"
  wget ${__MVNW_QUIET_WGET:+"$__MVNW_QUIET_WGET"} "$distributionUrl" -O "$TMP_DOWNLOAD_DIR/$distributionUrlName" || die "wget: Failed to fetch $distributionUrl"
 elif [ -z "${MVNW_USERNAME-}" ] && command -v curl >/dev/null; then
  verbose "Found curl ... using curl"
  curl ${__MVNW_QUIET_CURL:+"$__MVNW_QUIET_CURL"} -f -L -o "$TMP_DOWNLOAD_DIR/$distributionUrlName" "$distributionUrl" || die "curl: Failed to fetch $distributionUrl"
 elif set_java_home; then
  verbose "Falling back to use Java to download"
  javaSource="$TMP_DOWNLOAD_DIR/Downloader.java"
  targetZip="$TMP_DOWNLOAD_DIR/$distributionUrlName"
  cat >"$javaSource" <<-END
 	public class Downloader extends java.net.Authenticator
 	{
 	  protected java.net.PasswordAuthentication getPasswordAuthentication()
 	  {
 	    return new java.net.PasswordAuthentication( System.getenv( "MVNW_USERNAME" ), System.getenv( "MVNW_PASSWORD" ).toCharArray() );
 	  }
 	  public static void main( String[] args ) throws Exception
 	  {
 	    setDefault( new Downloader() );
 	    java.nio.file.Files.copy( java.net.URI.create( args[0] ).toURL().openStream(), java.nio.file.Paths.get( args[1] ).toAbsolutePath().normalize() );
 	  }
 	}
 	END
  # For Cygwin/MinGW, switch paths to Windows format before running javac and java
  verbose " - Compiling Downloader.java ..."
  "$(native_path "$JAVACCMD")" "$(native_path "$javaSource")" || die "Failed to compile Downloader.java"
  verbose " - Running Downloader.java ..."
  "$(native_path "$JAVACMD")" -cp "$(native_path "$TMP_DOWNLOAD_DIR")" Downloader "$distributionUrl" "$(native_path "$targetZip")"
 fi
 # If specified, validate the SHA-256 sum of the Maven distribution zip file
 if [ -n "${distributionSha256Sum-}" ]; then
  distributionSha256Result=false
  if [ "$MVN_CMD" = mvnd.sh ]; then
    echo "Checksum validation is not supported for maven-mvnd." >&2
    echo "Please disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." >&2
    exit 1
  elif command -v sha256sum >/dev/null; then
    if echo "$distributionSha256Sum  $TMP_DOWNLOAD_DIR/$distributionUrlName" | sha256sum -c >/dev/null 2>&1; then
      distributionSha256Result=true
    fi
  elif command -v shasum >/dev/null; then
    if echo "$distributionSha256Sum  $TMP_DOWNLOAD_DIR/$distributionUrlName" | shasum -a 256 -c >/dev/null 2>&1; then
      distributionSha256Result=true
    fi
  else
    echo "Checksum validation was requested but neither 'sha256sum' or 'shasum' are available." >&2
    echo "Please install either command, or disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." >&2
    exit 1
  fi
  if [ $distributionSha256Result = false ]; then
    echo "Error: Failed to validate Maven distribution SHA-256, your Maven distribution might be compromised." >&2
    echo "If you updated your Maven version, you need to update the specified distributionSha256Sum property." >&2
    exit 1
  fi
 fi
 # unzip and move
 if command -v unzip >/dev/null; then
  unzip ${__MVNW_QUIET_UNZIP:+"$__MVNW_QUIET_UNZIP"} "$TMP_DOWNLOAD_DIR/$distributionUrlName" -d "$TMP_DOWNLOAD_DIR" || die "failed to unzip"
 else
  tar xzf${__MVNW_QUIET_TAR:+"$__MVNW_QUIET_TAR"} "$TMP_DOWNLOAD_DIR/$distributionUrlName" -C "$TMP_DOWNLOAD_DIR" || die "failed to untar"
 fi
 printf %s\\n "$distributionUrl" >"$TMP_DOWNLOAD_DIR/$distributionUrlNameMain/mvnw.url"
 mv -- "$TMP_DOWNLOAD_DIR/$distributionUrlNameMain" "$MAVEN_HOME" || [ -d "$MAVEN_HOME" ] || die "fail to move MAVEN_HOME"
 clean || :
 exec_maven "$@"
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -6,11 +6,10 @@
    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-parent</artifactId>
-    <version>0.20.1-beta.2</version>
+    <version>0.22.1-beta.2</version>
    <packaging>pom</packaging>
-
+    <name>${project.artifactId}</name>
-    <name>LanceDB Parent</name>
+    <description>LanceDB Java SDK Parent POM</description>
    <description>LanceDB vector database Java API</description>
    <url>http://lancedb.com/</url>
    <developers>
@@ -29,6 +28,7 @@
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <arrow.version>15.0.0</arrow.version>
        <lance-namespace.verison>0.0.1</lance-namespace.verison>
        <spotless.skip>false</spotless.skip>
        <spotless.version>2.30.0</spotless.version>
        <spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
@@ -52,6 +52,7 @@
    <modules>
        <module>core</module>
        <module>lance-namespace</module>
    </modules>
    <scm>
@@ -62,6 +63,11 @@
    <dependencyManagement>
        <dependencies>
            <dependency>
                <groupId>com.lancedb</groupId>
                <artifactId>lance-namespace-core</artifactId>
                <version>${lance-namespace.verison}</version>
            </dependency>
            <dependency>
                <groupId>org.apache.arrow</groupId>
                <artifactId>arrow-vector</artifactId>
--- a/node/.eslintrc.js
+++ b/node/.eslintrc.js
@@ -1,22 +0,0 @@
 module.exports = {
  env: {
    browser: true,
    es2021: true
  },
  extends: 'standard-with-typescript',
  overrides: [
  ],
  parserOptions: {
    project: './tsconfig.json',
    ecmaVersion: 'latest',
    sourceType: 'module'
  },
  rules: {
    "@typescript-eslint/method-signature-style": "off",
    "@typescript-eslint/quotes": "off",
    "@typescript-eslint/semi": "off",
    "@typescript-eslint/explicit-function-return-type": "off",
    "@typescript-eslint/space-before-function-paren": "off",
    "@typescript-eslint/indent": "off",
  }
 }
--- a/node/.npmignore
+++ b/node/.npmignore
@@ -1,4 +0,0 @@
 gen_test_data.py
 index.node
 dist/lancedb*.tgz
 vectordb*.tgz
--- a/node/CHANGELOG.md
+++ b/node/CHANGELOG.md
@@ -1,64 +0,0 @@
 # Changelog
 All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 ## [0.1.5] - 2023-06-00
 ### Added
 - Support for macOS X86
 ## [0.1.4] - 2023-06-03
 ### Added
 - Select / Project query API
 ### Changed
 -  Deprecated created_index in favor of createIndex
 ## [0.1.3] - 2023-06-01
 ### Added
 - Support S3 and Google Cloud Storage
 - Embedding functions support
 - OpenAI embedding function
 ## [0.1.2] - 2023-05-27
 ### Added
 - Append records API
 - Extra query params to nodejs client
 - Create_index API
 ### Fixed
 - bugfix: string columns should be converted to Utf8Array (#94)
 ## [0.1.1] - 2023-05-16
 ### Added
 - create_table API
 - limit parameter for queries
 - Typescript / JavaScript examples
 - Linux support
 ## [0.1.0] - 2023-05-16
 ### Added
 - Initial  JavaScript / Node.js library for LanceDB
 - Read-only api to query LanceDB datasets
 - Supports macOS arm only
 ## [pre-0.1.0]
 - Various prototypes / test builds
--- a/node/README.md
+++ b/node/README.md
@@ -1,66 +0,0 @@
 # LanceDB
 A JavaScript / Node.js library for [LanceDB](https://github.com/lancedb/lancedb).
 **DEPRECATED: This library is deprecated. Please use the new client,
 [@lancedb/lancedb](https://www.npmjs.com/package/@lancedb/lancedb).**
 ## Installation
 ```bash
 npm install vectordb
 ```
 This will download the appropriate native library for your platform. We currently
 support:
 * Linux (x86_64 and aarch64)
 * MacOS (Intel and ARM/M1/M2)
 * Windows (x86_64 only)
 We do not yet support musl-based Linux (such as Alpine Linux) or aarch64 Windows.
 ## Usage
 ### Basic Example
 ```javascript
 const lancedb = require('vectordb');
 const db = await lancedb.connect('data/sample-lancedb');
 const table = await db.createTable("my_table",
      [{ id: 1, vector: [0.1, 1.0], item: "foo", price: 10.0 },
      { id: 2, vector: [3.9, 0.5], item: "bar", price: 20.0 }])
 const results = await table.search([0.1, 0.3]).limit(20).execute();
 console.log(results);
 ```
 The [examples](./examples) folder contains complete examples.
 ## Development
 To build everything fresh:
 ```bash
 npm install
 npm run build
 ```
 Then you should be able to run the tests with:
 ```bash
 npm test
 ```
 ### Fix lints
 To run the linter and have it automatically fix all errors:
 ```bash
 npm run lint -- --fix
 ```
 To build the documentation:
 ```bash
 npx typedoc --plugin typedoc-plugin-markdown --out ../docs/src/javascript src/index.ts
 ```
--- a/node/examples/js-openai/index.js
+++ b/node/examples/js-openai/index.js
@@ -1,41 +0,0 @@
 // Copyright 2023 Lance Developers.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 'use strict'
 async function example () {
  const lancedb = require('vectordb')
  // You need to provide an OpenAI API key, here we read it from the OPENAI_API_KEY environment variable
  const apiKey = process.env.OPENAI_API_KEY
  // The embedding function will create embeddings for the 'text' column(text in this case)
  const embedding = new lancedb.OpenAIEmbeddingFunction('text', apiKey)
  const db = await lancedb.connect('data/sample-lancedb')
  const data = [
    { id: 1, text: 'Black T-Shirt', price: 10 },
    { id: 2, text: 'Leather Jacket', price: 50 }
  ]
  const table = await db.createTable('vectors', data, embedding)
  console.log(await db.tableNames())
  const results = await table
    .search('keeps me warm')
    .limit(1)
    .execute()
  console.log(results[0].text)
 }
 example().then(_ => { console.log('All done!') })
--- a/node/examples/js-openai/package.json
+++ b/node/examples/js-openai/package.json
@@ -1,15 +0,0 @@
 {
  "name": "vectordb-example-js-openai",
  "version": "1.0.0",
  "description": "",
  "main": "index.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "author": "Lance Devs",
  "license": "Apache-2.0",
  "dependencies": {
    "vectordb": "file:../..",
    "openai": "^3.2.1"
  }
 }
--- a/node/examples/js-transformers/index.js
+++ b/node/examples/js-transformers/index.js
@@ -1,66 +0,0 @@
 // Copyright 2023 Lance Developers.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 'use strict'
 async function example() {
    const lancedb = require('vectordb')
    // Import transformers and the all-MiniLM-L6-v2 model (https://huggingface.co/Xenova/all-MiniLM-L6-v2)
    const { pipeline } = await import('@xenova/transformers')
    const pipe = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
    // Create embedding function from pipeline which returns a list of vectors from batch
    // sourceColumn is the name of the column in the data to be embedded
    //
    // Output of pipe is a Tensor { data: Float32Array(384) }, so filter for the vector
    const embed_fun = {}
    embed_fun.sourceColumn = 'text'
    embed_fun.embed = async function (batch) {
        let result = []
        for (let text of batch) {
            const res = await pipe(text, { pooling: 'mean', normalize: true })
            result.push(Array.from(res['data']))
        }
        return (result)
    }
    // Link a folder and create a table with data
    const db = await lancedb.connect('data/sample-lancedb')
    const data = [
        { id: 1, text: 'Cherry', type: 'fruit' },
        { id: 2, text: 'Carrot', type: 'vegetable' },
        { id: 3, text: 'Potato', type: 'vegetable' },
        { id: 4, text: 'Apple', type: 'fruit' },
        { id: 5, text: 'Banana', type: 'fruit' }
    ]
    const table = await db.createTable('food_table', data, embed_fun)
    // Query the table
    const results = await table
        .search("a sweet fruit to eat")
        .metricType("cosine")
        .limit(2)
        .execute()
    console.log(results.map(r => r.text))
 }
 example().then(_ => { console.log("Done!") })
--- a/node/examples/js-transformers/package.json
+++ b/node/examples/js-transformers/package.json
@@ -1,16 +0,0 @@
 {
  "name": "vectordb-example-js-transformers",
  "version": "1.0.0",
  "description": "Example for using transformers.js with lancedb",
  "main": "index.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "author": "Lance Devs",
  "license": "Apache-2.0",
  "dependencies": {
    "@xenova/transformers": "^2.4.1",
    "vectordb": "file:../.."
  }
 }
--- a/node/examples/js-youtube-transcripts/index.js
+++ b/node/examples/js-youtube-transcripts/index.js
@@ -1,122 +0,0 @@
 // Copyright 2023 Lance Developers.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 'use strict'
 const lancedb = require('vectordb')
 const fs = require('fs/promises')
 const readline = require('readline/promises')
 const { stdin: input, stdout: output } = require('process')
 const { Configuration, OpenAIApi } = require('openai')
 // Download file from XYZ
 const INPUT_FILE_NAME = 'data/youtube-transcriptions_sample.jsonl';
 (async () => {
  // You need to provide an OpenAI API key, here we read it from the OPENAI_API_KEY environment variable
  const apiKey = process.env.OPENAI_API_KEY
  // The embedding function will create embeddings for the 'context' column
  const embedFunction = new lancedb.OpenAIEmbeddingFunction('context', apiKey)
  // Connects to LanceDB
  const db = await lancedb.connect('data/youtube-lancedb')
  // Open the vectors table or create one if it does not exist
  let tbl
  if ((await db.tableNames()).includes('vectors')) {
    tbl = await db.openTable('vectors', embedFunction)
  } else {
    tbl = await createEmbeddingsTable(db, embedFunction)
  }
  // Use OpenAI Completion API to generate and answer based on the context that LanceDB provides
  const configuration = new Configuration({ apiKey })
  const openai = new OpenAIApi(configuration)
  const rl = readline.createInterface({ input, output })
  try {
    while (true) {
      const query = await rl.question('Prompt: ')
      const results = await tbl
        .search(query)
        .select(['title', 'text', 'context'])
        .limit(3)
        .execute()
      // console.table(results)
      const response = await openai.createCompletion({
        model: 'text-davinci-003',
        prompt: createPrompt(query, results),
        max_tokens: 400,
        temperature: 0,
        top_p: 1,
        frequency_penalty: 0,
        presence_penalty: 0
      })
      console.log(response.data.choices[0].text)
    }
  } catch (err) {
    console.log('Error: ', err)
  } finally {
    rl.close()
  }
  process.exit(1)
 })()
 // Loads the JSONL transcript file, attaches a context string to every record and
 // stores the result in a new 'vectors' table.
 //
 // db            - connection on which createTable is called
 // embedFunction - embedding function passed through to createTable
 //
 // Resolves to whatever db.createTable resolves to.
 async function createEmbeddingsTable (db, embedFunction) {
  console.log(`Creating embeddings from ${INPUT_FILE_NAME}`)
  const fileContents = await fs.readFile(INPUT_FILE_NAME, 'utf-8')
  // One JSON document per line; empty lines are skipped
  const records = []
  for (const line of fileContents.toString().split('\n')) {
    if (line.length > 0) {
      records.push(JSON.parse(line))
    }
  }
  const data = contextualize(records, 20, 'video_id')
  return await db.createTable('vectors', data, embedFunction)
 }
 // Each transcript row carries only a short `text` snippet, so every row is given a
 // `context` string made of its own text plus the text of up to `contextSize` rows
 // that precede it within the same group, joined by single spaces.
 //
 // rows        - array of row objects; each one is mutated in place (gains `context`)
 // contextSize - maximum number of preceding rows of the same group to include
 // groupColumn - name of the property whose value identifies a row's group
 //
 // Returns a flat array containing the same row objects, emitted group by group.
 function contextualize (rows, contextSize, groupColumn) {
  // A prototype-less object serves as the lookup table so that group values such as
  // 'length' or 'constructor' cannot collide with built-in properties.
  const grouped = Object.create(null)
  rows.forEach(row => {
    const groupKey = row[groupColumn]
    if (grouped[groupKey] === undefined) {
      grouped[groupKey] = []
    }
    grouped[groupKey].push(row)
  })
  const data = []
  Object.keys(grouped).forEach(key => {
    const group = grouped[key]
    for (let i = 0; i < group.length; i++) {
      // The window covers the current row and at most `contextSize` rows before it
      const start = Math.max(0, i - contextSize)
      group[i].context = group.slice(start, i + 1).map(r => r.text).join(' ')
    }
    data.push(...group)
  })
  return data
 }
 // Assembles the completion prompt: a fixed instruction header, the retrieved
 // context strings separated by '---' dividers, then the question itself.
 //
 // query   - the question text placed after the contexts
 // context - array of objects whose `context` property holds the text to include
 //
 // Returns the prompt as a single string.
 function createPrompt (query, context) {
  const header =
      'Answer the question based on the context below.\n\n' +
      'Context:\n'
  const joinedContexts = context.map(c => c.context).join('\n\n---\n\n')
  // Only the joined contexts are capped, keeping the prompt under the max size
  const cappedContexts = joinedContexts.substring(0, 3750)
  return [header, cappedContexts, `\n\nQuestion: ${query}\nAnswer:`].join('')
 }
--- a/node/examples/js-youtube-transcripts/package.json
+++ b/node/examples/js-youtube-transcripts/package.json
@@ -1,15 +0,0 @@
 {
  "name": "vectordb-example-js-youtube-transcripts",
  "version": "1.0.0",
  "description": "",
  "main": "index.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "author": "Lance Devs",
  "license": "Apache-2.0",
  "dependencies": {
    "vectordb": "file:../..",
    "openai": "^3.2.1"
  }
 }
--- a/node/examples/js/index.js
+++ b/node/examples/js/index.js
@@ -1,36 +0,0 @@
 // Copyright 2023 Lance Developers.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 'use strict'
 async function example () {
  const lancedb = require('vectordb')
  const db = await lancedb.connect('data/sample-lancedb')
  const data = [
    { id: 1, vector: [0.1, 0.2], price: 10 },
    { id: 2, vector: [1.1, 1.2], price: 50 }
  ]
  const table = await db.createTable('vectors', data)
  console.log(await db.tableNames())
  const results = await table
      .search([0.1, 0.3])
      .limit(20)
      .execute()
  console.log(results)
 }
 example()
--- a/node/examples/js/package.json
+++ b/node/examples/js/package.json
@@ -1,14 +0,0 @@
 {
  "name": "vectordb-example-js",
  "version": "1.0.0",
  "description": "",
  "main": "index.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "author": "Lance Devs",
  "license": "Apache-2.0",
  "dependencies": {
    "vectordb": "file:../.."
  }
 }
--- a/node/examples/ts/package.json
+++ b/node/examples/ts/package.json
@@ -1,22 +0,0 @@
 {
  "name": "vectordb-example-ts",
  "version": "1.0.0",
  "description": "",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
  "scripts": {
    "tsc": "tsc -b",
    "build": "tsc"
  },
  "author": "Lance Devs",
  "license": "Apache-2.0",
  "devDependencies": {
    "@types/node": "^18.16.2",
    "ts-node": "^10.9.1",
    "ts-node-dev": "^2.0.0",
    "typescript": "*"
  },
  "dependencies": {
    "vectordb": "file:../.."
  }
 }
--- a/node/examples/ts/src/index.ts
+++ b/node/examples/ts/src/index.ts
@@ -1,35 +0,0 @@
 // Copyright 2023 Lance Developers.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 import * as vectordb from 'vectordb';
 // Minimal walkthrough of the vectordb API: connect, create a table,
 // list the table names and run a vector search against the new table.
 async function example () {
    const connection = await vectordb.connect('data/sample-lancedb')
    // Two sample rows, each carrying a 2-element vector.
    const rows = [
        { id: 1, vector: [0.1, 0.2], price: 10 },
        { id: 2, vector: [1.1, 1.2], price: 50 }
    ]
    const vectorsTable = await connection.createTable('vectors', rows)
    const names = await connection.tableNames()
    console.log(names)
    // Build the query first, then execute it; at most 20 rows are requested.
    const query = vectorsTable.search([0.1, 0.3]).limit(20)
    const matches = await query.execute()
    console.log(matches)
 }
 example().then(() => { console.log('All done!') })
--- a/node/examples/ts/tsconfig.json
+++ b/node/examples/ts/tsconfig.json
@@ -1,10 +0,0 @@
 {
  "include": ["src/**/*.ts"],
  "compilerOptions": {
    "target": "es2016",
    "module": "commonjs",
    "declaration": true,
    "outDir": "./dist",
    "strict": true
  }
 }
--- a/node/native.js
+++ b/node/native.js
@@ -1,36 +0,0 @@
 // Copyright 2023 Lance Developers.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 const { currentTarget } = require('@neon-rs/load')
 // Resolve the native addon.  A locally built `index.node` wins; otherwise the
 // prebuilt package named after the current target is required.
 function loadNativeLib () {
  try {
    // When developing locally, give preference to the local built library
    return require('./index.node')
  } catch {
    // Any failure to load the local build falls through to the prebuilt package
  }
  try {
    return require(`@lancedb/vectordb-${currentTarget()}`)
  } catch (e) {
    throw new Error(`vectordb: failed to load native library.
  You may need to run \`npm install @lancedb/vectordb-${currentTarget()}\`.
  If that does not work, please file a bug report at https://github.com/lancedb/lancedb/issues
  Source error: ${e}`)
  }
 }
 // Dynamic require for runtime.
 module.exports = loadNativeLib()
--- a/node/package-lock.json
+++ b/node/package-lock.json
--- a/node/package.json
+++ b/node/package.json
@@ -1,98 +0,0 @@
 {
  "name": "vectordb",
  "version": "0.20.1-beta.2",
  "description": "Serverless, low-latency vector database for AI applications",
  "private": false,
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
  "scripts": {
    "tsc": "tsc -b",
    "build": "npm run tsc && cargo-cp-artifact --artifact cdylib lancedb_node index.node -- cargo build -p lancedb-node --message-format=json",
    "build-release": "npm run build -- --release",
    "test": "npm run tsc && mocha -recursive dist/test",
    "integration-test": "npm run tsc && mocha -recursive dist/integration_test",
    "lint": "eslint native.js src --ext .js,.ts",
    "clean": "rm -rf node_modules *.node dist/",
    "pack-build": "neon pack-build",
    "check-npm": "printenv && which node && which npm && npm --version"
  },
  "repository": {
    "type": "git",
    "url": "https://github.com/lancedb/lancedb.git"
  },
  "homepage": "https://lancedb.github.io/lancedb/",
  "bugs": {
    "url": "https://github.com/lancedb/lancedb/issues"
  },
  "keywords": [
    "data-format",
    "data-science",
    "machine-learning",
    "data-analytics"
  ],
  "author": "Lance Devs",
  "license": "Apache-2.0",
  "devDependencies": {
    "@neon-rs/cli": "^0.0.160",
    "@types/chai": "^4.3.4",
    "@types/chai-as-promised": "^7.1.5",
    "@types/mocha": "^10.0.1",
    "@types/node": "^18.16.2",
    "@types/sinon": "^10.0.15",
    "@types/temp": "^0.9.1",
    "@types/uuid": "^9.0.3",
    "@typescript-eslint/eslint-plugin": "^5.59.1",
    "apache-arrow-old": "npm:apache-arrow@13.0.0",
    "cargo-cp-artifact": "^0.1",
    "chai": "^4.3.7",
    "chai-as-promised": "^7.1.1",
    "eslint": "^8.39.0",
    "eslint-config-standard-with-typescript": "^34.0.1",
    "eslint-plugin-import": "^2.26.0",
    "eslint-plugin-n": "^15.7.0",
    "eslint-plugin-promise": "^6.1.1",
    "mocha": "^10.2.0",
    "openai": "^4.24.1",
    "sinon": "^15.1.0",
    "temp": "^0.9.4",
    "ts-node": "^10.9.1",
    "ts-node-dev": "^2.0.0",
    "typedoc": "^0.24.7",
    "typedoc-plugin-markdown": "^3.15.3",
    "typescript": "^5.1.0",
    "uuid": "^9.0.0"
  },
  "dependencies": {
    "@neon-rs/load": "^0.0.74",
    "axios": "^1.4.0"
  },
  "peerDependencies": {
    "@apache-arrow/ts": "^14.0.2",
    "apache-arrow": "^14.0.2"
  },
  "os": [
    "darwin",
    "linux",
    "win32"
  ],
  "cpu": [
    "x64",
    "arm64"
  ],
  "neon": {
    "targets": {
      "x86_64-apple-darwin": "@lancedb/vectordb-darwin-x64",
      "aarch64-apple-darwin": "@lancedb/vectordb-darwin-arm64",
      "x86_64-unknown-linux-gnu": "@lancedb/vectordb-linux-x64-gnu",
      "aarch64-unknown-linux-gnu": "@lancedb/vectordb-linux-arm64-gnu",
      "x86_64-pc-windows-msvc": "@lancedb/vectordb-win32-x64-msvc"
    }
  },
  "optionalDependencies": {
    "@lancedb/vectordb-darwin-x64": "0.20.1-beta.2",
    "@lancedb/vectordb-darwin-arm64": "0.20.1-beta.2",
    "@lancedb/vectordb-linux-x64-gnu": "0.20.1-beta.2",
    "@lancedb/vectordb-linux-arm64-gnu": "0.20.1-beta.2",
    "@lancedb/vectordb-win32-x64-msvc": "0.20.1-beta.2"
  }
 }
--- a/node/src/arrow.ts
+++ b/node/src/arrow.ts
@@ -1,635 +0,0 @@
 // Copyright 2023 Lance Developers.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 import {
  Field,
  makeBuilder,
  RecordBatchFileWriter,
  Utf8,
  type Vector,
  FixedSizeList,
  vectorFromArray,
  Schema,
  Table as ArrowTable,
  RecordBatchStreamWriter,
  List,
  RecordBatch,
  makeData,
  Struct,
  type Float,
  DataType,
  Binary,
  Float32
 } from "apache-arrow";
 import { type EmbeddingFunction } from "./index";
 import { sanitizeSchema } from "./sanitize";
 /**
 * Options to control how a column should be converted to a vector array
 */
 export class VectorColumnOptions {
  /** Vector column type.  Defaults to a new `Float32` instance. */
  type: Float = new Float32();
  /**
   * @param values optional overrides; its own enumerable properties are
   * copied onto this instance with `Object.assign` after the defaults are set.
   */
  constructor(values?: Partial<VectorColumnOptions>) {
    Object.assign(this, values);
  }
 }
 /** Options to control the makeArrowTable call. */
 export class MakeArrowTableOptions {
  /**
   * Schema of the data.
   *
   * If this is not provided then the data type will be inferred from the
   * JS type.  Integer numbers will become int64, floating point numbers
   * will become float64 and arrays will become variable sized lists with
   * the data type inferred from the first element in the array.
   *
   * The schema must be specified if there are no records (e.g. to make
   * an empty table)
   */
  schema?: Schema;
  /**
   * Mapping from vector column name to expected type
   *
   * Lance expects vector columns to be fixed size list arrays (i.e. tensors)
   * However, `makeArrowTable` will not infer this by default (it creates
   * variable size list arrays).  This field can be used to indicate that a column
   * should be treated as a vector column and converted to a fixed size list.
   *
   * The keys should be the names of the vector columns.  The value specifies the
   * expected data type of the vector columns.
   *
   * If `schema` is provided then this field is ignored.
   *
   * By default, the column named "vector" will be assumed to be a float32
   * vector column.
   */
  vectorColumns: Record<string, VectorColumnOptions> = {
    vector: new VectorColumnOptions()
  };
  /**
   * Optional embedding function.
   *
   * `makeArrowTable` only consults this when a `schema` is provided: it is
   * handed to the schema validation step, which throws if the schema declares
   * a fixed size list column that the first record does not contain and no
   * embedding function is set here.
   */
  embeddings?: EmbeddingFunction<any>;
  /**
   * If true then string columns will be encoded with dictionary encoding
   *
   * Set this to true if your string columns tend to repeat the same values
   * often.  For more precise control use the `schema` property to specify the
   * data type for individual columns.
   *
   * If `schema` is provided then this property is ignored.
   */
  dictionaryEncodeStrings: boolean = false;
  /**
   * @param values optional overrides; its own enumerable properties are
   * copied onto this instance with `Object.assign` after the defaults are set.
   */
  constructor(values?: Partial<MakeArrowTableOptions>) {
    Object.assign(this, values);
  }
 }
 /**
 * An enhanced version of the {@link makeTable} function from Apache Arrow
 * that supports nested fields and embeddings columns.
 *
 * This function converts an array of Record<String, any> (row-major JS objects)
 * to an Arrow Table (a columnar structure)
 *
 * Note that it currently does not support nulls.
 *
 * If a schema is provided then it will be used to determine the resulting array
 * types.  Fields will also be reordered to fit the order defined by the schema.
 *
 * If a schema is not provided then the types will be inferred and the field order
 * will be controlled by the order of properties in the first record.
 *
 * If the input is empty then a schema must be provided to create an empty table.
 *
 * When a schema is not specified then data types will be inferred.  The inference
 * rules are as follows:
 *
 *  - boolean => Bool
 *  - number => Float64
 *  - String => Utf8
 *  - Buffer => Binary
 *  - Record<String, any> => Struct
 *  - Array<any> => List
 *
 * @param data input data
 * @param options options to control the makeArrowTable call.
 *
 * @example
 *
 * ```ts
 *
 * import { fromTableToBuffer, makeArrowTable } from "../arrow";
 * import { Field, FixedSizeList, Float16, Float32, Int32, Schema } from "apache-arrow";
 *
 * const schema = new Schema([
 *   new Field("a", new Int32()),
 *   new Field("b", new Float32()),
 *   new Field("c", new FixedSizeList(3, new Field("item", new Float16()))),
 *  ]);
 *  const table = makeArrowTable([
 *    { a: 1, b: 2, c: [1, 2, 3] },
 *    { a: 4, b: 5, c: [4, 5, 6] },
 *    { a: 7, b: 8, c: [7, 8, 9] },
 *  ], { schema });
 * ```
 *
 * By default it assumes that the column named `vector` is a vector column
 * and it will be converted into a fixed size list array of type float32.
 * The `vectorColumns` option can be used to support other vector column
 * names and data types.
 *
 * ```ts
 *
 * const schema = new Schema([
    new Field("a", new Float64()),
    new Field("b", new Float64()),
    new Field(
      "vector",
      new FixedSizeList(3, new Field("item", new Float32()))
    ),
  ]);
  const table = makeArrowTable([
    { a: 1, b: 2, vector: [1, 2, 3] },
    { a: 4, b: 5, vector: [4, 5, 6] },
    { a: 7, b: 8, vector: [7, 8, 9] },
  ]);
  assert.deepEqual(table.schema, schema);
 * ```
 *
 * You can specify the vector column types and names using the options as well
 *
 * ```typescript
 *
 * const schema = new Schema([
    new Field('a', new Float64()),
    new Field('b', new Float64()),
    new Field('vec1', new FixedSizeList(3, new Field('item', new Float16()))),
    new Field('vec2', new FixedSizeList(3, new Field('item', new Float16())))
  ]);
 * const table = makeArrowTable([
    { a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] },
    { a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] },
    { a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] }
  ], {
    vectorColumns: {
      vec1: { type: new Float16() },
      vec2: { type: new Float16() }
    }
  });
 * assert.deepEqual(table.schema, schema)
 * ```
 */
 export function makeArrowTable(
  data: Array<Record<string, any>>,
  options?: Partial<MakeArrowTableOptions>
 ): ArrowTable {
  // Without records there is nothing to infer from, so a schema is mandatory
  if (
    data.length === 0 &&
    (options?.schema === undefined || options?.schema === null)
  ) {
    throw new Error("At least one record or a schema needs to be provided");
  }
  const opt = new MakeArrowTableOptions(options !== undefined ? options : {});
  if (opt.schema !== undefined && opt.schema !== null) {
    opt.schema = sanitizeSchema(opt.schema);
    // Drops fixed size list fields that are absent from the data (they are
    // expected to be produced by the embedding function later on)
    opt.schema = validateSchemaEmbeddings(opt.schema, data, opt.embeddings);
  }
  const columns: Record<string, Vector> = {};
  // TODO: sample dataset to find missing columns
  // Prefer the field ordering of the schema, if present
  const columnNames =
    opt.schema != null ? (opt.schema.names as string[]) : Object.keys(data[0]);
  for (const colName of columnNames) {
    if (
      data.length !== 0 &&
      !Object.prototype.hasOwnProperty.call(data[0], colName)
    ) {
      // The field is present in the schema, but not in the data, skip it
      continue;
    }
    // Extract a single column from the records (transpose from row-major to col-major)
    let values = data.map((datum) => datum[colName]);
    // By default (type === undefined) arrow will infer the type from the JS type
    let type;
    // A `null` schema is treated exactly like a missing one, matching every
    // other schema check in this function.  Previously `schema: null` took the
    // schema branch and silently skipped the vector column inference below.
    if (opt.schema != null) {
      // If there is a schema provided, then use that for the type instead
      type = opt.schema.fields.find((f) => f.name === colName)?.type;
      if (DataType.isInt(type) && type.bitWidth === 64) {
        // wrap in BigInt to avoid bug: https://github.com/apache/arrow/issues/40051
        values = values.map((v) => {
          if (v === null) {
            return v;
          }
          return BigInt(v);
        });
      }
    } else {
      // Otherwise, check to see if this column is one of the vector columns
      // defined by opt.vectorColumns and, if so, use the fixed size list type
      const vectorColumnOptions = opt.vectorColumns[colName];
      if (vectorColumnOptions !== undefined) {
        // The dimension is taken from the first record only
        type = newVectorType(values[0].length, vectorColumnOptions.type);
      }
    }
    try {
      // Convert an Array of JS values to an arrow vector
      columns[colName] = makeVector(values, type, opt.dictionaryEncodeStrings);
    } catch (error: unknown) {
      // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
      throw Error(`Could not convert column "${colName}" to Arrow: ${error}`);
    }
  }
  if (opt.schema != null) {
    // `new ArrowTable(columns)` infers a schema which may sometimes have
    // incorrect nullability (it assumes nullable=true if there are 0 rows)
    //
    // `new ArrowTable(schema, columns)` will also fail because it will create a
    // batch with an inferred schema and then complain that the batch schema
    // does not match the provided schema.
    //
    // To work around this we first create a table with the wrong schema and
    // then patch the schema of the batches so we can use
    // `new ArrowTable(schema, batches)` which does not do any schema inference
    const firstTable = new ArrowTable(columns);
    const batchesFixed = firstTable.batches.map(
      // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
      (batch) => new RecordBatch(opt.schema!, batch.data)
    );
    return new ArrowTable(opt.schema, batchesFixed);
  } else {
    return new ArrowTable(columns);
  }
 }
 /**
 * Build a zero-row Arrow table whose layout is described by `schema`.
 *
 * Delegates to `makeArrowTable` with no records and the schema as the only option.
 */
 export function makeEmptyTable(schema: Schema): ArrowTable {
  const noRecords: Array<Record<string, any>> = [];
  return makeArrowTable(noRecords, { schema });
 }
 // Builds a variable sized Arrow list vector from an Array<Array<any>>.
 // The element type is inferred from the first inner list only.
 function makeListVector(lists: any[][]): Vector<any> {
  const canInfer = lists.length !== 0 && lists[0].length !== 0;
  if (!canInfer) {
    throw Error("Cannot infer list vector from empty array or empty list");
  }
  let elementType;
  try {
    elementType = makeVector(lists[0]).type;
  } catch (error: unknown) {
    // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
    throw Error(`Cannot infer list vector.  Cannot infer inner type: ${error}`);
  }
  // The child field is named "item" and declared nullable
  const itemField = new Field("item", elementType, true);
  const builder = makeBuilder({ type: new List(itemField) });
  for (let i = 0; i < lists.length; i++) {
    builder.append(lists[i]);
  }
  return builder.finish().toVector();
 }
 // Turns an Array of JS values into an Arrow Vector.  When `type` is given it
 // is used as-is; otherwise the type is picked from the first value that is
 // neither null nor undefined.
 function makeVector(
  values: any[],
  type?: DataType,
  stringAsDictionary?: boolean
 ): Vector<any> {
  // An explicit type means no inference is needed, Arrow builds the vector
  if (type !== undefined) {
    return vectorFromArray(values, type);
  }
  if (values.length === 0) {
    throw Error(
      "makeVector requires at least one value or the type must be specfied"
    );
  }
  const sampleValue = values.find((candidate) => candidate != null);
  if (sampleValue === undefined) {
    throw Error(
      "makeVector cannot infer the type if all values are null or undefined"
    );
  }
  // Arrow's default inference has no support for list types
  if (Array.isArray(sampleValue)) {
    return makeListVector(values);
  }
  // ... nor for Buffer
  if (Buffer.isBuffer(sampleValue)) {
    return vectorFromArray(values, new Binary());
  }
  // Arrow's default inference always dictionary-encodes strings, so plain
  // Utf8 is forced unless dictionaries were explicitly requested
  const wantsDictionary = stringAsDictionary ?? false;
  const sampleIsString =
    typeof sampleValue === "string" || sampleValue instanceof String;
  if (!wantsDictionary && sampleIsString) {
    return vectorFromArray(values, new Utf8());
  }
  // Everything else goes through Arrow's own type inference
  return vectorFromArray(values);
 }
 // Adds the embedding column produced by `embeddings` to `table`.
 //
 // Returns `table` untouched when no embedding function is given.  When a
 // schema is given the resulting table is aligned to it, so the schema must
 // contain the destination column.
 async function applyEmbeddings<T>(
  table: ArrowTable,
  embeddings?: EmbeddingFunction<T>,
  schema?: Schema
 ): Promise<ArrowTable> {
  if (embeddings == null) {
    return table;
  }
  if (schema !== undefined && schema !== null) {
    schema = sanitizeSchema(schema);
  }
  // Convert from ArrowTable to Record<String, Vector>
  const colEntries = Array.from({ length: table.numCols }, (_, idx) => {
    const name = table.schema.fields[idx].name;
    // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
    const vec = table.getChildAt(idx)!;
    return [name, vec];
  });
  const newColumns = Object.fromEntries(colEntries);
  const sourceColumn = newColumns[embeddings.sourceColumn];
  const destColumn = embeddings.destColumn ?? "vector";
  const innerDestType = embeddings.embeddingDataType ?? new Float32();
  if (sourceColumn === undefined) {
    throw new Error(
      `Cannot apply embedding function because the source column '${embeddings.sourceColumn}' was not present in the data`
    );
  }
  const destAlreadyPresent = Object.prototype.hasOwnProperty.call(
    newColumns,
    destColumn
  );
  if (table.numRows !== 0) {
    // Non-empty table: the destination column must not exist yet
    if (destAlreadyPresent) {
      throw new Error(
        `Attempt to apply embeddings to table failed because column ${destColumn} already existed`
      );
    }
    if (table.batches.length > 1) {
      throw new Error(
        "Internal error: `makeArrowTable` unexpectedly created a table with more than one batch"
      );
    }
    const inputs = sourceColumn.toArray();
    const vectors = await embeddings.embed(inputs as T[]);
    if (vectors.length !== inputs.length) {
      throw new Error(
        "Embedding function did not return an embedding for each input element"
      );
    }
    // The dimension is taken from the first embedding returned
    const destType = newVectorType(vectors[0].length, innerDestType);
    newColumns[destColumn] = makeVector(vectors, destType);
  } else {
    // Empty table that already carries the embedding column: nothing to do.
    // Unlike the non-empty case this is not an error because it is a common
    // situation, e.g. converting 0 records with a schema that already
    // includes the embedding column.
    if (destAlreadyPresent) {
      return table;
    }
    // With no rows to embed, the column type must come from the declared
    // dimension or, failing that, from the schema
    if (embeddings.embeddingDimension !== undefined) {
      const destType = newVectorType(
        embeddings.embeddingDimension,
        innerDestType
      );
      newColumns[destColumn] = makeVector([], destType);
    } else if (schema != null) {
      const destField = schema.fields.find((f) => f.name === destColumn);
      if (destField == null) {
        throw new Error(
          `Attempt to apply embeddings to an empty table failed because schema was missing embedding column '${destColumn}'`
        );
      }
      newColumns[destColumn] = makeVector([], destField.type);
    } else {
      throw new Error(
        "Attempt to apply embeddings to an empty table when the embeddings function does not specify `embeddingDimension`"
      );
    }
  }
  const newTable = new ArrowTable(newColumns);
  if (schema == null) {
    return newTable;
  }
  const schemaHasDest = schema.fields.some((f) => f.name === destColumn);
  if (!schemaHasDest) {
    throw new Error(
      `When using embedding functions and specifying a schema the schema should include the embedding column but the column ${destColumn} was missing`
    );
  }
  return alignTable(newTable, schema);
 }
 /**
 * Turn an Array of records into an Arrow Table and, when an embedding
 * function is given, add its embedding column.
 *
 * The table is first built by `makeArrowTable`, which receives
 * `makeTableOptions` (e.g. a schema) unchanged.
 *
 * The embedding function is handed the column named by its `sourceColumn`
 * and must return number[][], which is stored as a fixed size list column.
 * The list items are Float32 unless the embedding function sets
 * `embeddingDataType`.
 *
 * If a schema is provided in `makeTableOptions` then it should include the
 * embedding columns.  If no schema is provided then embedding columns are
 * placed at the end of the table, after all of the input columns.
 */
 export async function convertToTable<T>(
  data: Array<Record<string, unknown>>,
  embeddings?: EmbeddingFunction<T>,
  makeTableOptions?: Partial<MakeArrowTableOptions>
 ): Promise<ArrowTable> {
  const plainTable = makeArrowTable(data, makeTableOptions);
  const withEmbeddings = await applyEmbeddings(
    plainTable,
    embeddings,
    makeTableOptions?.schema
  );
  return withEmbeddings;
 }
 // Arrow type of a vector column: a fixed size list holding `dim` items of `innerType`
 function newVectorType<T extends Float>(
  dim: number,
  innerType: T
 ): FixedSizeList<T> {
  // The item field is declared nullable on purpose: stored data always comes
  // back with nullable elements, so anything else tends to cause schema mismatches
  return new FixedSizeList(dim, new Field<T>("item", innerType, true));
 }
 /**
 * Encode an Array of records as an Arrow IPC File buffer
 *
 * The records are converted with `convertToTable`, which receives
 * `embeddings` and the (sanitized) `schema`.
 *
 * `schema` is required if data is empty
 */
 export async function fromRecordsToBuffer<T>(
  data: Array<Record<string, unknown>>,
  embeddings?: EmbeddingFunction<T>,
  schema?: Schema
 ): Promise<Buffer> {
  const cleanSchema = schema != null ? sanitizeSchema(schema) : schema;
  const arrowTable = await convertToTable(data, embeddings, {
    schema: cleanSchema,
    embeddings
  });
  const bytes = await RecordBatchFileWriter.writeAll(arrowTable).toUint8Array();
  return Buffer.from(bytes);
 }
 /**
 * Serialize an Array of records into a buffer using the Arrow IPC Stream serialization
 *
 * This function will call `convertToTable` and pass on `embeddings` and `schema`
 *
 * `schema` is required if data is empty
 *
 * @param data records to serialize
 * @param embeddings optional embedding function applied to the records
 * @param schema optional schema of the resulting table; when it declares the
 * embedding column, `embeddings` produces that column
 * @returns the Arrow IPC Stream bytes
 */
 export async function fromRecordsToStreamBuffer<T>(
  data: Array<Record<string, unknown>>,
  embeddings?: EmbeddingFunction<T>,
  schema?: Schema
 ): Promise<Buffer> {
  if (schema !== null && schema !== undefined) {
    schema = sanitizeSchema(schema);
  }
  // `embeddings` must also travel in the table options (as in
  // `fromRecordsToBuffer`): schema validation in `makeArrowTable` reads it from
  // there and otherwise rejects a schema whose embedding column is not yet in
  // the records with "no embedding function was provided"
  const table = await convertToTable(data, embeddings, { schema, embeddings });
  const writer = RecordBatchStreamWriter.writeAll(table);
  return Buffer.from(await writer.toUint8Array());
 }
 /**
 * Encode an Arrow Table as an Arrow IPC File buffer
 *
 * `embeddings` is applied to the table first, in a manner similar to
 * `convertToTable`.
 *
 * `schema` is required if the table is empty
 */
 export async function fromTableToBuffer<T>(
  table: ArrowTable,
  embeddings?: EmbeddingFunction<T>,
  schema?: Schema
 ): Promise<Buffer> {
  const cleanSchema = schema != null ? sanitizeSchema(schema) : schema;
  const embedded = await applyEmbeddings(table, embeddings, cleanSchema);
  const bytes = await RecordBatchFileWriter.writeAll(embedded).toUint8Array();
  return Buffer.from(bytes);
 }
 /**
 * Encode an Arrow Table as an Arrow IPC Stream buffer
 *
 * `embeddings` is applied to the table first, in a manner similar to
 * `convertToTable`.
 *
 * `schema` is required if the table is empty
 */
 export async function fromTableToStreamBuffer<T>(
  table: ArrowTable,
  embeddings?: EmbeddingFunction<T>,
  schema?: Schema
 ): Promise<Buffer> {
  const cleanSchema = schema != null ? sanitizeSchema(schema) : schema;
  const embedded = await applyEmbeddings(table, embeddings, cleanSchema);
  const bytes = await RecordBatchStreamWriter.writeAll(embedded).toUint8Array();
  return Buffer.from(bytes);
 }
 // Rebuilds `batch` so that its columns follow the field order of `schema`.
 // Throws when a schema field has no column of the same name in the batch.
 function alignBatch(batch: RecordBatch, schema: Schema): RecordBatch {
  const targetFields = schema.fields;
  const reordered = [];
  for (let i = 0; i < targetFields.length; i++) {
    const wanted = targetFields[i];
    // Columns are matched purely by field name
    const position = batch.schema.fields?.findIndex(
      (candidate) => candidate.name === wanted.name
    );
    if (position < 0) {
      throw new Error(
        `The column ${wanted.name} was not found in the Arrow Table`
      );
    }
    reordered.push(batch.data.children[position]);
  }
  // Row count and null count are carried over from the original batch
  const structData = makeData({
    type: new Struct(schema.fields),
    length: batch.numRows,
    nullCount: batch.nullCount,
    children: reordered
  });
  return new RecordBatch(schema, structData);
 }
 // Aligns every batch of `table` to `schema` and wraps the result in a new table
 function alignTable(table: ArrowTable, schema: Schema): ArrowTable {
  const toAligned = (batch: RecordBatch): RecordBatch =>
    alignBatch(batch, schema);
  return new ArrowTable(schema, table.batches.map((batch) => toAligned(batch)));
 }
 // Builds an Arrow Table from the sanitized `schema` alone, without any batches
 export function createEmptyTable(schema: Schema): ArrowTable {
  const cleanSchema = sanitizeSchema(schema);
  return new ArrowTable(cleanSchema);
 }
 // Splits the schema fields into those backed by the data and fixed size list
 // fields missing from the first record.  The missing ones are only accepted
 // when an embedding function is available; the returned schema contains the
 // remaining fields and the original metadata.
 function validateSchemaEmbeddings(
  schema: Schema<any>,
  data: Array<Record<string, unknown>>,
  embeddings: EmbeddingFunction<any> | undefined
 ) {
  const keptFields: Array<Field<any>> = [];
  const absentVectorFields: Array<Field<any>> = [];
  for (const field of schema.fields) {
    // Only a fixed size list field can be treated as a missing embedding,
    // and only when there is a first record that lacks a value for it
    const isAbsentVector =
      field.type instanceof FixedSizeList &&
      data.length !== 0 &&
      data?.[0]?.[field.name] === undefined;
    const bucket = isAbsentVector ? absentVectorFields : keptFields;
    bucket.push(field);
  }
  // Missing embedding columns need an embedding function to fill them in
  if (absentVectorFields.length > 0 && embeddings === undefined) {
    throw new Error(
      `Table has embeddings: "${absentVectorFields
        .map((f) => f.name)
        .join(",")}", but no embedding function was provided`
    );
  }
  return new Schema(keptFields, schema.metadata);
 }
--- a/node/src/embedding/embedding_function.ts
+++ b/node/src/embedding/embedding_function.ts
@@ -1,68 +0,0 @@
 // Copyright 2023 Lance Developers.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 import { type Float } from 'apache-arrow'
 /**
 * An embedding function that automatically creates vector representation for a given column.
 */
 export interface EmbeddingFunction<T> {
  /**
   * The name of the column that will be used as input for the Embedding Function.
   */
  sourceColumn: string
  /**
   * The data type of the embedding
   *
   * `embed` resolves to `number[][]` (one `number[]` per input value).  This
   * will be converted into an Arrow float array.  By default this will be
   * Float32 but this property can be used to control the conversion.
   */
  embeddingDataType?: Float
  /**
   * The dimension of the embedding
   *
   * This is optional, normally this can be determined by looking at the results of
   * `embed`.  If this is not specified, and there is an attempt to apply the embedding
   * to an empty table, then that process will fail.
   */
  embeddingDimension?: number
  /**
   * The name of the column that will contain the embedding
   *
   * By default this is "vector"
   */
  destColumn?: string
  /**
   * Should the source column be excluded from the resulting table
   *
   * By default the source column is included.  Set this to true and
   * only the embedding will be stored.
   *
   * NOTE(review): confirm which caller honors this flag.
   */
  excludeSource?: boolean
  /**
   * Creates a vector representation for the given values.
   *
   * @param data the values to embed
   * @returns a promise of one embedding (`number[]`) per input value
   */
  embed: (data: T[]) => Promise<number[][]>
 }
 /**
 * Type guard telling whether `value` looks like an {@link EmbeddingFunction}.
 *
 * Only the two mandatory members are checked: `sourceColumn` must be a string
 * and `embed` must be a function.
 *
 * @param value any value, including `null` and `undefined`
 * @returns true when `value` has the mandatory members, false otherwise
 */
 export function isEmbeddingFunction<T> (value: any): value is EmbeddingFunction<T> {
  // Reading a property of null/undefined throws, so rule those out first
  if (value === null || value === undefined) {
    return false
  }
  return typeof value.sourceColumn === 'string' &&
      typeof value.embed === 'function'
 }
--- a/node/src/embedding/openai.ts
+++ b/node/src/embedding/openai.ts
@@ -1,57 +0,0 @@
 // Copyright 2023 Lance Developers.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 import { type EmbeddingFunction } from '../index'
 import type OpenAI from 'openai'
 /**
 * Embedding function that sends the values of `sourceColumn` to the OpenAI
 * embeddings endpoint.  The `openai` package is loaded lazily in the constructor.
 */
 export class OpenAIEmbeddingFunction implements EmbeddingFunction<string> {
  private readonly _openai: OpenAI
  private readonly _modelName: string
  /**
   * @param sourceColumn name of the column whose values are embedded
   * @param openAIKey API key handed to the OpenAI client
   * @param modelName embedding model, 'text-embedding-ada-002' by default
   */
  constructor (sourceColumn: string, openAIKey: string, modelName: string = 'text-embedding-ada-002') {
    /**
     * @type {import("openai").default}
     */
    let Openai
    try {
      // `openai` is required lazily so it is only needed when this class is used
      // eslint-disable-next-line @typescript-eslint/no-var-requires
      Openai = require('openai')
    } catch {
      throw new Error('please install openai@^4.24.1 using npm install openai')
    }
    this.sourceColumn = sourceColumn
    this._openai = new Openai({ apiKey: openAIKey })
    this._modelName = modelName
  }
  /**
   * Requests one embedding per input string in a single call.
   *
   * @returns the `embedding` of every item in the response, in response order
   */
  async embed (data: string[]): Promise<number[][]> {
    const request = { model: this._modelName, input: data }
    const { data: items } = await this._openai.embeddings.create(request)
    return items.map((item) => item.embedding)
  }
  sourceColumn: string
 }
--- a/node/src/index.ts
+++ b/node/src/index.ts
--- a/node/src/integration_test/test.ts
+++ b/node/src/integration_test/test.ts
@@ -1,180 +0,0 @@
 // Copyright 2023 LanceDB Developers.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 import { describe } from 'mocha'
 import * as chai from 'chai'
 import { assert } from 'chai'
 import * as chaiAsPromised from 'chai-as-promised'
 import { v4 as uuidv4 } from 'uuid'
 import * as lancedb from '../index'
 import { tmpdir } from 'os'
 import * as fs from 'fs'
 import * as path from 'path'
 chai.use(chaiAsPromised)
 // Integration test that talks to the `lancedb-integtest` S3 URI with the ddb engine.
 describe('LanceDB AWS Integration test', function () {
  it('s3+ddb schema is processed correctly', async function () {
    this.timeout(15000)
    // WARNING: specifying engine is NOT a publicly supported feature in lancedb yet
    // THE API WILL CHANGE
    const uri = 's3://lancedb-integtest?engine=ddb&ddbTableName=lancedb-integtest'
    const db = await lancedb.connect(uri)
    const rows = [{ vector: Array(128).fill(1.0) }]
    const name = uuidv4()
    const created = await db.createTable(name, rows, { writeMode: lancedb.WriteMode.Overwrite })
    // Start five appends of one row each without waiting in between.
    const appends = Array.from({ length: 5 }, async () => await created.add(rows))
    await Promise.allSettled(appends)
    // One row from createTable plus the five appended rows.
    const reopened = await db.openTable(name)
    assert.equal(await reopened.countRows(), 6)
  })
 })
 // Integration test for the `mirroredStore` URI parameter: after every write the
 // local mirror directory is listed and its layout is asserted. All listings are
 // awaited so that a failed assertion rejects the test instead of being thrown
 // from a detached callback after the test has already finished.
 describe('LanceDB Mirrored Store Integration test', function () {
  // Lists the directory at path.join(...segments) as Dirent entries.
  async function listDir (...segments: string[]): Promise<fs.Dirent[]> {
    return await fs.promises.readdir(path.join(...segments), { withFileTypes: true })
  }
  // Asserts that the directory holds exactly `count` entries and that every name ends with `suffix`.
  async function assertFiles (count: number, suffix: string, ...segments: string[]): Promise<void> {
    const files = await listDir(...segments)
    assert.equal(files.length, count)
    for (const file of files) {
      assert.isTrue(file.name.endsWith(suffix))
    }
  }
  // Asserts that the mirrored table root holds exactly `count` entries, all of them directories.
  async function assertTopLevelDirs (root: string, count: number): Promise<void> {
    const entries = await listDir(root)
    assert.equal(entries.length, count)
    for (const entry of entries) {
      assert.isTrue(entry.isDirectory())
    }
  }
  // Asserts that `_indices` holds one directory which in turn holds a single `.idx` file.
  async function assertSingleIndex (root: string): Promise<void> {
    const indices = await listDir(root, '_indices')
    assert.equal(indices.length, 1)
    assert.isTrue(indices[0].isDirectory())
    const files = await listDir(root, '_indices', indices[0].name)
    assert.equal(files.length, 1)
    assert.isTrue(files[0].isFile())
    assert.isTrue(files[0].name.endsWith('.idx'))
  }
  it('s3://...?mirroredStore=... param is processed correctly', async function () {
    this.timeout(600000)
    const dir = tmpdir()
    console.log(dir)
    const conn = await lancedb.connect({ uri: `s3://lancedb-integtest?mirroredStore=${dir}`, storageOptions: { allowHttp: 'true' } })
    const data = Array(200).fill({ vector: Array(128).fill(1.0), id: 0 })
    data.push(...Array(200).fill({ vector: Array(128).fill(1.0), id: 1 }))
    data.push(...Array(200).fill({ vector: Array(128).fill(1.0), id: 2 }))
    data.push(...Array(200).fill({ vector: Array(128).fill(1.0), id: 3 }))
    const tableName = uuidv4()
    // try create table and check if it's mirrored
    const t = await conn.createTable(tableName, data, { writeMode: lancedb.WriteMode.Overwrite })
    const mirroredPath = path.join(dir, `${tableName}.lance`)
    // there should be three dirs
    await assertTopLevelDirs(mirroredPath, 3)
    await assertFiles(1, '.txn', mirroredPath, '_transactions')
    await assertFiles(1, '.manifest', mirroredPath, '_versions')
    await assertFiles(1, '.lance', mirroredPath, 'data')
    // try create index and check if it's mirrored
    await t.createIndex({ column: 'vector', type: 'ivf_pq' })
    // there should be four dirs
    await assertTopLevelDirs(mirroredPath, 4)
    // Two TXs now
    await assertFiles(2, '.txn', mirroredPath, '_transactions')
    await assertFiles(1, '.lance', mirroredPath, 'data')
    await assertSingleIndex(mirroredPath)
    // try delete and check if it's mirrored
    await t.delete('id = 0')
    // there should be five dirs
    await assertTopLevelDirs(mirroredPath, 5)
    // Three TXs now
    await assertFiles(3, '.txn', mirroredPath, '_transactions')
    await assertFiles(1, '.lance', mirroredPath, 'data')
    await assertSingleIndex(mirroredPath)
    await assertFiles(1, '.arrow', mirroredPath, '_deletions')
  })
 })
--- a/node/src/middleware.ts
+++ b/node/src/middleware.ts
@@ -1,58 +0,0 @@
 // Copyright 2024 LanceDB Developers.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 /**
 * Middleware for Remote LanceDB Connection or Table
 */
 export interface HttpMiddleware {
  /**
   * A callback that can be used to instrument the behavior of http requests to remote
   * tables. It can be used to add headers, modify the request, or even short-circuit
   * the request and return a response without making the request to the remote endpoint.
   * It can also be used to modify the response from the remote endpoint.
   *
   * @param {RemoteRequest} req - Request to the remote endpoint
   * @param next - Callback to advance the middleware chain; it takes the
   *   (possibly modified) request and resolves to the response
   * @returns the response to hand back to the caller
   */
  onRemoteRequest(
    req: RemoteRequest,
    next: (req: RemoteRequest) => Promise<RemoteResponse>,
  ): Promise<RemoteResponse>
 };
 /**
 * HTTP methods a RemoteRequest can carry. This is a numeric enum,
 * so GET is 0 and POST is 1 at runtime.
 */
 export enum Method {
  GET,
  POST
 }
 /**
 * A LanceDB Remote HTTP Request
 */
 export interface RemoteRequest {
  // URL the request is sent to
  uri: string
  // HTTP method of the request
  method: Method
  // HTTP headers, keyed by header name
  headers: Map<string, string>
  // optional parameters sent along with the request
  // NOTE(review): presumably URL query parameters — confirm against the client that sends the request
  params?: Map<string, string>
  // optional request payload
  body?: any
 }
 /**
 * A LanceDB Remote HTTP Response
 */
 export interface RemoteResponse {
  // numeric HTTP status code
  status: number
  // status text that accompanies the status code
  statusText: string
  // response headers, keyed by header name
  headers: Map<string, string>
  // asynchronous accessor for the response payload
  body: () => Promise<any>
 }
--- a/node/src/query.ts
+++ b/node/src/query.ts
@@ -1,163 +0,0 @@
 // Copyright 2023 LanceDB Developers.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 import { Vector, tableFromIPC } from 'apache-arrow'
 import { type EmbeddingFunction } from './embedding/embedding_function'
 import { type MetricType } from '.'
 // eslint-disable-next-line @typescript-eslint/no-var-requires
 const { tableSearch } = require('../native.js')
 /**
 * Fluent builder for a nearest neighbor query against a LanceDB table.
 * Every setter stores its value on the builder and returns the builder, so
 * calls can be chained; the search itself only runs when `execute` is called.
 */
 export class Query<T = number[]> {
  private readonly _query?: T
  private readonly _tbl?: any
  private _queryVector?: number[]
  private _limit?: number
  private _refineFactor?: number
  private _nprobes: number
  private _select?: string[]
  private _filter?: string
  private _metricType?: MetricType
  private _prefilter: boolean
  private _fastSearch: boolean
  protected readonly _embeddings?: EmbeddingFunction<T>
  /**
   * @param query value to search for; embedded first when `embeddings` is given
   * @param tbl native table handle that `execute` runs the search against
   * @param embeddings optional embedding function applied to `query`
   */
  constructor (query?: T, tbl?: any, embeddings?: EmbeddingFunction<T>) {
    this._tbl = tbl
    this._query = query
    // Defaults: 10 results, 20 probes, no pre-filtering, no fast search.
    this._limit = 10
    this._nprobes = 20
    this._refineFactor = undefined
    this._select = undefined
    this._filter = undefined
    this._metricType = undefined
    this._embeddings = embeddings
    this._prefilter = false
    this._fastSearch = false
  }
  /**
   * Sets the number of results that will be returned (10 unless changed).
   * @param value number of results
   */
  limit (value: number): Query<T> {
    this._limit = value
    return this
  }
  /**
   * Refine the results by reading extra elements and re-ranking them in memory.
   * @param value refine factor to use in this query.
   */
  refineFactor (value: number): Query<T> {
    this._refineFactor = value
    return this
  }
  /**
   * The number of probes used. A higher number makes search more accurate but also slower.
   * @param value The number of probes used (20 unless changed).
   */
  nprobes (value: number): Query<T> {
    this._nprobes = value
    return this
  }
  /**
   * A filter statement to be applied to this query.
   * @param value A filter in the same format used by a sql WHERE clause.
   */
  filter (value: string): Query<T> {
    this._filter = value
    return this
  }
  // Alias of `filter`.
  where = this.filter
  /**
   * Return only the specified columns.
   * @param value Only select the specified columns. If not specified, all columns will be returned.
   */
  select (value: string[]): Query<T> {
    this._select = value
    return this
  }
  /**
   * The MetricType used for this Query.
   * @param value The metric to use. @see MetricType for the different options
   */
  metricType (value: MetricType): Query<T> {
    this._metricType = value
    return this
  }
  /**
   * Stores the prefilter flag carried by this query (false unless changed).
   * @param value new value of the flag
   */
  prefilter (value: boolean): Query<T> {
    this._prefilter = value
    return this
  }
  /**
   * Skip searching un-indexed data. This can make search faster, but will miss
   * any data that is not yet indexed.
   */
  fastSearch (value: boolean): Query<T> {
    this._fastSearch = value
    return this
  }
  /**
   * Execute the query and return the results as an Array of Objects
   */
  async execute<T = Record<string, unknown>> (): Promise<T[]> {
    if (this._query !== undefined) {
      // Embed the query when an embedding function is attached, otherwise use it as the vector.
      this._queryVector = this._embeddings !== undefined
        ? (await this._embeddings.embed([this._query]))[0]
        : this._query as number[]
    }
    const inElectron = this.isElectron()
    const ipcBuffer = await tableSearch.call(this._tbl, this, inElectron)
    return tableFromIPC(ipcBuffer).toArray().map((row: Record<string, unknown>) => {
      const plain: Record<string, unknown> = {}
      for (const column of Object.keys(row)) {
        const value = row[column]
        // toJSON() returns f16 array correctly
        plain[column] = value instanceof Vector ? (value as any).toJSON() : value as any
      }
      return plain as unknown as T
    })
  }
  // See https://github.com/electron/electron/issues/2288
  private isElectron (): boolean {
    try {
      // eslint-disable-next-line no-prototype-builtins
      if (process?.versions?.hasOwnProperty('electron')) {
        return true
      }
      // Referencing `navigator` throws where it is not defined; the catch below maps that to false.
      return navigator?.userAgent?.toLowerCase()?.includes(' electron')
    } catch (e) {
      return false
    }
  }
 }
--- a/node/src/remote/client.ts
+++ b/node/src/remote/client.ts
@@ -1,302 +0,0 @@
 // Copyright 2023 LanceDB Developers.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 import axios, { type AxiosError, type AxiosResponse, type ResponseType } from 'axios'
 import { tableFromIPC, type Table as ArrowTable } from 'apache-arrow'
 import { type RemoteResponse, type RemoteRequest, Method } from '../middleware'
 import type { MetricType } from '..'
 /**
 * Shape of a middleware as consumed by this client: a single hook that
 * receives the outgoing request and a `next` callback that continues the
 * chain and resolves to the response.
 */
 interface HttpLancedbClientMiddleware {
  onRemoteRequest(
    req: RemoteRequest,
    next: (req: RemoteRequest) => Promise<RemoteResponse>,
  ): Promise<RemoteResponse>
 }
 /**
 * Passes `req` through `middlewares` in array order. Each middleware receives
 * the current request and a `next` callback; once the last middleware calls
 * `next`, the request is sent to the remote endpoint with axios.
 *
 * @param req request to send
 * @param middlewares chain to run before the request is sent
 * @param opts optional axios settings (timeout, and responseType for POST)
 * @returns the response produced by the chain or by the remote endpoint
 */
 async function callWithMiddlewares (
  req: RemoteRequest,
  middlewares: HttpLancedbClientMiddleware[],
  opts?: MiddlewareInvocationOptions
 ): Promise<RemoteResponse> {
  // Terminal step of the chain: performs the HTTP call for the final request.
  const send = async (finalReq: RemoteRequest): Promise<RemoteResponse> => {
    const headers = Object.fromEntries(finalReq.headers.entries())
    const params = Object.fromEntries(finalReq.params?.entries() ?? [])
    const timeout = opts?.timeout
    const res = finalReq.method === Method.POST
      ? await axios.post(finalReq.uri, finalReq.body, {
        headers,
        params,
        timeout,
        responseType: opts?.responseType
      })
      : await axios.get(finalReq.uri, { headers, params, timeout })
    return toLanceRes(res)
  }
  // Runs the middleware at `index` (0-based), or sends the request once the chain is exhausted.
  const dispatch = async (index: number, current: RemoteRequest): Promise<RemoteResponse> => {
    if (index >= middlewares.length) {
      return await send(current)
    }
    return await middlewares[index].onRemoteRequest(
      current,
      async (nextReq) => await dispatch(index + 1, nextReq)
    )
  }
  return await dispatch(0, req)
 }
 /**
 * Per-call settings that callWithMiddlewares forwards to axios.
 */
 interface MiddlewareInvocationOptions {
  // axios response type; only forwarded for POST requests
  responseType?: ResponseType
  // axios request timeout; forwarded for both GET and POST requests
  timeout?: number
 }
 /**
 * Converts an axios response into a RemoteResponse: status and status text
 * are copied, the headers are copied into a Map, and the payload is exposed
 * through the asynchronous `body` accessor.
 */
 function toLanceRes (res: AxiosResponse): RemoteResponse {
  const { status, statusText } = res
  const headers = new Map()
  for (const name in res.headers) {
    headers.set(name, res.headers[name])
  }
  // `body` reads `res.data` lazily, at the time it is called.
  return { status, statusText, headers, body: async () => res.data }
 }
 /**
 * Reads the body of a response and renders it as text for an error message.
 *
 * @param res response whose body is read
 * @param responseType response type the request was made with; for
 *   'arraybuffer' the body is decoded with TextDecoder
 * @returns the decoded text, the JSON form of an object body, or the body as is
 */
 async function decodeErrorData(
  res: RemoteResponse,
  responseType?: ResponseType
 ): Promise<string> {
  const payload = await res.body()
  if (responseType === 'arraybuffer') {
    return new TextDecoder().decode(payload)
  }
  return typeof payload === 'object' ? JSON.stringify(payload) : payload
 }
 /**
 * HTTP client for the LanceDB remote API. Requests carry the API key in the
 * `x-api-key` header (plus `x-lancedb-database` when a database name was
 * given) and are passed through the registered middlewares before being sent.
 */
 export class HttpLancedbClient {
  private readonly _url: string
  private readonly _apiKey: () => string
  private readonly _middlewares: HttpLancedbClientMiddleware[]
  private readonly _timeout: number | undefined
  /**
   * @param url base URL that request paths are appended to
   * @param apiKey value sent in the `x-api-key` header
   * @param timeout request timeout handed to axios for every request
   * @param _dbName when defined, sent in the `x-lancedb-database` header
   */
  public constructor (
    url: string,
    apiKey: string,
    timeout?: number,
    private readonly _dbName?: string
  ) {
    this._url = url
    this._apiKey = () => apiKey
    this._middlewares = []
    this._timeout = timeout
  }
  /** Base URL this client sends requests to. */
  get uri (): string {
    return this._url
  }
  /**
   * Runs a vector search by posting to `/v1/table/<tableName>/query/` and
   * decodes the Arrow IPC payload of the response.
   *
   * `tableName` is interpolated into the path as is, so callers have to pass
   * it already URL-encoded.
   */
  public async search (
    tableName: string,
    vector: number[],
    k: number,
    nprobes: number,
    prefilter: boolean,
    refineFactor?: number,
    columns?: string[],
    filter?: string,
    metricType?: MetricType,
    fastSearch?: boolean
  ): Promise<ArrowTable<any>> {
    const result = await this.post(
      `/v1/table/${tableName}/query/`,
      {
        vector,
        k,
        nprobes,
        refine_factor: refineFactor,
        columns,
        filter,
        prefilter,
        metric: metricType,
        fast_search: fastSearch
      },
      undefined,
      undefined,
      // The result is binary Arrow IPC data, so it must not be parsed as JSON.
      'arraybuffer'
    )
    const table = tableFromIPC(await result.body())
    return table
  }
  /**
   * Sends a GET request.
   *
   * @param path path appended to the base URL
   * @param params optional query parameters
   * @returns the response of a successful request
   * @throws Error 'Network Error: …' when the request failed without a response,
   *   'Server Error, …' when the failure response has a status other than 200
   */
  public async get (path: string, params?: Record<string, string>): Promise<RemoteResponse> {
    const req = {
      uri: `${this._url}${path}`,
      method: Method.GET,
      headers: new Map(Object.entries({
        'Content-Type': 'application/json',
        'x-api-key': this._apiKey(),
        ...(this._dbName !== undefined ? { 'x-lancedb-database': this._dbName } : {})
      })),
      params: new Map(Object.entries(params ?? {}))
    }
    let response
    try {
      // Forward the configured timeout so that GET requests honour it the same way POST requests do.
      response = await callWithMiddlewares(req, this._middlewares, {
        timeout: this._timeout
      })
      return response
    } catch (err: any) {
      console.error(serializeErrorAsJson(err))
      // No response attached means the request never got an answer from the server.
      if (err.response === undefined) {
        throw new Error(`Network Error: ${err.message as string}`)
      }
      response = toLanceRes(err.response)
    }
    if (response.status !== 200) {
      const errorData = await decodeErrorData(response)
      throw new Error(
        `Server Error, status: ${response.status}, ` +
        `message: ${response.statusText}: ${errorData}`
      )
    }
    return response
  }
  /**
   * Sends a POST request.
   *
   * @param path path appended to the base URL
   * @param data request payload
   * @param params optional query parameters
   * @param content value of the `Content-Type` header, 'application/json' when omitted
   * @param responseType axios response type, also used to decode error bodies
   * @returns the response when its status is 200
   * @throws Error 'Network Error: …' when the request failed without a response,
   *   'Server Error, …' when the response status is not 200
   */
  public async post (
    path: string,
    data?: any,
    params?: Record<string, string>,
    content?: string | undefined,
    responseType?: ResponseType | undefined
  ): Promise<RemoteResponse> {
    const req = {
      uri: `${this._url}${path}`,
      method: Method.POST,
      headers: new Map(Object.entries({
        'Content-Type': content ?? 'application/json',
        'x-api-key': this._apiKey(),
        ...(this._dbName !== undefined ? { 'x-lancedb-database': this._dbName } : {})
      })),
      params: new Map(Object.entries(params ?? {})),
      body: data
    }
    let response
    try {
      response = await callWithMiddlewares(req, this._middlewares, {
        responseType,
        timeout: this._timeout
      })
    } catch (err: any) {
      console.error(serializeErrorAsJson(err))
      // No response attached means the request never got an answer from the server.
      if (err.response === undefined) {
        throw new Error(`Network Error: ${err.message as string}`)
      }
      response = toLanceRes(err.response)
    }
    // Unlike get(), the status is checked for successful calls as well.
    if (response.status !== 200) {
      const errorData = await decodeErrorData(response, responseType)
      throw new Error(
        `Server Error, status: ${response.status}, ` +
        `message: ${response.statusText}: ${errorData}`
      )
    }
    return response
  }
  /**
   * Instrument this client with middleware
   * @param mw - The middleware that instruments the client
   * @returns - an instance of this client instrumented with the middleware
   */
  public withMiddleware (mw: HttpLancedbClientMiddleware): HttpLancedbClient {
    const wrapped = this.clone()
    wrapped._middlewares.push(mw)
    return wrapped
  }
  /**
   * Make a clone of this client. The clone gets its own middleware array, so
   * adding a middleware to it leaves this client untouched.
   */
  private clone (): HttpLancedbClient {
    const clone = new HttpLancedbClient(this._url, this._apiKey(), this._timeout, this._dbName)
    for (const mw of this._middlewares) {
      clone._middlewares.push(mw)
    }
    return clone
  }
 }
 /**
 * Renders an error as a JSON string of the form `{"error": {...}}` for logging.
 * Own properties of the error (enumerable or not) are included; when the error
 * carries a `response`, that is serialized from its own properties minus `config`.
 */
 function serializeErrorAsJson(err: AxiosError): string {
  const ownProps = (value: object): string[] => Object.getOwnPropertyNames(value)
  // Round-trip through JSON to get a plain object that can be amended below.
  const error = JSON.parse(JSON.stringify(err, ownProps(err)))
  if (err.response != null) {
    // config contains the request data, too noisy
    const kept = ownProps(err.response).filter(prop => prop !== 'config')
    error.response = JSON.parse(JSON.stringify(err.response, kept))
  } else {
    error.response = null
  }
  return JSON.stringify({ error })
 }
--- a/node/src/remote/index.ts
+++ b/node/src/remote/index.ts
@@ -1,567 +0,0 @@
 // Copyright 2023 LanceDB Developers.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 import {
  type EmbeddingFunction,
  type Table,
  type VectorIndexParams,
  type Connection,
  type ConnectionOptions,
  type CreateTableOptions,
  type VectorIndex,
  type WriteOptions,
  type IndexStats,
  type UpdateArgs,
  type UpdateSqlArgs,
  makeArrowTable,
  type MergeInsertArgs,
  type ColumnAlteration
 } from '../index'
 import { Query } from '../query'
 import { Vector, Table as ArrowTable } from 'apache-arrow'
 import { HttpLancedbClient } from './client'
 import { isEmbeddingFunction } from '../embedding/embedding_function'
 import {
  createEmptyTable,
  fromRecordsToStreamBuffer,
  fromTableToStreamBuffer
 } from '../arrow'
 import { toSQL, TTLCache } from '../util'
 import { type HttpMiddleware } from '../middleware'
 /**
 * Remote connection: a Connection to a database addressed by a `db://<name>`
 * URI. Every operation is sent over HTTP through an HttpLancedbClient.
 */
 export class RemoteConnection implements Connection {
  private _client: HttpLancedbClient
  private readonly _dbName: string
  // Names of tables known to exist, so openTable can skip the describe request.
  // NOTE(review): 300_000 is presumably the TTL in milliseconds — confirm against TTLCache.
  private readonly _tableCache = new TTLCache(300_000)
  /**
   * @param opts connection options; `uri` has to start with `db://`. The API key
   *   falls back to the LANCEDB_API_KEY environment variable when it is missing
   *   or empty. Without `hostOverride` the server is derived from the database
   *   name and region.
   * @throws Error when the URI is not a `db://` URI, or when no API key or no
   *   region is available
   */
  constructor (opts: ConnectionOptions) {
    if (!opts.uri.startsWith('db://')) {
      throw new Error(`Invalid remote DB URI: ${opts.uri}`)
    }
    if (opts.apiKey == null || opts.apiKey === '') {
      opts = Object.assign({}, opts, { apiKey: process.env.LANCEDB_API_KEY })
    }
    if (opts.apiKey === undefined || opts.region === undefined) {
      throw new Error(
        'API key and region are must be passed for remote connections. ' +
        'API key can also be set through LANCEDB_API_KEY env variable.')
    }
    this._dbName = opts.uri.slice('db://'.length)
    let server: string
    if (opts.hostOverride === undefined) {
      server = `https://${this._dbName}.${opts.region}.api.lancedb.com`
    } else {
      server = opts.hostOverride
    }
    this._client = new HttpLancedbClient(
      server,
      opts.apiKey,
      opts.timeout,
      // With a host override the database can no longer be derived from the
      // host name, so it is passed along to be sent as a header instead.
      opts.hostOverride === undefined ? undefined : this._dbName
    )
  }
  /** The `db://<name>` URI this connection was opened with. */
  get uri (): string {
    // The client holds the resolved server URL (https endpoint or host
    // override), so the URI is rebuilt from the database name instead.
    return 'db://' + this._dbName
  }
  /**
   * Lists table names, one page at a time, and records them in the table cache.
   * @param pageToken token of the page to fetch, '' for the first page
   * @param limit maximum number of names to return
   */
  async tableNames (
    pageToken: string = '',
    limit: number = 10
  ): Promise<string[]> {
    const response = await this._client.get('/v1/table/', {
      limit: `${limit}`,
      page_token: pageToken
    })
    const body = await response.body()
    for (const table of body.tables) {
      this._tableCache.set(table, true)
    }
    return body.tables
  }
  /**
   * Opens a table, optionally with an embedding function. Unless the name is
   * in the table cache, the table is described first, which fails for a table
   * the server does not know.
   */
  async openTable (name: string): Promise<Table>
  async openTable<T>(
    name: string,
    embeddings: EmbeddingFunction<T>
  ): Promise<Table<T>>
  async openTable<T>(
    name: string,
    embeddings?: EmbeddingFunction<T>
  ): Promise<Table<T>> {
    // check if the table exists
    if (this._tableCache.get(name) === undefined) {
      await this._client.post(`/v1/table/${encodeURIComponent(name)}/describe/`)
      this._tableCache.set(name, true)
    }
    if (embeddings !== undefined) {
      return new RemoteTable(this._client, name, embeddings)
    } else {
      return new RemoteTable(this._client, name)
    }
  }
  /**
   * Creates a table from records, an Arrow table, or (for an empty table) a schema.
   *
   * @param nameOrOpts table name, or an options object carrying name, schema,
   *   embedding function and data
   * @param data initial rows; takes precedence over `nameOrOpts.data`
   * @param optsOrEmbedding only used when it is an embedding function and
   *   `nameOrOpts` is a string
   * @param opt not used by the remote connection
   * @throws Error when neither data nor a schema is available
   */
  async createTable<T>(
    nameOrOpts: string | CreateTableOptions<T>,
    data?: Array<Record<string, unknown>> | ArrowTable,
    optsOrEmbedding?: WriteOptions | EmbeddingFunction<T>,
    opt?: WriteOptions
  ): Promise<Table<T>> {
    // Logic copied from LocalConnection, refactor these to a base class + connectionImpl pattern
    let schema
    let embeddings: undefined | EmbeddingFunction<T>
    let tableName: string
    if (typeof nameOrOpts === 'string') {
      if (
        optsOrEmbedding !== undefined &&
        isEmbeddingFunction(optsOrEmbedding)
      ) {
        embeddings = optsOrEmbedding
      }
      tableName = nameOrOpts
    } else {
      schema = nameOrOpts.schema
      embeddings = nameOrOpts.embeddingFunction
      tableName = nameOrOpts.name
      if (data === undefined) {
        data = nameOrOpts.data
      }
    }
    let buffer: Buffer
    function isEmpty (
      data: Array<Record<string, unknown>> | ArrowTable<any>
    ): boolean {
      if (data instanceof ArrowTable) {
        return data.numRows === 0
      }
      return data.length === 0
    }
    if (data === undefined || isEmpty(data)) {
      // Without rows the schema cannot be inferred, so it has to be given.
      if (schema === undefined) {
        throw new Error('Either data or schema needs to defined')
      }
      buffer = await fromTableToStreamBuffer(createEmptyTable(schema))
    } else if (data instanceof ArrowTable) {
      buffer = await fromTableToStreamBuffer(data, embeddings)
    } else {
      // data is Array<Record<...>>
      buffer = await fromRecordsToStreamBuffer(data, embeddings)
    }
    const res = await this._client.post(
      `/v1/table/${encodeURIComponent(tableName)}/create/`,
      buffer,
      undefined,
      'application/vnd.apache.arrow.stream'
    )
    if (res.status !== 200) {
      throw new Error(
        `Server Error, status: ${res.status}, ` +
          // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
          `message: ${res.statusText}: ${await res.body()}`
      )
    }
    this._tableCache.set(tableName, true)
    if (embeddings === undefined) {
      return new RemoteTable(this._client, tableName)
    } else {
      return new RemoteTable(this._client, tableName, embeddings)
    }
  }
  /**
   * Drops a table and removes it from the table cache.
   * @param name name of the table to drop
   */
  async dropTable (name: string): Promise<void> {
    await this._client.post(`/v1/table/${encodeURIComponent(name)}/drop/`)
    this._tableCache.delete(name)
  }
  /**
   * Returns a copy of this connection whose client is instrumented with
   * `middleware`; this connection itself is left unchanged.
   */
  withMiddleware (middleware: HttpMiddleware): Connection {
    const wrapped = this.clone()
    wrapped._client = wrapped._client.withMiddleware(middleware)
    return wrapped
  }
  // Shallow copy that bypasses the constructor; the table cache is shared with the original.
  private clone (): RemoteConnection {
    const clone: RemoteConnection = Object.create(RemoteConnection.prototype)
    return Object.assign(clone, this)
  }
 }
 /**
 * Query that is executed through the remote HTTP client instead of a native
 * table handle. The builder methods are inherited from Query.
 */
 export class RemoteQuery<T = number[]> extends Query<T> {
  /**
   * @param query value to search for; embedded first when `embeddings` is given
   * @param _client client used to send the search
   * @param _name table name as it is placed into the request path
   * @param embeddings optional embedding function applied to `query`
   */
  constructor (
    query: T,
    private readonly _client: HttpLancedbClient,
    private readonly _name: string,
    embeddings?: EmbeddingFunction<T>
  ) {
    super(query, undefined, embeddings)
  }
  // TODO: refactor this to a base class + queryImpl pattern
  /**
   * Sends the search to the remote endpoint and returns the rows as plain objects.
   */
  async execute<T = Record<string, unknown>>(): Promise<T[]> {
    // The builder state is private to Query, so it is read through an untyped view.
    const state = this as any
    const rawQuery = state._query
    const queryVector: number[] = this._embeddings !== undefined
      ? (await this._embeddings.embed([rawQuery]))[0]
      : rawQuery as number[]
    const table = await this._client.search(
      this._name,
      queryVector,
      state._limit,
      state._nprobes,
      state._prefilter,
      state._refineFactor,
      state._select,
      state._filter,
      state._metricType,
      state._fastSearch
    )
    return table.toArray().map((row: Record<string, unknown>) => {
      const plain: Record<string, unknown> = {}
      for (const column of Object.keys(row)) {
        const value = row[column]
        // Arrow vectors are unwrapped with toArray(); everything else is copied as is.
        plain[column] = value instanceof Vector ? (value as any).toArray() : value as any
      }
      return plain as unknown as T
    })
  }
 }
 // we are using extend until we have next next version release
 // Table and Connection has both been refactored to interfaces
 export class RemoteTable<T = number[]> implements Table<T> {
  private _client: HttpLancedbClient
  private readonly _embeddings?: EmbeddingFunction<T>
  private readonly _name: string
  constructor (client: HttpLancedbClient, name: string)
  constructor (
    client: HttpLancedbClient,
    name: string,
    embeddings: EmbeddingFunction<T>
  )
  constructor (
    client: HttpLancedbClient,
    name: string,
    embeddings?: EmbeddingFunction<T>
  ) {
    this._client = client
    this._name = name
    this._embeddings = embeddings
  }
  get name (): string {
    return this._name
  }
  get schema (): Promise<any> {
    return this._client
      .post(`/v1/table/${encodeURIComponent(this._name)}/describe/`)
      .then(async (res) => {
        if (res.status !== 200) {
          throw new Error(
            `Server Error, status: ${res.status}, ` +
              // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
              `message: ${res.statusText}: ${await res.body()}`
          )
        }
        return (await res.body())?.schema
      })
  }
  search (query: T): Query<T> {
    return new RemoteQuery(query, this._client, encodeURIComponent(this._name)) //, this._embeddings_new)
  }
  filter (where: string): Query<T> {
    throw new Error('Not implemented')
  }
  /**
   * Sends a merge-insert request for this table.
   *
   * @param on Value sent as the `on` query parameter.
   * @param data Rows to merge. Plain records are converted to an Arrow table
   *   with `makeArrowTable`, using the schema fetched from the server.
   * @param args Merge options. `whenMatchedUpdateAll` and
   *   `whenNotMatchedBySourceDelete` count as enabled unless they are
   *   `false`, `null` or `undefined`; when given as a string, that string is
   *   also sent as the matching `*_filt` parameter.
   * @throws Error when the server answers with a status other than 200.
   */
  async mergeInsert (on: string, data: Array<Record<string, unknown>> | ArrowTable, args: MergeInsertArgs): Promise<void> {
    const tbl: ArrowTable = data instanceof ArrowTable
      ? data
      : makeArrowTable(data, await this.schema)

    // An option is "set" unless it is exactly false, null or undefined.
    const isSet = (option: unknown): boolean =>
      option !== false && option !== null && option !== undefined

    const queryParams: any = { on }

    const updateAll = args.whenMatchedUpdateAll
    queryParams.when_matched_update_all = isSet(updateAll) ? 'true' : 'false'
    if (typeof updateAll === 'string') {
      // A string value doubles as the filter for the update.
      queryParams.when_matched_update_all_filt = updateAll
    }

    queryParams.when_not_matched_insert_all =
      (args.whenNotMatchedInsertAll ?? false) ? 'true' : 'false'

    const deleteUnmatched = args.whenNotMatchedBySourceDelete
    queryParams.when_not_matched_by_source_delete = isSet(deleteUnmatched) ? 'true' : 'false'
    if (typeof deleteUnmatched === 'string') {
      // A string value doubles as the filter for the delete.
      queryParams.when_not_matched_by_source_delete_filt = deleteUnmatched
    }

    const payload = await fromTableToStreamBuffer(tbl, this._embeddings)
    const response = await this._client.post(
      `/v1/table/${encodeURIComponent(this._name)}/merge_insert/`,
      payload,
      queryParams,
      'application/vnd.apache.arrow.stream'
    )
    if (response.status === 200) {
      return
    }
    throw new Error(
      `Server Error, status: ${response.status}, ` +
        // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
        `message: ${response.statusText}: ${await response.body()}`
    )
  }
  /**
   * Appends rows to this table.
   *
   * @param data Rows to append. Plain records are converted to an Arrow table
   *   with `makeArrowTable`, using the schema fetched from the server.
   * @returns The number of rows in the Arrow table that was sent.
   * @throws Error when the server answers with a status other than 200.
   */
  async add (data: Array<Record<string, unknown>> | ArrowTable): Promise<number> {
    const tbl: ArrowTable = data instanceof ArrowTable
      ? data
      : makeArrowTable(data, await this.schema)

    const payload = await fromTableToStreamBuffer(tbl, this._embeddings)
    const insertUrl = `/v1/table/${encodeURIComponent(this._name)}/insert/`
    const response = await this._client.post(
      insertUrl,
      payload,
      { mode: 'append' },
      'application/vnd.apache.arrow.stream'
    )
    if (response.status === 200) {
      return tbl.numRows
    }
    throw new Error(
      `Server Error, status: ${response.status}, ` +
        // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
        `message: ${response.statusText}: ${await response.body()}`
    )
  }
  /**
   * Sends rows to the insert endpoint in `overwrite` mode.
   *
   * @param data Rows to write. Plain records are converted with
   *   `makeArrowTable` without consulting the existing table schema.
   * @returns The number of rows in the Arrow table that was sent.
   * @throws Error when the server answers with a status other than 200.
   */
  async overwrite (data: Array<Record<string, unknown>> | ArrowTable): Promise<number> {
    const tbl: ArrowTable = data instanceof ArrowTable ? data : makeArrowTable(data)

    const payload = await fromTableToStreamBuffer(tbl, this._embeddings)
    const insertUrl = `/v1/table/${encodeURIComponent(this._name)}/insert/`
    const response = await this._client.post(
      insertUrl,
      payload,
      { mode: 'overwrite' },
      'application/vnd.apache.arrow.stream'
    )
    if (response.status === 200) {
      return tbl.numRows
    }
    throw new Error(
      `Server Error, status: ${response.status}, ` +
        // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
        `message: ${response.statusText}: ${await response.body()}`
    )
  }
  /**
   * Requests creation of a vector index on this table.
   *
   * Only `column` (default `'vector'`), `metric_type` (default `'L2'`) and
   * `index_cache_size` (default `null`) are forwarded. Supplying a truthy
   * value for any of the parameters listed below is rejected.
   *
   * @throws Error when an unsupported parameter is set, or when the server
   *   answers with a status other than 200.
   */
  async createIndex (indexParams: VectorIndexParams): Promise<void> {
    const unsupportedParams = [
      'index_name',
      'num_partitions',
      'max_iters',
      'use_opq',
      'num_sub_vectors',
      'num_bits',
      'max_opq_iters',
      'replace'
    ]
    // Report the first unsupported parameter that carries a truthy value.
    const offending = unsupportedParams.find(
      (param) => Boolean(indexParams[param as keyof VectorIndexParams])
    )
    if (offending !== undefined) {
      throw new Error(`${offending} is not supported for remote connections`)
    }

    const data = {
      column: indexParams.column ?? 'vector',
      index_type: 'vector',
      metric_type: indexParams.metric_type ?? 'L2',
      index_cache_size: indexParams.index_cache_size ?? null
    }
    const response = await this._client.post(
      `/v1/table/${encodeURIComponent(this._name)}/create_index/`,
      data
    )
    if (response.status === 200) {
      return
    }
    throw new Error(
      `Server Error, status: ${response.status}, ` +
        // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
        `message: ${response.statusText}: ${await response.body()}`
    )
  }
  /**
   * Requests creation of a scalar index on `column`, always with
   * `replace: true`.
   *
   * @throws Error when the server answers with a status other than 200.
   */
  async createScalarIndex (column: string): Promise<void> {
    const endpoint = `/v1/table/${encodeURIComponent(this._name)}/create_scalar_index/`
    const response = await this._client.post(endpoint, {
      column,
      index_type: 'scalar',
      replace: true
    })
    if (response.status === 200) {
      return
    }
    throw new Error(
      `Server Error, status: ${response.status}, ` +
        // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
        `message: ${response.statusText}: ${await response.body()}`
    )
  }
  /**
   * Requests removal of the index called `index_name` from this table.
   * Both the table name and the index name are URI-encoded in the path.
   *
   * @throws Error when the server answers with a status other than 200.
   */
  async dropIndex (index_name: string): Promise<void> {
    const tablePart = encodeURIComponent(this._name)
    const indexPart = encodeURIComponent(index_name)
    const response = await this._client.post(
      `/v1/table/${tablePart}/index/${indexPart}/drop/`
    )
    if (response.status === 200) {
      return
    }
    throw new Error(
      `Server Error, status: ${response.status}, ` +
        // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
        `message: ${response.statusText}: ${await response.body()}`
    )
  }
  /**
   * Counts the rows of this table, optionally restricted by a predicate.
   *
   * @param filter Optional predicate sent to the server as `predicate`.
   * @returns The response body of the count endpoint.
   * @throws Error when the server answers with a status other than 200, so
   *   that an error payload is never returned as if it were a row count.
   */
  async countRows (filter?: string): Promise<number> {
    const result = await this._client.post(`/v1/table/${encodeURIComponent(this._name)}/count_rows/`, {
      predicate: filter
    })
    if (result.status !== 200) {
      throw new Error(
        `Server Error, status: ${result.status}, ` +
          // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
          `message: ${result.statusText}: ${await result.body()}`
      )
    }
    return (await result.body())
  }
  /**
   * Deletes the rows matching `filter`.
   *
   * @param filter Predicate sent to the server as `predicate`.
   * @throws Error when the server answers with a status other than 200, so a
   *   failed delete is reported instead of being silently ignored.
   */
  async delete (filter: string): Promise<void> {
    const res = await this._client.post(`/v1/table/${encodeURIComponent(this._name)}/delete/`, {
      predicate: filter
    })
    if (res.status !== 200) {
      throw new Error(
        `Server Error, status: ${res.status}, ` +
          // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
          `message: ${res.statusText}: ${await res.body()}`
      )
    }
  }
  /**
   * Updates rows of this table.
   *
   * @param args Either SQL expressions per column (`valuesSql`) or plain
   *   values (`values`) that are converted with `toSQL`. An optional `where`
   *   is sent as the predicate; `null` is sent when it is absent.
   * @throws Error when the server answers with a status other than 200, so a
   *   failed update is reported instead of being silently ignored.
   */
  async update (args: UpdateArgs | UpdateSqlArgs): Promise<void> {
    const filter: string | null = args.where ?? null
    let updates: Record<string, string>
    if ('valuesSql' in args) {
      updates = args.valuesSql
    } else {
      updates = {}
      for (const [key, value] of Object.entries(args.values)) {
        updates[key] = toSQL(value)
      }
    }
    const res = await this._client.post(`/v1/table/${encodeURIComponent(this._name)}/update/`, {
      predicate: filter,
      // Sent as a list of [column, expression] pairs.
      updates: Object.entries(updates)
    })
    if (res.status !== 200) {
      throw new Error(
        `Server Error, status: ${res.status}, ` +
          // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
          `message: ${res.statusText}: ${await res.body()}`
      )
    }
  }
  /**
   * Lists the indices of this table.
   *
   * @returns One entry per element of the response's `indexes` array, mapped
   *   to `columns`, `name`, `uuid` and `status`. An empty array is returned
   *   when the response carries no `indexes`.
   * @throws Error when the server answers with a status other than 200.
   */
  async listIndices (): Promise<VectorIndex[]> {
    const results = await this._client.post(
      `/v1/table/${encodeURIComponent(this._name)}/index/list/`
    )
    if (results.status !== 200) {
      throw new Error(
        `Server Error, status: ${results.status}, ` +
          // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
          `message: ${results.statusText}: ${await results.body()}`
      )
    }
    const body = await results.body()
    // Fall back to [] so the declared array return type always holds.
    return body?.indexes?.map((index: any) => ({
      columns: index.columns,
      name: index.index_name,
      uuid: index.index_uuid,
      status: index.status
    })) ?? []
  }
  /**
   * Fetches statistics for one index of this table.
   *
   * @param indexName Name of the index. It is URI-encoded before being placed
   *   in the request path, as `dropIndex` does, so names containing reserved
   *   characters cannot alter the path.
   * @returns The `num_indexed_rows`, `num_unindexed_rows`, `index_type` and
   *   `distance_type` members of the response body under camelCase names.
   * @throws Error when the server answers with a status other than 200.
   */
  async indexStats (indexName: string): Promise<IndexStats> {
    const results = await this._client.post(
      `/v1/table/${encodeURIComponent(this._name)}/index/${encodeURIComponent(indexName)}/stats/`
    )
    if (results.status !== 200) {
      throw new Error(
        `Server Error, status: ${results.status}, ` +
          // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
          `message: ${results.statusText}: ${await results.body()}`
      )
    }
    const body = await results.body()
    return {
      numIndexedRows: body?.num_indexed_rows,
      numUnindexedRows: body?.num_unindexed_rows,
      indexType: body?.index_type,
      distanceType: body?.distance_type
    }
  }
  /**
   * Unsupported operation: the returned promise always rejects.
   */
  async addColumns (newColumnTransforms: Array<{ name: string, valueSql: string }>): Promise<void> {
    const reason = 'Add columns is not yet supported in LanceDB Cloud.'
    throw new Error(reason)
  }
  /**
   * Unsupported operation: the returned promise always rejects.
   */
  async alterColumns (columnAlterations: ColumnAlteration[]): Promise<void> {
    const reason = 'Alter columns is not yet supported in LanceDB Cloud.'
    throw new Error(reason)
  }
  /**
   * Unsupported operation: the returned promise always rejects.
   */
  async dropColumns (columnNames: string[]): Promise<void> {
    const reason = 'Drop columns is not yet supported in LanceDB Cloud.'
    throw new Error(reason)
  }
  /**
   * Returns a copy of this table whose client has `middleware` applied.
   * The client reference held by this instance is not reassigned.
   */
  withMiddleware(middleware: HttpMiddleware): Table<T> {
    const copy = this.clone()
    const decoratedClient = copy._client.withMiddleware(middleware)
    copy._client = decoratedClient
    return copy
  }
  /**
   * Shallow-copies this instance onto a fresh object that shares the
   * `RemoteTable` prototype; the constructor is not run.
   */
  private clone (): RemoteTable<T> {
    const blank: RemoteTable<T> = Object.create(RemoteTable.prototype)
    Object.assign(blank, this)
    return blank
  }
 }
--- a/node/src/sanitize.ts
+++ b/node/src/sanitize.ts
@@ -1,508 +0,0 @@
 // Copyright 2023 LanceDB Developers.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 // The utilities in this file help sanitize data from the user's arrow
 // library into the types expected by vectordb's arrow library.  Node
 // generally allows for multiple versions of the same library (and sometimes
 // even multiple copies of the same version) to be installed at the same
 // time.  However, arrow-js uses instanceof, which expects that the input
 // comes from the exact same library instance.  This is not always the case
 // and so we must sanitize the input to ensure that it is compatible.
 import {
  Field,
  Utf8,
  FixedSizeBinary,
  FixedSizeList,
  Schema,
  List,
  Struct,
  Float,
  Bool,
  Date_,
  Decimal,
  type DataType,
  Dictionary,
  Binary,
  Float32,
  Interval,
  Map_,
  Duration,
  Union,
  Time,
  Timestamp,
  Type,
  Null,
  Int,
  type Precision,
  type DateUnit,
  Int8,
  Int16,
  Int32,
  Int64,
  Uint8,
  Uint16,
  Uint32,
  Uint64,
  Float16,
  Float64,
  DateDay,
  DateMillisecond,
  DenseUnion,
  SparseUnion,
  TimeNanosecond,
  TimeMicrosecond,
  TimeMillisecond,
  TimeSecond,
  TimestampNanosecond,
  TimestampMicrosecond,
  TimestampMillisecond,
  TimestampSecond,
  IntervalDayTime,
  IntervalYearMonth,
  DurationNanosecond,
  DurationMicrosecond,
  DurationMillisecond,
  DurationSecond
 } from "apache-arrow";
 import type { IntBitWidth, TimeBitWidth } from "apache-arrow/type";
 /**
 * Validate a metadata value and return it as a `Map<string, string>`.
 *
 * @param metadataLike The candidate metadata; `undefined` and `null` mean
 *   "no metadata".
 * @returns `undefined` when no metadata was given, otherwise the same Map
 *   instance that was passed in.
 * @throws Error when the value is not a Map, or when any key or any value
 *   in the Map is not a string.
 */
 function sanitizeMetadata(
  metadataLike?: unknown
 ): Map<string, string> | undefined {
  if (metadataLike === undefined || metadataLike === null) {
    return undefined;
  }
  if (!(metadataLike instanceof Map)) {
    throw Error("Expected metadata, if present, to be a Map<string, string>");
  }
  for (const [key, value] of metadataLike) {
    // Reject the entry when EITHER side is not a string.
    if (typeof key !== "string" || typeof value !== "string") {
      throw Error(
        "Expected metadata, if present, to be a Map<string, string> but it had non-string keys or values"
      );
    }
  }
  return metadataLike as Map<string, string>;
 }
 /**
 * Rebuild an `Int` type from an object exposing a numeric `bitWidth` and a
 * boolean `isSigned`; throws when either property is missing or mistyped.
 */
 function sanitizeInt(typeLike: object) {
  if (
    "bitWidth" in typeLike &&
    typeof typeLike.bitWidth === "number" &&
    "isSigned" in typeLike &&
    typeof typeLike.isSigned === "boolean"
  ) {
    return new Int(typeLike.isSigned, typeLike.bitWidth as IntBitWidth);
  }
  throw Error(
    "Expected an Int Type to have a `bitWidth` and `isSigned` property"
  );
 }
 /**
 * Rebuild a `Float` type from an object exposing a numeric `precision`;
 * throws when the property is missing or not a number.
 */
 function sanitizeFloat(typeLike: object) {
  if ("precision" in typeLike && typeof typeLike.precision === "number") {
    return new Float(typeLike.precision as Precision);
  }
  throw Error("Expected a Float Type to have a `precision` property");
 }
 /**
 * Rebuild a `Decimal` type from an object exposing numeric `scale`,
 * `precision` and `bitWidth`; throws when any of them is missing or not a
 * number.
 */
 function sanitizeDecimal(typeLike: object) {
  if (
    "scale" in typeLike &&
    typeof typeLike.scale === "number" &&
    "precision" in typeLike &&
    typeof typeLike.precision === "number" &&
    "bitWidth" in typeLike &&
    typeof typeLike.bitWidth === "number"
  ) {
    return new Decimal(typeLike.scale, typeLike.precision, typeLike.bitWidth);
  }
  throw Error(
    "Expected a Decimal Type to have `scale`, `precision`, and `bitWidth` properties"
  );
 }
 /**
 * Rebuild a `Date_` type from an object exposing a numeric `unit`; throws
 * when the property is missing or not a number.
 */
 function sanitizeDate(typeLike: object) {
  if ("unit" in typeLike && typeof typeLike.unit === "number") {
    return new Date_(typeLike.unit as DateUnit);
  }
  throw Error("Expected a Date type to have a `unit` property");
 }
 /**
 * Rebuild a `Time` type from an object exposing numeric `unit` and
 * `bitWidth`; throws when either is missing or not a number.
 */
 function sanitizeTime(typeLike: object) {
  if (
    "unit" in typeLike &&
    typeof typeLike.unit === "number" &&
    "bitWidth" in typeLike &&
    typeof typeLike.bitWidth === "number"
  ) {
    return new Time(typeLike.unit, typeLike.bitWidth as TimeBitWidth);
  }
  throw Error(
    "Expected a Time type to have `unit` and `bitWidth` properties"
  );
 }
 /**
 * Rebuild a `Timestamp` type from an object exposing a numeric `unit`.
 * A string `timezone` is carried over; anything else becomes `null`.
 * Throws when `unit` is missing or not a number.
 */
 function sanitizeTimestamp(typeLike: object) {
  if (!("unit" in typeLike && typeof typeLike.unit === "number")) {
    throw Error("Expected a Timestamp type to have a `unit` property");
  }
  const timezone =
    "timezone" in typeLike && typeof typeLike.timezone === "string"
      ? typeLike.timezone
      : null;
  return new Timestamp(typeLike.unit, timezone);
 }
 /**
 * Instantiate the given unit-specific timestamp class. A string `timezone`
 * on `typeLike` is passed to the constructor; otherwise `null` is passed.
 */
 function sanitizeTypedTimestamp(
  typeLike: object,
  Datatype:
    | typeof TimestampNanosecond
    | typeof TimestampMicrosecond
    | typeof TimestampMillisecond
    | typeof TimestampSecond
 ) {
  const timezone =
    "timezone" in typeLike && typeof typeLike.timezone === "string"
      ? typeLike.timezone
      : null;
  return new Datatype(timezone);
 }
 /**
 * Rebuild an `Interval` type from an object exposing a numeric `unit`;
 * throws when the property is missing or not a number.
 */
 function sanitizeInterval(typeLike: object) {
  if ("unit" in typeLike && typeof typeLike.unit === "number") {
    return new Interval(typeLike.unit);
  }
  throw Error("Expected an Interval type to have a `unit` property");
 }
 /**
 * Rebuild a `List` type. `typeLike.children` must be an array holding
 * exactly one field-like element, which is sanitized with `sanitizeField`.
 */
 function sanitizeList(typeLike: object) {
  if (!("children" in typeLike && Array.isArray(typeLike.children))) {
    throw Error(
      "Expected a List type to have an array-like `children` property"
    );
  }
  const members = typeLike.children;
  if (members.length !== 1) {
    throw Error("Expected a List type to have exactly one child");
  }
  const [onlyChild] = members;
  return new List(sanitizeField(onlyChild));
 }
 /**
 * Rebuild a `Struct` type by sanitizing every element of
 * `typeLike.children`; throws when `children` is missing or not an array.
 */
 function sanitizeStruct(typeLike: object) {
  if ("children" in typeLike && Array.isArray(typeLike.children)) {
    const members = typeLike.children.map((member) => sanitizeField(member));
    return new Struct(members);
  }
  throw Error(
    "Expected a Struct type to have an array-like `children` property"
  );
 }
 /**
 * Rebuild a `Union` type from an object exposing `typeIds`, a numeric
 * `mode`, and an array `children` whose elements are sanitized as fields.
 * The `typeIds`/`mode` check runs before the `children` check.
 */
 function sanitizeUnion(typeLike: object) {
  if (
    !(
      "typeIds" in typeLike &&
      "mode" in typeLike &&
      typeof typeLike.mode === "number"
    )
  ) {
    throw Error(
      "Expected a Union type to have `typeIds` and `mode` properties"
    );
  }
  if (!("children" in typeLike && Array.isArray(typeLike.children))) {
    throw Error(
      "Expected a Union type to have an array-like `children` property"
    );
  }
  return new Union(
    typeLike.mode,
    typeLike.typeIds as any,
    typeLike.children.map((member) => sanitizeField(member))
  );
 }
 /**
 * Instantiate the given `DenseUnion` / `SparseUnion` class from an object
 * exposing `typeIds` and an array `children` whose elements are sanitized
 * as fields. The `typeIds` check runs before the `children` check.
 */
 function sanitizeTypedUnion(
  typeLike: object,
  UnionType: typeof DenseUnion | typeof SparseUnion
 ) {
  const hasTypeIds = "typeIds" in typeLike;
  if (!hasTypeIds) {
    throw Error(
      "Expected a DenseUnion/SparseUnion type to have a `typeIds` property"
    );
  }
  if (!("children" in typeLike && Array.isArray(typeLike.children))) {
    throw Error(
      "Expected a DenseUnion/SparseUnion type to have an array-like `children` property"
    );
  }
  return new UnionType(
    (typeLike as any).typeIds,
    typeLike.children.map((member) => sanitizeField(member))
  );
 }
 /**
 * Rebuild a `FixedSizeBinary` type from an object exposing a numeric
 * `byteWidth`; throws when the property is missing or not a number.
 */
 function sanitizeFixedSizeBinary(typeLike: object) {
  if ("byteWidth" in typeLike && typeof typeLike.byteWidth === "number") {
    return new FixedSizeBinary(typeLike.byteWidth);
  }
  throw Error(
    "Expected a FixedSizeBinary type to have a `byteWidth` property"
  );
 }
 /**
 * Rebuild a `FixedSizeList` type from an object exposing a numeric
 * `listSize` and an array `children` with exactly one field-like element.
 * The checks run in that order, each with its own error message.
 */
 function sanitizeFixedSizeList(typeLike: object) {
  if (!("listSize" in typeLike && typeof typeLike.listSize === "number")) {
    throw Error("Expected a FixedSizeList type to have a `listSize` property");
  }
  if (!("children" in typeLike && Array.isArray(typeLike.children))) {
    throw Error(
      "Expected a FixedSizeList type to have an array-like `children` property"
    );
  }
  const members = typeLike.children;
  if (members.length !== 1) {
    throw Error("Expected a FixedSizeList type to have exactly one child");
  }
  const itemField = sanitizeField(members[0]);
  return new FixedSizeList(typeLike.listSize, itemField);
 }
 /**
 * Rebuild a `Map_` type from an object exposing an array `children` (each
 * element sanitized as a field) and a boolean `keysSorted`. The `children`
 * check runs before the `keysSorted` check.
 */
 function sanitizeMap(typeLike: object) {
  if (!("children" in typeLike && Array.isArray(typeLike.children))) {
    throw Error(
      "Expected a Map type to have an array-like `children` property"
    );
  }
  if (!("keysSorted" in typeLike && typeof typeLike.keysSorted === "boolean")) {
    throw Error("Expected a Map type to have a `keysSorted` property");
  }
  const entries = typeLike.children.map((entry) => sanitizeField(entry));
  return new Map_(entries as any, typeLike.keysSorted);
 }
 /**
 * Rebuild a `Duration` type from an object exposing a numeric `unit`;
 * throws when the property is missing or not a number.
 */
 function sanitizeDuration(typeLike: object) {
  if ("unit" in typeLike && typeof typeLike.unit === "number") {
    return new Duration(typeLike.unit);
  }
  throw Error("Expected a Duration type to have a `unit` property");
 }
 /**
 * Rebuild a `Dictionary` type from an object exposing a numeric `id`,
 * object-typed `indices` and `dictionary`, and a boolean `isOrdered`.
 * The properties are checked in that order; the `dictionary` type is
 * sanitized before the `indices` type.
 */
 function sanitizeDictionary(typeLike: object) {
  if (!("id" in typeLike && typeof typeLike.id === "number")) {
    throw Error("Expected a Dictionary type to have an `id` property");
  }
  if (!("indices" in typeLike && typeof typeLike.indices === "object")) {
    throw Error("Expected a Dictionary type to have an `indices` property");
  }
  if (!("dictionary" in typeLike && typeof typeLike.dictionary === "object")) {
    throw Error("Expected a Dictionary type to have an `dictionary` property");
  }
  if (!("isOrdered" in typeLike && typeof typeLike.isOrdered === "boolean")) {
    throw Error("Expected a Dictionary type to have an `isOrdered` property");
  }
  const valueType = sanitizeType(typeLike.dictionary);
  const indexType = sanitizeType(typeLike.indices) as any;
  return new Dictionary(valueType, indexType, typeLike.id, typeLike.isOrdered);
 }
 /**
 * Convert something type-like into a DataType built from the arrow classes
 * imported by this file.
 *
 * The type id is read from `typeLike.typeId`, which may be either a number
 * or a function returning the id. Parameterised types are rebuilt by the
 * matching `sanitize*` helper; the remaining types are constructed directly.
 *
 * @throws Error when `typeLike` is not a non-null object, has no `typeId`,
 *   has a `typeId` that is neither a function nor a number, reports
 *   `Type.NONE`, or reports an id that matches no known type.
 */
 function sanitizeType(typeLike: unknown): DataType<any> {
  if (typeof typeLike !== "object" || typeLike === null) {
    throw Error("Expected a Type but object was null/undefined");
  }
  // Only presence is checked here; the function/number distinction is
  // handled (and anything else rejected) just below.
  if (!("typeId" in typeLike)) {
    throw Error("Expected a Type to have a typeId property");
  }
  let typeId: Type;
  if (typeof typeLike.typeId === "function") {
    typeId = (typeLike.typeId as () => unknown)() as Type;
  } else if (typeof typeLike.typeId === "number") {
    typeId = typeLike.typeId as Type;
  } else {
    throw Error("Type's typeId property was not a function or number");
  }
  switch (typeId) {
    case Type.NONE:
      throw Error("Received a Type with a typeId of NONE");
    case Type.Null:
      return new Null();
    case Type.Int:
      return sanitizeInt(typeLike);
    case Type.Float:
      return sanitizeFloat(typeLike);
    case Type.Binary:
      return new Binary();
    case Type.Utf8:
      return new Utf8();
    case Type.Bool:
      return new Bool();
    case Type.Decimal:
      return sanitizeDecimal(typeLike);
    case Type.Date:
      return sanitizeDate(typeLike);
    case Type.Time:
      return sanitizeTime(typeLike);
    case Type.Timestamp:
      return sanitizeTimestamp(typeLike);
    case Type.Interval:
      return sanitizeInterval(typeLike);
    case Type.List:
      return sanitizeList(typeLike);
    case Type.Struct:
      return sanitizeStruct(typeLike);
    case Type.Union:
      return sanitizeUnion(typeLike);
    case Type.FixedSizeBinary:
      return sanitizeFixedSizeBinary(typeLike);
    case Type.FixedSizeList:
      return sanitizeFixedSizeList(typeLike);
    case Type.Map:
      return sanitizeMap(typeLike);
    case Type.Duration:
      return sanitizeDuration(typeLike);
    case Type.Dictionary:
      return sanitizeDictionary(typeLike);
    case Type.Int8:
      return new Int8();
    case Type.Int16:
      return new Int16();
    case Type.Int32:
      return new Int32();
    case Type.Int64:
      return new Int64();
    case Type.Uint8:
      return new Uint8();
    case Type.Uint16:
      return new Uint16();
    case Type.Uint32:
      return new Uint32();
    case Type.Uint64:
      return new Uint64();
    case Type.Float16:
      return new Float16();
    case Type.Float32:
      return new Float32();
    case Type.Float64:
      return new Float64();
    case Type.DateMillisecond:
      return new DateMillisecond();
    case Type.DateDay:
      return new DateDay();
    case Type.TimeNanosecond:
      return new TimeNanosecond();
    case Type.TimeMicrosecond:
      return new TimeMicrosecond();
    case Type.TimeMillisecond:
      return new TimeMillisecond();
    case Type.TimeSecond:
      return new TimeSecond();
    case Type.TimestampNanosecond:
      return sanitizeTypedTimestamp(typeLike, TimestampNanosecond);
    case Type.TimestampMicrosecond:
      return sanitizeTypedTimestamp(typeLike, TimestampMicrosecond);
    case Type.TimestampMillisecond:
      return sanitizeTypedTimestamp(typeLike, TimestampMillisecond);
    case Type.TimestampSecond:
      return sanitizeTypedTimestamp(typeLike, TimestampSecond);
    case Type.DenseUnion:
      return sanitizeTypedUnion(typeLike, DenseUnion);
    case Type.SparseUnion:
      return sanitizeTypedUnion(typeLike, SparseUnion);
    case Type.IntervalDayTime:
      return new IntervalDayTime();
    case Type.IntervalYearMonth:
      return new IntervalYearMonth();
    case Type.DurationNanosecond:
      return new DurationNanosecond();
    case Type.DurationMicrosecond:
      return new DurationMicrosecond();
    case Type.DurationMillisecond:
      return new DurationMillisecond();
    case Type.DurationSecond:
      return new DurationSecond();
    default:
      // An id from a different arrow build may match none of the cases;
      // fail loudly instead of returning undefined.
      throw Error("Received a Type with an unrecognized typeId: " + String(typeId));
  }
 }
 /**
 * Convert something field-like into a `Field` instance.
 *
 * A value that is already an instance of this file's `Field` is returned
 * untouched. Otherwise the object must expose `type`, `name` (string) and
 * `nullable` (boolean); `type` is sanitized first, and `metadata`, when
 * present, is validated with `sanitizeMetadata`.
 */
 function sanitizeField(fieldLike: unknown): Field {
  if (fieldLike instanceof Field) {
    return fieldLike;
  }
  if (!(typeof fieldLike === "object" && fieldLike !== null)) {
    throw Error("Expected a Field but object was null/undefined");
  }
  if (!("type" in fieldLike && "name" in fieldLike && "nullable" in fieldLike)) {
    throw Error(
      "The field passed in is missing a `type`/`name`/`nullable` property"
    );
  }
  const type = sanitizeType(fieldLike.type);
  const name = fieldLike.name;
  if (typeof name !== "string") {
    throw Error("The field passed in had a non-string `name` property");
  }
  const nullable = fieldLike.nullable;
  if (typeof nullable !== "boolean") {
    throw Error("The field passed in had a non-boolean `nullable` property");
  }
  const metadata =
    "metadata" in fieldLike ? sanitizeMetadata(fieldLike.metadata) : undefined;
  return new Field(name, type, nullable, metadata);
 }
 /**
 * Turn a schema-like value into a Schema instance.
 *
 * Callers frequently need this even when they already hold a Schema,
 * because their Schema may come from a different copy of apache-arrow than
 * the one lancedb uses. A value that is an instance of this file's Schema
 * is returned as-is; otherwise its `metadata` (when present) is validated
 * and then each element of its `fields` array is sanitized.
 */
 export function sanitizeSchema(schemaLike: unknown): Schema {
  if (schemaLike instanceof Schema) {
    return schemaLike;
  }
  if (!(typeof schemaLike === "object" && schemaLike !== null)) {
    throw Error("Expected a Schema but object was null/undefined");
  }
  if (!("fields" in schemaLike)) {
    throw Error(
      "The schema passed in does not appear to be a schema (no 'fields' property)"
    );
  }
  // Metadata is validated before the shape of `fields` is checked.
  const metadata =
    "metadata" in schemaLike ? sanitizeMetadata(schemaLike.metadata) : undefined;
  if (!Array.isArray(schemaLike.fields)) {
    throw Error(
      "The schema passed in had a 'fields' property but it was not an array"
    );
  }
  const cleanFields = schemaLike.fields.map((candidate) => sanitizeField(candidate));
  return new Schema(cleanFields, metadata);
 }
--- a/node/src/test/arrow.test.ts
+++ b/node/src/test/arrow.test.ts
@@ -1,360 +0,0 @@
 // Copyright 2024 Lance Developers.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 import { describe } from 'mocha'
 import { assert, expect, use as chaiUse } from 'chai'
 import * as chaiAsPromised from 'chai-as-promised'
 import { convertToTable, fromTableToBuffer, makeArrowTable, makeEmptyTable } from '../arrow'
 import {
  Field,
  FixedSizeList,
  Float16,
  Float32,
  Int32,
  tableFromIPC,
  Schema,
  Float64,
  type Table,
  Binary,
  Bool,
  Utf8,
  Struct,
  List,
  DataType,
  Dictionary,
  Int64,
  MetadataVersion
 } from 'apache-arrow'
 import {
  Dictionary as OldDictionary,
  Field as OldField,
  FixedSizeList as OldFixedSizeList,
  Float32 as OldFloat32,
  Int32 as OldInt32,
  Struct as OldStruct,
  Schema as OldSchema,
  TimestampNanosecond as OldTimestampNanosecond,
  Utf8 as OldUtf8
 } from 'apache-arrow-old'
 import { type EmbeddingFunction } from '../embedding/embedding_function'
 chaiUse(chaiAsPromised)
 // Builds a fresh one-element list of sample rows covering binary, boolean,
 // numeric, string, struct and list values.
 function sampleRecords (): Array<Record<string, any>> {
  const row: Record<string, any> = {
    binary: Buffer.alloc(5),
    boolean: false,
    number: 7,
    string: 'hello',
    struct: { x: 0, y: 0 },
    list: ['anime', 'action', 'comedy']
  }
  return [row]
 }
 // Shared check for the different ways of building a table: the callback
 // receives the sample records, the same record with its keys in reverse
 // order, and the expected schema, and must return the table to verify.
 async function checkTableCreation (tableCreationMethod: (records: any, recordsReversed: any, schema: Schema) => Promise<Table>): Promise<void> {
  const schema = new Schema([
    new Field('binary', new Binary(), false),
    new Field('boolean', new Bool(), false),
    new Field('number', new Float64(), false),
    new Field('string', new Utf8(), false),
    new Field('struct', new Struct([
      new Field('x', new Float64(), false),
      new Field('y', new Float64(), false)
    ])),
    new Field('list', new List(new Field('item', new Utf8(), false)), false)
  ])
  const records = sampleRecords()
  const recordsReversed = [{
    list: ['anime', 'action', 'comedy'],
    struct: { x: 0, y: 0 },
    string: 'hello',
    number: 7,
    boolean: false,
    binary: Buffer.alloc(5)
  }]
  const table = await tableCreationMethod(records, recordsReversed, schema)
  // Every expected field must be non-nullable and have the expected type,
  // whether the column is looked up by name or by position.
  for (let idx = 0; idx < schema.fields.length; idx++) {
    const expected = schema.fields[idx]
    const produced = table.schema.fields[idx]
    assert.isFalse(produced.nullable)
    assert.equal(table.getChild(expected.name)?.type.toString(), expected.type.toString())
    assert.equal(table.getChildAt(idx)?.type.toString(), expected.type.toString())
  }
 }
 // Tests for makeArrowTable: schema handling, vector column defaults and
 // overrides, dictionary encoding, and the shared table-creation checks.
 describe('The function makeArrowTable', function () {
  it('will use data types from a provided schema instead of inference', async function () {
    const schema = new Schema([
      new Field('a', new Int32()),
      new Field('b', new Float32()),
      new Field('c', new FixedSizeList(3, new Field('item', new Float16()))),
      new Field('d', new Int64())
    ])
    const table = makeArrowTable(
      [
        { a: 1, b: 2, c: [1, 2, 3], d: 9 },
        { a: 4, b: 5, c: [4, 5, 6], d: 10 },
        { a: 7, b: 8, c: [7, 8, 9], d: null }
      ],
      { schema }
    )
    // Round-trip through the IPC buffer and compare the decoded schema.
    const buf = await fromTableToBuffer(table)
    assert.isAbove(buf.byteLength, 0)
    const actual = tableFromIPC(buf)
    assert.equal(actual.numRows, 3)
    const actualSchema = actual.schema
    assert.deepEqual(actualSchema, schema)
  })
  it('will assume the column `vector` is FixedSizeList<Float32> by default', async function () {
    const schema = new Schema([
      new Field('a', new Float64()),
      new Field('b', new Float64()),
      new Field(
        'vector',
        new FixedSizeList(3, new Field('item', new Float32(), true))
      )
    ])
    // No options are passed, so the schema above must come from inference.
    const table = makeArrowTable([
      { a: 1, b: 2, vector: [1, 2, 3] },
      { a: 4, b: 5, vector: [4, 5, 6] },
      { a: 7, b: 8, vector: [7, 8, 9] }
    ])
    const buf = await fromTableToBuffer(table)
    assert.isAbove(buf.byteLength, 0)
    const actual = tableFromIPC(buf)
    assert.equal(actual.numRows, 3)
    const actualSchema = actual.schema
    assert.deepEqual(actualSchema, schema)
  })
  it('can support multiple vector columns', async function () {
    const schema = new Schema([
      new Field('a', new Float64()),
      new Field('b', new Float64()),
      new Field('vec1', new FixedSizeList(3, new Field('item', new Float16(), true))),
      new Field('vec2', new FixedSizeList(3, new Field('item', new Float16(), true)))
    ])
    const table = makeArrowTable(
      [
        { a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] },
        { a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] },
        { a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] }
      ],
      {
        vectorColumns: {
          vec1: { type: new Float16() },
          vec2: { type: new Float16() }
        }
      }
    )
    const buf = await fromTableToBuffer(table)
    assert.isAbove(buf.byteLength, 0)
    const actual = tableFromIPC(buf)
    assert.equal(actual.numRows, 3)
    const actualSchema = actual.schema
    assert.deepEqual(actualSchema, schema)
  })
  it('will allow different vector column types', async function () {
    const table = makeArrowTable(
      [
        { fp16: [1], fp32: [1], fp64: [1] }
      ],
      {
        vectorColumns: {
          fp16: { type: new Float16() },
          fp32: { type: new Float32() },
          fp64: { type: new Float64() }
        }
      }
    )
    // Each vector column's child (item) type must match the requested type.
    assert.equal(table.getChild('fp16')?.type.children[0].type.toString(), new Float16().toString())
    assert.equal(table.getChild('fp32')?.type.children[0].type.toString(), new Float32().toString())
    assert.equal(table.getChild('fp64')?.type.children[0].type.toString(), new Float64().toString())
  })
  it('will use dictionary encoded strings if asked', async function () {
    // Without options a string column is plain Utf8.
    const table = makeArrowTable([{ str: 'hello' }])
    assert.isTrue(DataType.isUtf8(table.getChild('str')?.type))
    // Dictionary encoding can be requested through the option...
    const tableWithDict = makeArrowTable([{ str: 'hello' }], { dictionaryEncodeStrings: true })
    assert.isTrue(DataType.isDictionary(tableWithDict.getChild('str')?.type))
    // ...or through an explicit schema.
    const schema = new Schema([
      new Field('str', new Dictionary(new Utf8(), new Int32()))
    ])
    const tableWithDict2 = makeArrowTable([{ str: 'hello' }], { schema })
    assert.isTrue(DataType.isDictionary(tableWithDict2.getChild('str')?.type))
  })
  it('will infer data types correctly', async function () {
    await checkTableCreation(async (records) => makeArrowTable(records))
  })
  it('will allow a schema to be provided', async function () {
    await checkTableCreation(async (records, _, schema) => makeArrowTable(records, { schema }))
  })
  it('will use the field order of any provided schema', async function () {
    await checkTableCreation(async (_, recordsReversed, schema) => makeArrowTable(recordsReversed, { schema }))
  })
  it('will make an empty table', async function () {
    await checkTableCreation(async (_, __, schema) => makeArrowTable([], { schema }))
  })
 })
 // Test double for an embedding function: reads the 'string' column,
 // declares a dimension of 2 with Float16 items, and embeds every input as
 // a zero vector.
 class DummyEmbedding implements EmbeddingFunction<string> {
  public readonly sourceColumn = 'string'
  public readonly embeddingDimension = 2
  public readonly embeddingDataType = new Float16()
  async embed (data: string[]): Promise<number[][]> {
    const vectors: number[][] = data.map((_text) => [0, 0])
    return vectors
  }
 }
 // Test double like DummyEmbedding, but without `embeddingDimension` or
 // `embeddingDataType`; every input is still embedded as a zero vector.
 class DummyEmbeddingWithNoDimension implements EmbeddingFunction<string> {
  public readonly sourceColumn = 'string'
  async embed (data: string[]): Promise<number[][]> {
    const vectors: number[][] = data.map((_text) => [0, 0])
    return vectors
  }
 }
 // Tests for convertToTable / fromTableToBuffer, with and without an
 // embedding function and with and without an explicit schema.
 describe('convertToTable', function () {
  it('will infer data types correctly', async function () {
    await checkTableCreation(async (records) => await convertToTable(records))
  })
  it('will allow a schema to be provided', async function () {
    await checkTableCreation(async (records, _, schema) => await convertToTable(records, undefined, { schema }))
  })
  it('will use the field order of any provided schema', async function () {
    await checkTableCreation(async (_, recordsReversed, schema) => await convertToTable(recordsReversed, undefined, { schema }))
  })
  it('will make an empty table', async function () {
    await checkTableCreation(async (_, __, schema) => await convertToTable([], undefined, { schema }))
  })
  it('will apply embeddings', async function () {
    const records = sampleRecords()
    const table = await convertToTable(records, new DummyEmbedding())
    // The embedding must appear as a 'vector' FixedSizeList of Float16 items.
    assert.isTrue(DataType.isFixedSizeList(table.getChild('vector')?.type))
    assert.equal(table.getChild('vector')?.type.children[0].type.toString(), new Float16().toString())
  })
  it('will fail if missing the embedding source column', async function () {
    // The records have no 'string' column, which DummyEmbedding reads from.
    return await expect(convertToTable([{ id: 1 }], new DummyEmbedding())).to.be.rejectedWith("'string' was not present")
  })
  it('use embeddingDimension if embedding missing from table', async function () {
    const schema = new Schema([
      new Field('string', new Utf8(), false)
    ])
    // Simulate getting an empty Arrow table (minus embedding) from some other source
    // In other words, we aren't starting with records
    const table = makeEmptyTable(schema)
    // If the embedding specifies the dimension we are fine
    await fromTableToBuffer(table, new DummyEmbedding())
    // We can also supply a schema and should be ok
    const schemaWithEmbedding = new Schema([
      new Field('string', new Utf8(), false),
      new Field('vector', new FixedSizeList(2, new Field('item', new Float16(), false)), false)
    ])
    await fromTableToBuffer(table, new DummyEmbeddingWithNoDimension(), schemaWithEmbedding)
    // Otherwise we will get an error
    return await expect(fromTableToBuffer(table, new DummyEmbeddingWithNoDimension())).to.be.rejectedWith('does not specify `embeddingDimension`')
  })
  it('will apply embeddings to an empty table', async function () {
    const schema = new Schema([
      new Field('string', new Utf8(), false),
      new Field('vector', new FixedSizeList(2, new Field('item', new Float16(), false)), false)
    ])
    const table = await convertToTable([], new DummyEmbedding(), { schema })
    assert.isTrue(DataType.isFixedSizeList(table.getChild('vector')?.type))
    assert.equal(table.getChild('vector')?.type.children[0].type.toString(), new Float16().toString())
  })
  it('will complain if embeddings present but schema missing embedding column', async function () {
    // The schema has no 'vector' field even though an embedding is supplied.
    const schema = new Schema([
      new Field('string', new Utf8(), false)
    ])
    return await expect(convertToTable([], new DummyEmbedding(), { schema })).to.be.rejectedWith('column vector was missing')
  })
  it('will provide a nice error if run twice', async function () {
    const records = sampleRecords()
    const table = await convertToTable(records, new DummyEmbedding())
    // fromTableToBuffer will try and apply the embeddings again
    return await expect(fromTableToBuffer(table, new DummyEmbedding())).to.be.rejectedWith('already existed')
  })
 })
 // makeEmptyTable must pass the shared table-creation checks using only
 // the schema.
 describe('makeEmptyTable', function () {
  it('will make an empty table', async function () {
    const buildFromSchema = async (_records: any, _reversed: any, schema: Schema): Promise<Table> => {
      return makeEmptyTable(schema)
    }
    await checkTableCreation(buildFromSchema)
  })
 })
 // Builds a schema with the classes from 'apache-arrow-old' and checks that
 // it can still be used to create and serialize a table.
 describe('when using two versions of arrow', function () {
  it('can still import data', async function() {
    const vectorItem = new OldField("item", new OldFloat32(), true)
    const structMembers = [
      new OldField('nested', new OldDictionary(new OldUtf8(), new OldInt32(), 1, true)),
      new OldField('ts_with_tz', new OldTimestampNanosecond("some_tz")),
      new OldField('ts_no_tz', new OldTimestampNanosecond(null))
    ]
    const foreignSchema: any = new OldSchema([
      new OldField('id', new OldInt32()),
      new OldField('vector', new OldFixedSizeList(1024, vectorItem)),
      new OldField('struct', new OldStruct(structMembers))
    ])
    // The "foreign" arrow used here is version 13, which has no
    // metadataVersion on its schemas. Nothing relies on that property, but
    // its absence makes deepEqual fail, so it is patched back in.
    foreignSchema.metadataVersion = MetadataVersion.V5
    const emptyTable = makeArrowTable([], { schema: foreignSchema })
    const ipcBytes = await fromTableToBuffer(emptyTable)
    assert.isAbove(ipcBytes.byteLength, 0)
    const decoded = tableFromIPC(ipcBytes)
    assert.deepEqual(decoded.schema, foreignSchema)
  })
 })
--- a/node/src/test/embedding/openai.ts
+++ b/node/src/test/embedding/openai.ts
@@ -1,55 +0,0 @@
 // Copyright 2023 Lance Developers.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 import { describe } from 'mocha'
 import { assert } from 'chai'
 import { OpenAIEmbeddingFunction } from '../../embedding/openai'
 import { isEmbeddingFunction } from '../../embedding/embedding_function'
 // eslint-disable-next-line @typescript-eslint/no-var-requires
 const OpenAIApi = require('openai')
 // eslint-disable-next-line @typescript-eslint/no-var-requires
 const { stub } = require('sinon')
 describe('OpenAPIEmbeddings', function () {
  // Canned return value for the stubbed `create` call: two 1536-element embeddings.
  const stubValue = {
    data: [
      {
        embedding: Array(1536).fill(1.0)
      },
      {
        embedding: Array(1536).fill(2.0)
      }
    ]
  }
  describe('#embed', function () {
    it('should create vector embeddings', async function () {
      const openAIStub = stub(OpenAIApi.Embeddings.prototype, 'create').returns(stubValue)
      try {
        const f = new OpenAIEmbeddingFunction('text', 'sk-key')
        const vectors = await f.embed(['abc', 'def'])
        assert.isTrue(openAIStub.calledOnce)
        assert.equal(vectors.length, 2)
        assert.deepEqual(vectors[0], stubValue.data[0].embedding)
        assert.deepEqual(vectors[1], stubValue.data[1].embedding)
      } finally {
        // The stub replaces a method on the shared prototype; restore it even when an
        // assertion fails so later tests in the same process see the real `create`.
        openAIStub.restore()
      }
    })
  })
  describe('isEmbeddingFunction', function () {
    it('should match the isEmbeddingFunction guard', function () {
      assert.isTrue(isEmbeddingFunction(new OpenAIEmbeddingFunction('text', 'sk-key')))
    })
  })
 })
--- a/node/src/test/io.ts
+++ b/node/src/test/io.ts
@@ -1,76 +0,0 @@
 // Copyright 2023 Lance Developers.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 // IO tests
 import { describe } from 'mocha'
 import { assert } from 'chai'
 import * as lancedb from '../index'
 import { type ConnectionOptions } from '../index'
 describe('LanceDB S3 client', function () {
  // Each test is registered only when the environment variables it reads are set;
  // otherwise a skipped placeholder suite is registered in its place.
  if (process.env.TEST_S3_BASE_URL != null) {
    const baseUri = process.env.TEST_S3_BASE_URL
    it('should have a valid url', async function () {
      const opts = { uri: `${baseUri}/valid_url` }
      const table = await createTestDB(opts, 2, 20)
      const con = await lancedb.connect(opts)
      assert.equal(con.uri, opts.uri)
      const results = await table.search([0.1, 0.3]).limit(5).execute()
      assert.equal(results.length, 5)
    }).timeout(10_000)
  } else {
    describe.skip('Skip S3 test', function () {})
  }
  if (process.env.TEST_S3_BASE_URL != null && process.env.TEST_AWS_ACCESS_KEY_ID != null && process.env.TEST_AWS_SECRET_ACCESS_KEY != null) {
    const baseUri = process.env.TEST_S3_BASE_URL
    it('use custom credentials', async function () {
      const opts: ConnectionOptions = {
        uri: `${baseUri}/custom_credentials`,
        awsCredentials: {
          accessKeyId: process.env.TEST_AWS_ACCESS_KEY_ID as string,
          secretKey: process.env.TEST_AWS_SECRET_ACCESS_KEY as string
        }
      }
      // NOTE: the table and connection are deliberately not printed here. They are built
      // from `opts`, which carries the secret key, so logging them may leak it into CI output.
      const table = await createTestDB(opts, 2, 20)
      const con = await lancedb.connect(opts)
      assert.equal(con.uri, opts.uri)
      const results = await table.search([0.1, 0.3]).limit(5).execute()
      assert.equal(results.length, 5)
    }).timeout(10_000)
  } else {
    describe.skip('Skip S3 test', function () {})
  }
 })
 /**
  * Connects with `opts` and creates the table 'vectors_2' filled with generated rows.
  *
  * @param opts connection options passed to `lancedb.connect`
  * @param numDimensions number of components in each row's vector
  * @param numRows number of rows to generate
  * @returns the table returned by `createTable`
  */
 async function createTestDB (opts: ConnectionOptions, numDimensions: number = 2, numRows: number = 2): Promise<lancedb.Table> {
  const con = await lancedb.connect(opts)
  // Component `dim` of row `row` is row + 0.1 * dim.
  const vectorFor = (row: number): number[] => {
    const components: number[] = []
    for (let dim = 0; dim < numDimensions; dim++) {
      components.push(row + (dim * 0.1))
    }
    return components
  }
  const rows = []
  for (let row = 0; row < numRows; row++) {
    rows.push({
      id: row + 1,
      name: `name_${row}`,
      price: row + 10,
      is_active: (row % 2 === 0),
      vector: vectorFor(row)
    })
  }
  return await con.createTable('vectors_2', rows)
 }
--- a/node/src/test/test.ts
+++ b/node/src/test/test.ts
--- a/node/src/test/util.ts
+++ b/node/src/test/util.ts
@@ -1,45 +0,0 @@
 // Copyright 2023 LanceDB Developers.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 import { toSQL } from '../util'
 import * as chai from 'chai'
 const expect = chai.expect
 describe('toSQL', function () {
  // Each entry: [kind named in the test title, input literal, expected SQL text].
  const cases: Array<[string, Parameters<typeof toSQL>[0], string]> = [
    ['string', 'foo', "'foo'"],
    ['number', 123, '123'],
    ['boolean', true, 'TRUE'],
    ['null', null, 'NULL'],
    ['Date', new Date('05 October 2011 14:48 UTC'), "'2011-10-05T14:48:00.000Z'"],
    ['array', ['foo', 'bar', true, 1], "['foo', 'bar', TRUE, 1]"]
  ]
  for (const [kind, input, expected] of cases) {
    it(`should turn ${kind} to SQL expression`, function () {
      expect(toSQL(input)).to.equal(expected)
    })
  }
 })
--- a/node/src/util.ts
+++ b/node/src/util.ts
@@ -1,77 +0,0 @@
 // Copyright 2023 LanceDB Developers.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 /** Values that `toSQL` can render: scalars, `null`, dates, and arbitrarily nested arrays of those. */
 export type Literal = string | number | boolean | null | Date | Literal[]
 /**
  * Renders a JavaScript value as SQL literal text.
  *
  * - strings are wrapped in single quotes, with embedded single quotes doubled
  * - numbers use their `toString()` form
  * - booleans become `TRUE` / `FALSE`, `null` becomes `NULL`
  * - dates become their quoted ISO-8601 string
  * - arrays become `[a, b, ...]` with every element rendered recursively
  *
  * @param value the literal to render
  * @returns the SQL text for `value`
  * @throws Error if `value` is none of the supported types
  */
 export function toSQL (value: Literal): string {
  if (typeof value === 'string') {
    // Double embedded single quotes so the value cannot terminate the literal early.
    return `'${value.replace(/'/g, "''")}'`
  }
  if (typeof value === 'number') {
    return value.toString()
  }
  if (typeof value === 'boolean') {
    return value ? 'TRUE' : 'FALSE'
  }
  if (value === null) {
    return 'NULL'
  }
  if (value instanceof Date) {
    return `'${value.toISOString()}'`
  }
  if (Array.isArray(value)) {
    // Wrap in an arrow function so `map`'s extra (index, array) arguments are not forwarded.
    return `[${value.map((item) => toSQL(item)).join(', ')}]`
  }
  // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
  throw new Error(`Unsupported value type: ${typeof value} value: (${value})`)
 }
 /** String-keyed cache whose entries stop being returned once their time to live has elapsed. */
 export class TTLCache {
  // Each entry keeps the stored value and the `Date.now()`-based timestamp at which it expires.
  private readonly cache: Map<string, { value: any, expires: number }>
  /**
   * @param ttl Time to live in milliseconds
   */
  constructor (private readonly ttl: number) {
    this.cache = new Map()
  }
  /** Returns the value stored under `key`, or undefined when it is absent or expired. */
  get (key: string): any | undefined {
    const found = this.cache.get(key)
    if (found !== undefined) {
      const stale = found.expires < Date.now()
      if (!stale) {
        return found.value
      }
      // Expired entries are evicted on read.
      this.cache.delete(key)
    }
    return undefined
  }
  /** Stores `value` under `key`, expiring `ttl` milliseconds from now. */
  set (key: string, value: any): void {
    const expires = Date.now() + this.ttl
    this.cache.set(key, { value, expires })
  }
  /** Removes `key` from the cache if present. */
  delete (key: string): void {
    this.cache.delete(key)
  }
 }
--- a/node/tsconfig.json
+++ b/node/tsconfig.json
@@ -1,14 +0,0 @@
 {
  "include": [
    "src/**/*.ts",
    "src/*.ts"
  ],
  "compilerOptions": {
    "target": "ES2020",
    "module": "commonjs",
    "declaration": true,
    "outDir": "./dist",
    "strict": true,
    "sourceMap": true
  }
 }
--- a/nodejs/CLAUDE.md
+++ b/nodejs/CLAUDE.md
@@ -0,0 +1,13 @@
 These are the TypeScript bindings of LanceDB.
 The core Rust library is in the `../rust/lancedb` directory; the Rust binding
 code is in the `src/` directory, and the TypeScript bindings are in
 the `lancedb/` directory.
 Whenever you change the Rust code, you will need to recompile: `npm run build`.
 Common commands:
 * Build: `npm run build`
 * Lint: `npm run lint`
 * Fix lints: `npm run lint-fix`
 * Test: `npm test`
 * Run single test file: `npm test __test__/arrow.test.ts`
--- a/nodejs/Cargo.toml
+++ b/nodejs/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "lancedb-nodejs"
 edition.workspace = true
-version = "0.20.1-beta.2"
+version = "0.22.1-beta.2"
 license.workspace = true
 description.workspace = true
 repository.workspace = true
@@ -18,7 +18,7 @@ arrow-array.workspace = true
 arrow-schema.workspace = true
 env_logger.workspace = true
 futures.workspace = true
-lancedb = { path = "../rust/lancedb" }
+lancedb = { path = "../rust/lancedb", default-features = false }
 napi = { version = "2.16.8", default-features = false, features = [
    "napi9",
    "async"
@@ -36,6 +36,6 @@ aws-lc-rs = "=1.13.0"
 napi-build = "2.1"
 [features]
-default = ["remote"]
+default = ["remote", "lancedb/default"]
 fp16kernels = ["lancedb/fp16kernels"]
 remote = ["lancedb/remote"]
--- a/nodejs/test/arrow.test.ts
+++ b/nodejs/test/arrow.test.ts
@@ -1,7 +1,16 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors
-import { Schema } from "apache-arrow";
+import {
  Bool,
  Field,
  Int32,
  List,
  Schema,
  Struct,
  Uint8,
  Utf8,
 } from "apache-arrow";
 import * as arrow15 from "apache-arrow-15";
 import * as arrow16 from "apache-arrow-16";
@@ -11,10 +20,12 @@ import * as arrow18 from "apache-arrow-18";
 import {
  convertToTable,
  fromBufferToRecordBatch,
  fromDataToBuffer,
  fromRecordBatchToBuffer,
  fromTableToBuffer,
  makeArrowTable,
  makeEmptyTable,
  tableFromIPC,
 } from "../lancedb/arrow";
 import {
  EmbeddingFunction,
@@ -253,6 +264,98 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
        expect(actualSchema).toEqual(schema);
      });
      it("will detect vector columns when name contains 'vector' or 'embedding'", async function () {
        // Shared check: the named column must be a FixedSizeList whose child type prints as Float32.
        const expectFloat32Vector = (
          table: ReturnType<typeof makeArrowTable>,
          columnName: string,
        ) => {
          expect(
            DataType.isFixedSizeList(table.getChild(columnName)?.type),
          ).toBe(true);
          expect(
            table.getChild(columnName)?.type.children[0].type.toString(),
          ).toEqual(new Float32().toString());
        };
        // Naming patterns that should be detected as vector columns. Decimal values are used
        // so the data is unambiguously floating point.
        const floatVectorTable = makeArrowTable([
          {
            // biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
            user_vector: [1.1, 2.2],
            // biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
            text_embedding: [3.3, 4.4],
            // biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
            doc_embeddings: [5.5, 6.6],
            // biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
            my_vector_field: [7.7, 8.8],
            // biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
            embedding_model: [9.9, 10.1],
            // biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
            VECTOR_COL: [11.1, 12.2], // uppercase
            // biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
            Vector_Mixed: [13.3, 14.4], // mixed case
          },
        ]);
        for (const columnName of [
          "user_vector",
          "text_embedding",
          "doc_embeddings",
          "my_vector_field",
          "embedding_model",
          "VECTOR_COL",
          "Vector_Mixed",
        ]) {
          expectFloat32Vector(floatVectorTable, columnName);
        }
        // Plain integer arrays in vector-named columns are still expected to come out as
        // Float32 vectors (JavaScript doesn't distinguish integers from floats at runtime).
        const integerArrayTable = makeArrowTable([
          {
            // biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
            vector_int: [1, 2], // Regular array with integers - should be Float32
            // biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
            embedding_int: [3, 4], // Regular array with integers - should be Float32
          },
        ]);
        for (const columnName of ["vector_int", "embedding_int"]) {
          expectFloat32Vector(integerArrayTable, columnName);
        }
        // A column whose name matches neither pattern must stay a variable-size List.
        const normalListTable = makeArrowTable([
          {
            // biome-ignore lint/style/useNamingConvention: Testing vector column detection patterns
            normal_list: [15.5, 16.6], // should NOT be detected as vector
          },
        ]);
        const normalListType = normalListTable.getChild("normal_list")?.type;
        expect(DataType.isFixedSizeList(normalListType)).toBe(false);
        expect(DataType.isList(normalListType)).toBe(true);
      });
      it("will allow different vector column types", async function () {
        const table = makeArrowTable([{ fp16: [1], fp32: [1], fp64: [1] }], {
          vectorColumns: {
@@ -375,8 +478,221 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
        expect(table2.schema).toEqual(schema);
      });
      it("will handle missing columns in schema alignment when using embeddings", async function () {
        // The schema declares three nullable string columns; the rows below never set "description".
        const nullableText = (name: string) => new Field(name, new Utf8(), true);
        const schema = new Schema(
          [
            nullableText("domain"),
            nullableText("name"),
            nullableText("description"),
          ],
          new Map([["embedding_functions", JSON.stringify([])]]),
        );
        const table = await convertToTable(
          [
            { domain: "google.com", name: "Google" },
            { domain: "facebook.com", name: "Facebook" },
          ],
          undefined,
          { schema },
        );
        expect(table.numCols).toBe(3);
        expect(table.numRows).toBe(2);
        // The column missing from the data must still exist, filled with nulls.
        const description = table.getChild("description");
        expect(description).toBeDefined();
        expect(description?.nullCount).toBe(2);
        expect(description?.toArray()).toEqual([null, null]);
        // Columns that were supplied keep their values.
        const domains = table.getChild("domain")?.toArray();
        expect(domains).toEqual(["google.com", "facebook.com"]);
        const names = table.getChild("name")?.toArray();
        expect(names).toEqual(["Google", "Facebook"]);
      });
      it("will handle completely missing nested struct columns", async function () {
        // Nullable struct column that none of the input rows provide.
        const metadataType = new Struct([
          new Field("version", new Int32(), true),
          new Field("author", new Utf8(), true),
          new Field("tags", new List(new Field("item", new Utf8(), true)), true),
        ]);
        const schema = new Schema(
          [
            new Field("id", new Utf8(), true),
            new Field("name", new Utf8(), true),
            new Field("metadata", metadataType, true),
          ],
          new Map([["embedding_functions", JSON.stringify([])]]),
        );
        const table = await convertToTable(
          [
            { id: "doc1", name: "Document 1" },
            { id: "doc2", name: "Document 2" },
          ],
          undefined,
          { schema },
        );
        expect(table.numCols).toBe(3);
        expect(table.numRows).toBe(2);
        // Round-trip through an IPC buffer and read the rows back.
        const retrievedTable = tableFromIPC(await fromTableToBuffer(table));
        const rows = [];
        for (let rowIndex = 0; rowIndex < retrievedTable.numRows; rowIndex++) {
          rows.push(retrievedTable.get(rowIndex));
        }
        // Every field of the missing struct reads back as null; supplied columns are intact.
        const firstRow = rows[0];
        expect(firstRow.metadata.version).toBe(null);
        expect(firstRow.metadata.author).toBe(null);
        expect(firstRow.metadata.tags).toBe(null);
        expect(firstRow.id).toBe("doc1");
        expect(firstRow.name).toBe("Document 1");
      });
      it("will handle partially missing nested struct fields", async function () {
        // Struct with three nullable fields; the rows below each omit at least "created_at".
        const metadataType = new Struct([
          new Field("version", new Int32(), true),
          new Field("author", new Utf8(), true),
          new Field("created_at", new Utf8(), true),
        ]);
        const schema = new Schema(
          [
            new Field("id", new Utf8(), true),
            new Field("metadata", metadataType, true),
          ],
          new Map([["embedding_functions", JSON.stringify([])]]),
        );
        const table = await convertToTable(
          [
            { id: "doc1", metadata: { version: 1, author: "Alice" } },
            { id: "doc2", metadata: { version: 2 } },
          ],
          undefined,
          { schema },
        );
        expect(table.numCols).toBe(2);
        expect(table.numRows).toBe(2);
        // The resulting column must carry the full struct type from the schema.
        const metadataColumn = table.getChild("metadata");
        expect(metadataColumn).toBeDefined();
        const metadataTypeName = metadataColumn?.type.toString();
        expect(metadataTypeName).toBe(
          "Struct<{version:Int32, author:Utf8, created_at:Utf8}>",
        );
      });
      it("will handle multiple levels of nested structures", async function () {
        // Three levels of nullable structs, declared innermost first: config.connection.ssl.
        const sslType = new Struct([
          new Field("enabled", new Bool(), true),
          new Field("cert_path", new Utf8(), true),
        ]);
        const connectionType = new Struct([
          new Field("host", new Utf8(), true),
          new Field("port", new Int32(), true),
          new Field("ssl", sslType, true),
        ]);
        const configType = new Struct([
          new Field("database", new Utf8(), true),
          new Field("connection", connectionType, true),
        ]);
        const schema = new Schema(
          [
            new Field("id", new Utf8(), true),
            new Field("config", configType, true),
          ],
          new Map([["embedding_functions", JSON.stringify([])]]),
        );
        // Each row stops at a different nesting depth: partial connection, no connection, no config.
        const table = await convertToTable(
          [
            {
              id: "config1",
              config: {
                database: "postgres",
                connection: { host: "localhost" },
              },
            },
            {
              id: "config2",
              config: { database: "mysql" },
            },
            {
              id: "config3",
            },
          ],
          undefined,
          { schema },
        );
        expect(table.numCols).toBe(2);
        expect(table.numRows).toBe(3);
        // The resulting column must carry the full nested type from the schema.
        const configColumn = table.getChild("config");
        expect(configColumn).toBeDefined();
        const configTypeName = configColumn?.type.toString();
        expect(configTypeName).toBe(
          "Struct<{database:Utf8, connection:Struct<{host:Utf8, port:Int32, ssl:Struct<{enabled:Bool, cert_path:Utf8}>}>}>",
        );
      });
      it("will handle missing columns in Arrow table input when using embeddings", async function () {
        // Input is already an Arrow table, and it lacks the "description" column the schema declares.
        const incompleteTable = makeArrowTable([
          { domain: "google.com", name: "Google" },
          { domain: "facebook.com", name: "Facebook" },
        ]);
        const nullableText = (name: string) => new Field(name, new Utf8(), true);
        const schema = new Schema(
          [
            nullableText("domain"),
            nullableText("name"),
            nullableText("description"),
          ],
          new Map([["embedding_functions", JSON.stringify([])]]),
        );
        const buf = await fromDataToBuffer(incompleteTable, undefined, schema);
        expect(buf.byteLength).toBeGreaterThan(0);
        const retrievedTable = tableFromIPC(buf);
        expect(retrievedTable.numCols).toBe(3);
        expect(retrievedTable.numRows).toBe(2);
        // The column missing from the input must exist in the output, filled with nulls.
        const description = retrievedTable.getChild("description");
        expect(description).toBeDefined();
        expect(description?.nullCount).toBe(2);
        expect(description?.toArray()).toEqual([null, null]);
        // Columns that were supplied keep their values.
        const domains = retrievedTable.getChild("domain")?.toArray();
        expect(domains).toEqual(["google.com", "facebook.com"]);
        const names = retrievedTable.getChild("name")?.toArray();
        expect(names).toEqual(["Google", "Facebook"]);
      });
      it("should correctly retain values in nested struct fields", async function () {
        // Define test data with nested struct
        const testData = [
          {
            id: "doc1",
@@ -400,10 +716,8 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
          },
        ];
        // Create Arrow table from the data
        const table = makeArrowTable(testData);
        // Verify schema has the nested struct fields
        const metadataField = table.schema.fields.find(
          (f) => f.name === "metadata",
        );
@@ -417,23 +731,17 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
          "text",
        ]);
        // Convert to buffer and back (simulating storage and retrieval)
        const buf = await fromTableToBuffer(table);
        const retrievedTable = tableFromIPC(buf);
        // Verify the retrieved table has the same structure
        const rows = [];
        for (let i = 0; i < retrievedTable.numRows; i++) {
          rows.push(retrievedTable.get(i));
        }
        // Check values in the first row
        const firstRow = rows[0];
        expect(firstRow.id).toBe("doc1");
        expect(firstRow.vector.toJSON()).toEqual([1, 2, 3]);
        // Verify metadata values are preserved (this is where the bug is)
        expect(firstRow.metadata).toBeDefined();
        expect(firstRow.metadata.filePath).toBe("/path/to/file1.ts");
        expect(firstRow.metadata.startLine).toBe(10);
        expect(firstRow.metadata.endLine).toBe(20);
--- a/nodejs/test/connection.test.ts
+++ b/nodejs/test/connection.test.ts
@@ -203,3 +203,106 @@ describe("given a connection", () => {
    });
  });
 });
 describe("clone table functionality", () => {
  let tmpDir: tmp.DirResult;
  let db: Connection;
  // Rows the "source" table is created with (2 rows).
  const initialRows = () => [
    { id: 1, text: "hello", vector: [1.0, 2.0] },
    { id: 2, text: "world", vector: [3.0, 4.0] },
  ];
  // Row appended afterwards to create a newer version (3 rows in total).
  const extraRows = () => [{ id: 3, text: "test", vector: [5.0, 6.0] }];
  // URI of the "source" table inside the temporary database directory.
  const sourceUri = () => `${tmpDir.name}/source.lance`;
  beforeEach(async () => {
    tmpDir = tmp.dirSync({ unsafeCleanup: true });
    db = await connect(tmpDir.name);
  });
  afterEach(() => tmpDir.removeCallback());
  it("should clone a table with latest version (default behavior)", async () => {
    const sourceTable = await db.createTable("source", initialRows());
    await sourceTable.add(extraRows());
    // No version or tag is given, so the clone should see all 3 rows.
    const clonedTable = await db.cloneTable("cloned", sourceUri());
    expect(await clonedTable.countRows()).toBe(3);
    expect((await db.tableNames()).includes("cloned")).toBe(true);
  });
  it("should clone a table from a specific version", async () => {
    const sourceTable = await db.createTable("source", initialRows());
    // Remember the version that holds only the initial 2 rows.
    const initialVersion = await sourceTable.version();
    await sourceTable.add(extraRows());
    expect(await sourceTable.countRows()).toBe(3);
    // Cloning from the remembered version must not include the appended row.
    const clonedTable = await db.cloneTable("cloned", sourceUri(), {
      sourceVersion: initialVersion,
    });
    expect(await clonedTable.countRows()).toBe(2);
  });
  it("should clone a table from a tagged version", async () => {
    const sourceTable = await db.createTable("source", initialRows());
    // Tag the version that holds only the initial 2 rows.
    const tags = await sourceTable.tags();
    await tags.create("v1.0", await sourceTable.version());
    await sourceTable.add(extraRows());
    expect(await sourceTable.countRows()).toBe(3);
    // Cloning from the tag must not include the row appended after it.
    const clonedTable = await db.cloneTable("cloned", sourceUri(), {
      sourceTag: "v1.0",
    });
    expect(await clonedTable.countRows()).toBe(2);
  });
  it("should fail when attempting deep clone", async () => {
    await db.createTable("source", initialRows());
    // Requesting a non-shallow clone is expected to be rejected.
    const deepClone = db.cloneTable("cloned", sourceUri(), { isShallow: false });
    await expect(deepClone).rejects.toThrow("Deep clone is not yet implemented");
  });
 });
--- a/Show More
+++ b/Show More