test

Merge branch 'main' of https://github.com/lancedb/lancedb into add-ivfrq
update docs
2026-05-05 14:10:41 +00:00 · 2025-09-30 11:58:20 -07:00 · 2025-09-30 16:30:58 +08:00 · 2025-09-29 18:24:58 +08:00 · 2025-09-29 17:47:59 +08:00 · 2025-09-29 17:08:12 +08:00
119 changed files with 2466 additions and 7304 deletions
--- a/.bumpversion.toml
+++ b/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.22.3-beta.3"
+current_version = "0.22.2-beta.0"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/.github/actions/create-failure-issue/action.yml
+++ b/.github/actions/create-failure-issue/action.yml
@@ -1,45 +0,0 @@
-name: Create Failure Issue
-description: Creates a GitHub issue if any jobs in the workflow failed
-
-inputs:
-  job-results:
-    description: 'JSON string of job results from needs context'
-    required: true
-  workflow-name:
-    description: 'Name of the workflow'
-    required: true
-
-runs:
-  using: composite
-  steps:
-    - name: Check for failures and create issue
-      shell: bash
-      env:
-        JOB_RESULTS: ${{ inputs.job-results }}
-        WORKFLOW_NAME: ${{ inputs.workflow-name }}
-        RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
-        GH_TOKEN: ${{ github.token }}
-      run: |
-        # Check if any job failed
-        if echo "$JOB_RESULTS" | jq -e 'to_entries | any(.value.result == "failure")' > /dev/null; then
-          echo "Detected job failures, creating issue..."
-
-          # Extract failed job names
-          FAILED_JOBS=$(echo "$JOB_RESULTS" | jq -r 'to_entries | map(select(.value.result == "failure")) | map(.key) | join(", ")')
-
-          # Create issue with workflow name, failed jobs, and run URL
-          gh issue create \
-            --title "$WORKFLOW_NAME Failed ($FAILED_JOBS)" \
-            --body "The workflow **$WORKFLOW_NAME** failed during execution.
-
-        **Failed jobs:** $FAILED_JOBS
-
-        **Run URL:** $RUN_URL
-
-        Please investigate the failed jobs and address any issues." \
-            --label "ci"
-
-          echo "Issue created successfully"
-        else
-          echo "No job failures detected, skipping issue creation"
-        fi
--- a/.github/workflows/cargo-publish.yml
+++ b/.github/workflows/cargo-publish.yml
@@ -38,17 +38,3 @@ jobs:
      - name: Publish the package
        run: |
          cargo publish -p lancedb --all-features --token ${{ steps.auth.outputs.token }}
-  report-failure:
-    name: Report Workflow Failure
-    runs-on: ubuntu-latest
-    needs: [build]
-    if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
-    permissions:
-      contents: read
-      issues: write
-    steps:
-      - uses: actions/checkout@v4
-      - uses: ./.github/actions/create-failure-issue
-        with:
-          job-results: ${{ toJSON(needs) }}
-          workflow-name: ${{ github.workflow }}
--- a/.github/workflows/codex-update-lance-dependency.yml
+++ b/.github/workflows/codex-update-lance-dependency.yml
@@ -1,107 +0,0 @@
-name: Codex Update Lance Dependency
-
-on:
-  workflow_call:
-    inputs:
-      tag:
-        description: "Tag name from Lance"
-        required: true
-        type: string
-  workflow_dispatch:
-    inputs:
-      tag:
-        description: "Tag name from Lance"
-        required: true
-        type: string
-
-permissions:
-  contents: write
-  pull-requests: write
-  actions: read
-
-jobs:
-  update:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Show inputs
-        run: |
-          echo "tag = ${{ inputs.tag }}"
-
-      - name: Checkout Repo LanceDB
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-          persist-credentials: true
-
-      - name: Set up Node.js
-        uses: actions/setup-node@v4
-        with:
-          node-version: 20
-
-      - name: Install Codex CLI
-        run: npm install -g @openai/codex
-
-      - name: Install Rust toolchain
-        uses: dtolnay/rust-toolchain@stable
-        with:
-          toolchain: stable
-          components: clippy, rustfmt
-
-      - name: Install system dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y protobuf-compiler libssl-dev
-
-      - name: Install cargo-info
-        run: cargo install cargo-info
-
-      - name: Install Python dependencies
-        run: python3 -m pip install --upgrade pip packaging
-
-      - name: Configure git user
-        run: |
-          git config user.name "lancedb automation"
-          git config user.email "robot@lancedb.com"
-
-      - name: Configure Codex authentication
-        env:
-          CODEX_TOKEN_B64: ${{ secrets.CODEX_TOKEN }}
-        run: |
-          if [ -z "${CODEX_TOKEN_B64}" ]; then
-            echo "Repository secret CODEX_TOKEN is not defined; skipping Codex execution."
-            exit 1
-          fi
-          mkdir -p ~/.codex
-          echo "${CODEX_TOKEN_B64}" | base64 --decode > ~/.codex/auth.json
-
-      - name: Run Codex to update Lance dependency
-        env:
-          TAG: ${{ inputs.tag }}
-          GITHUB_TOKEN: ${{ secrets.ROBOT_TOKEN }}
-          GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
-        run: |
-          set -euo pipefail
-          VERSION="${TAG#refs/tags/}"
-          VERSION="${VERSION#v}"
-          BRANCH_NAME="codex/update-lance-${VERSION//[^a-zA-Z0-9]/-}"
-          cat <<EOF >/tmp/codex-prompt.txt
-          You are running inside the lancedb repository on a GitHub Actions runner. Update the Lance dependency to version ${VERSION} and prepare a pull request for maintainers to review.
-
-          Follow these steps exactly:
-          1. Use script "ci/set_lance_version.py" to update Lance dependencies. The script already refreshes Cargo metadata, so allow it to finish even if it takes time.
-          2. Run "cargo clippy --workspace --tests --all-features -- -D warnings". If diagnostics appear, fix them yourself and rerun clippy until it exits cleanly. Do not skip any warnings.
-          3. After clippy succeeds, run "cargo fmt --all" to format the workspace.
-          4. Ensure the repository is clean except for intentional changes. Inspect "git status --short" and "git diff" to confirm the dependency update and any required fixes.
-          5. Create and switch to a new branch named "${BRANCH_NAME}" (replace any duplicated hyphens if necessary).
-          6. Stage all relevant files with "git add -A". Commit using the message "chore: update lance dependency to v${VERSION}".
-          7. Push the branch to origin. If the branch already exists, force-push your changes.
-          8. env "GH_TOKEN" is available, use "gh" tools for github related operations like creating pull request.
-          9. Create a pull request targeting "main" with title "chore: update lance dependency to v${VERSION}". In the body, summarize the dependency bump, clippy/fmt verification, and link the triggering tag (${TAG}).
-          10. After creating the PR, display the PR URL, "git status --short", and a concise summary of the commands run and their results.
-
-          Constraints:
-          - Use bash commands; avoid modifying GitHub workflow files other than through the scripted task above.
-          - Do not merge the PR.
-          - If any command fails, diagnose and fix the issue instead of aborting.
-          EOF
-          codex --config shell_environment_policy.ignore_default_excludes=true exec --dangerously-bypass-approvals-and-sandbox "$(cat /tmp/codex-prompt.txt)"
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -58,7 +58,7 @@ jobs:
          cache: 'npm'
          cache-dependency-path: docs/package-lock.json
      - name: Install node dependencies
-        working-directory: nodejs
+        working-directory: node
        run: |
          sudo apt update
          sudo apt install -y protobuf-compiler libssl-dev
--- a/.github/workflows/java-publish.yml
+++ b/.github/workflows/java-publish.yml
@@ -43,6 +43,7 @@ jobs:
      - uses: Swatinem/rust-cache@v2
      - uses: actions-rust-lang/setup-rust-toolchain@v1
        with:
+          toolchain: "1.81.0"
          cache-workspaces: "./java/core/lancedb-jni"
          # Disable full debug symbol generation to speed up CI build and keep memory down
          # "1" means line tables only, which is useful for panic tracebacks.
@@ -111,17 +112,3 @@ jobs:
        env:
          SONATYPE_USER: ${{ secrets.SONATYPE_USER }}
          SONATYPE_TOKEN: ${{ secrets.SONATYPE_TOKEN }}
-  report-failure:
-    name: Report Workflow Failure
-    runs-on: ubuntu-latest
-    needs: [linux-arm64, linux-x86, macos-arm64]
-    if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
-    permissions:
-      contents: read
-      issues: write
-    steps:
-      - uses: actions/checkout@v4
-      - uses: ./.github/actions/create-failure-issue
-        with:
-          job-results: ${{ toJSON(needs) }}
-          workflow-name: ${{ github.workflow }}
--- a/.github/workflows/nodejs.yml
+++ b/.github/workflows/nodejs.yml
@@ -6,7 +6,6 @@ on:
      - main
  pull_request:
    paths:
-      - Cargo.toml
      - nodejs/**
      - .github/workflows/nodejs.yml
      - docker-compose.yml
--- a/.github/workflows/npm-publish.yml
+++ b/.github/workflows/npm-publish.yml
@@ -365,17 +365,3 @@ jobs:
            ARGS="$ARGS --tag preview"
          fi
          npm publish $ARGS
-  report-failure:
-    name: Report Workflow Failure
-    runs-on: ubuntu-latest
-    needs: [build-lancedb, test-lancedb, publish]
-    if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
-    permissions:
-      contents: read
-      issues: write
-    steps:
-      - uses: actions/checkout@v4
-      - uses: ./.github/actions/create-failure-issue
-        with:
-          job-results: ${{ toJSON(needs) }}
-          workflow-name: ${{ github.workflow }}
--- a/.github/workflows/pypi-publish.yml
+++ b/.github/workflows/pypi-publish.yml
@@ -173,17 +173,3 @@ jobs:
          generate_release_notes: false
          name: Python LanceDB v${{ steps.extract_version.outputs.version }}
          body: ${{ steps.python_release_notes.outputs.changelog }}
-  report-failure:
-    name: Report Workflow Failure
-    runs-on: ubuntu-latest
-    needs: [linux, mac, windows]
-    permissions:
-      contents: read
-      issues: write
-    if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
-    steps:
-      - uses: actions/checkout@v4
-      - uses: ./.github/actions/create-failure-issue
-        with:
-          job-results: ${{ toJSON(needs) }}
-          workflow-name: ${{ github.workflow }}
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -6,7 +6,6 @@ on:
      - main
  pull_request:
    paths:
-      - Cargo.toml
      - python/**
      - .github/workflows/python.yml

--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -125,9 +125,6 @@ jobs:
      - name: Run examples
        run: cargo run --example simple --locked
      - name: Run remote tests
-        # Running this requires access to secrets, so skip if this is
-        # a PR from a fork.
-        if: github.event_name != 'pull_request' || !github.event.pull_request.head.repo.fork
        run: make -C ./lancedb remote-tests

  macos:
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -1,101 +0,0 @@
-LanceDB is a database designed for retrieval, including vector, full-text, and hybrid search.
-It is a wrapper around Lance. There are two backends: local (in-process like SQLite) and
-remote (against LanceDB Cloud).
-
-The core of LanceDB is written in Rust. There are bindings in Python, Typescript, and Java.
-
-Project layout:
-
-* `rust/lancedb`: The LanceDB core Rust implementation.
-* `python`: The Python bindings, using PyO3.
-* `nodejs`: The Typescript bindings, using napi-rs
-* `java`: The Java bindings
-
-Common commands:
-
-* Check for compiler errors: `cargo check --quiet --features remote --tests --examples`
-* Run tests: `cargo test --quiet --features remote --tests`
-* Run specific test: `cargo test --quiet --features remote -p <package_name> --test <test_name>`
-* Lint: `cargo clippy --quiet --features remote --tests --examples`
-* Format: `cargo fmt --all`
-
-Before committing changes, run formatting.
-
-## Coding tips
-
-* When writing Rust doctests for things that require a connection or table reference,
-  write them as a function instead of a fully executable test. This allows type checking
-  to run but avoids needing a full test environment. For example:
-    ```rust
-    /// ```
-    /// use lance_index::scalar::FullTextSearchQuery;
-    /// use lancedb::query::{QueryBase, ExecutableQuery};
-    ///
-    /// # use lancedb::Table;
-    /// # async fn query(table: &Table) -> Result<(), Box<dyn std::error::Error>> {
-    /// let results = table.query()
-    ///     .full_text_search(FullTextSearchQuery::new("hello world".into()))
-    ///     .execute()
-    ///     .await?;
-    /// # Ok(())
-    /// # }
-    /// ```
-    ```
-
-## Example plan: adding a new method on Table
-
-Adding a new method involves first adding it to the Rust core, then exposing it
-in the Python and TypeScript bindings. There are both local and remote tables.
-Remote tables are implemented via a HTTP API and require the `remote` cargo
-feature flag to be enabled. Python has both sync and async methods.
-
-Rust core changes:
-
-1. Add method on `Table` struct in `rust/lancedb/src/table.rs` (calls `BaseTable` trait).
-2. Add method to `BaseTable` trait in `rust/lancedb/src/table.rs`.
-3. Implement new trait method on `NativeTable` in `rust/lancedb/src/table.rs`.
-    * Test with unit test in `rust/lancedb/src/table.rs`.
-4. Implement new trait method on `RemoteTable` in `rust/lancedb/src/remote/table.rs`.
-    * Test with unit test in `rust/lancedb/src/remote/table.rs` against mocked endpoint.
-
-Python bindings changes:
-
-1. Add PyO3 method binding in `python/src/table.rs`. Run `make develop` to compile bindings.
-2. Add types for PyO3 method in `python/python/lancedb/_lancedb.pyi`.
-3. Add method to `AsyncTable` class in `python/python/lancedb/table.py`.
-4. Add abstract method to `Table` abstract base class in `python/python/lancedb/table.py`.
-5. Add concrete sync method to `LanceTable` class in `python/python/lancedb/table.py`.
-    * Should use `LOOP.run()` to call the corresponding `AsyncTable` method.
-6. Add concrete sync method to `RemoteTable` class in `python/python/lancedb/remote/table.py`.
-7. Add unit test in `python/tests/test_table.py`.
-
-TypeScript bindings changes:
-
-1. Add napi-rs method binding on `Table` in `nodejs/src/table.rs`.
-2. Run `npm run build` to generate TypeScript definitions.
-3. Add typescript method on abstract class `Table` in `nodejs/src/table.ts`.
-4. Add concrete method on `LocalTable` class in `nodejs/src/native_table.ts`.
-    * Note: despite the name, this class is also used for remote tables.
-5. Add test in `nodejs/__test__/table.test.ts`.
-6. Run `npm run docs` to generate TypeScript documentation.
-
-## Review Guidelines
-
-Please consider the following when reviewing code contributions.
-
-### Rust API design
-* Design public APIs so they can be evolved easily in the future without breaking
-  changes. Often this means using builder patterns or options structs instead of
-  long argument lists.
-* For public APIs, prefer inputs that use `Into<T>` or `AsRef<T>` traits to allow
-  more flexible inputs. For example, use `name: Into<String>` instead of `name: String`,
-  so we don't have to write `func("my_string".to_string())`.
-
-### Testing
-* Ensure all new public APIs have documentation and examples.
-* Ensure that all bugfixes and features have corresponding tests. **We do not merge
-  code without tests.**
-
-### Documentation
-* New features must include updates to the rust documentation comments. Link to
-  relevant structs and methods to increase the value of documentation.
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -1 +0,0 @@
-AGENTS.md
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -0,0 +1,80 @@
+LanceDB is a database designed for retrieval, including vector, full-text, and hybrid search.
+It is a wrapper around Lance. There are two backends: local (in-process like SQLite) and
+remote (against LanceDB Cloud).
+
+The core of LanceDB is written in Rust. There are bindings in Python, TypeScript, and Java.
+
+Project layout:
+
+* `rust/lancedb`: The LanceDB core Rust implementation.
+* `python`: The Python bindings, using PyO3.
+* `nodejs`: The TypeScript bindings, using napi-rs
+* `java`: The Java bindings
+
+Common commands:
+
+* Check for compiler errors: `cargo check --quiet --features remote --tests --examples`
+* Run tests: `cargo test --quiet --features remote --tests`
+* Run specific test: `cargo test --quiet --features remote -p <package_name> --test <test_name>`
+* Lint: `cargo clippy --quiet --features remote --tests --examples`
+* Format: `cargo fmt --all`
+
+Before committing changes, run formatting.
+
+## Coding tips
+
+* When writing Rust doctests for things that require a connection or table reference,
+  write them as a function instead of a fully executable test. This allows type checking
+  to run but avoids needing a full test environment. For example:
+    ```rust
+    /// ```
+    /// use lance_index::scalar::FullTextSearchQuery;
+    /// use lancedb::query::{QueryBase, ExecutableQuery};
+    ///
+    /// # use lancedb::Table;
+    /// # async fn query(table: &Table) -> Result<(), Box<dyn std::error::Error>> {
+    /// let results = table.query()
+    ///     .full_text_search(FullTextSearchQuery::new("hello world".into()))
+    ///     .execute()
+    ///     .await?;
+    /// # Ok(())
+    /// # }
+    /// ```
+    ```
+
+## Example plan: adding a new method on Table
+
+Adding a new method involves first adding it to the Rust core, then exposing it
+in the Python and TypeScript bindings. There are both local and remote tables.
+Remote tables are implemented via an HTTP API and require the `remote` cargo
+feature flag to be enabled. Python has both sync and async methods.
+
+Rust core changes:
+
+1. Add method on `Table` struct in `rust/lancedb/src/table.rs` (calls `BaseTable` trait).
+2. Add method to `BaseTable` trait in `rust/lancedb/src/table.rs`.
+3. Implement new trait method on `NativeTable` in `rust/lancedb/src/table.rs`.
+    * Test with unit test in `rust/lancedb/src/table.rs`.
+4. Implement new trait method on `RemoteTable` in `rust/lancedb/src/remote/table.rs`.
+    * Test with unit test in `rust/lancedb/src/remote/table.rs` against mocked endpoint.
+
+Python bindings changes:
+
+1. Add PyO3 method binding in `python/src/table.rs`. Run `make develop` to compile bindings.
+2. Add types for PyO3 method in `python/python/lancedb/_lancedb.pyi`.
+3. Add method to `AsyncTable` class in `python/python/lancedb/table.py`.
+4. Add abstract method to `Table` abstract base class in `python/python/lancedb/table.py`.
+5. Add concrete sync method to `LanceTable` class in `python/python/lancedb/table.py`.
+    * Should use `LOOP.run()` to call the corresponding `AsyncTable` method.
+6. Add concrete sync method to `RemoteTable` class in `python/python/lancedb/remote/table.py`.
+7. Add unit test in `python/tests/test_table.py`.
+
+TypeScript bindings changes:
+
+1. Add napi-rs method binding on `Table` in `nodejs/src/table.rs`.
+2. Run `npm run build` to generate TypeScript definitions.
+3. Add TypeScript method on abstract class `Table` in `nodejs/src/table.ts`.
+4. Add concrete method on `LocalTable` class in `nodejs/src/native_table.ts`.
+    * Note: despite the name, this class is also used for remote tables.
+5. Add test in `nodejs/__test__/table.test.ts`.
+6. Run `npm run docs` to generate TypeScript documentation.
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -15,37 +15,31 @@ categories = ["database-implementations"]
 rust-version = "1.78.0"

 [workspace.dependencies]
-lance = { "version" = "=0.38.3", default-features = false, "tag" = "v0.38.3", "git" = "https://github.com/lancedb/lance.git" }
-lance-core = { "version" = "=0.38.3", "tag" = "v0.38.3", "git" = "https://github.com/lancedb/lance.git" }
-lance-datagen = { "version" = "=0.38.3", "tag" = "v0.38.3", "git" = "https://github.com/lancedb/lance.git" }
-lance-file = { "version" = "=0.38.3", "tag" = "v0.38.3", "git" = "https://github.com/lancedb/lance.git" }
-lance-io = { "version" = "=0.38.3", default-features = false, "tag" = "v0.38.3", "git" = "https://github.com/lancedb/lance.git" }
-lance-index = { "version" = "=0.38.3", "tag" = "v0.38.3", "git" = "https://github.com/lancedb/lance.git" }
-lance-linalg = { "version" = "=0.38.3", "tag" = "v0.38.3", "git" = "https://github.com/lancedb/lance.git" }
-lance-namespace = { "version" = "=0.38.3", "tag" = "v0.38.3", "git" = "https://github.com/lancedb/lance.git" }
-lance-namespace-impls = { "version" = "=0.38.3", "features" = ["dir-aws", "dir-gcp", "dir-azure", "dir-oss", "rest"], "tag" = "v0.38.3", "git" = "https://github.com/lancedb/lance.git" }
-lance-table = { "version" = "=0.38.3", "tag" = "v0.38.3", "git" = "https://github.com/lancedb/lance.git" }
-lance-testing = { "version" = "=0.38.3", "tag" = "v0.38.3", "git" = "https://github.com/lancedb/lance.git" }
-lance-datafusion = { "version" = "=0.38.3", "tag" = "v0.38.3", "git" = "https://github.com/lancedb/lance.git" }
-lance-encoding = { "version" = "=0.38.3", "tag" = "v0.38.3", "git" = "https://github.com/lancedb/lance.git" }
-lance-arrow = { "version" = "=0.38.3", "tag" = "v0.38.3", "git" = "https://github.com/lancedb/lance.git" }
-ahash = "0.8"
+lance = { "version" = "=0.37.0", default-features = false, "features" = ["dynamodb"], "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
+lance-io = { "version" = "=0.37.0", default-features = false, "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
+lance-index = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
+lance-linalg = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
+lance-table = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
+lance-testing = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
+lance-datafusion = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
+lance-encoding = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
+lance-namespace = "0.0.15"
 # Note that this one does not include pyarrow
-arrow = { version = "56.2", optional = false }
-arrow-array = "56.2"
-arrow-data = "56.2"
-arrow-ipc = "56.2"
-arrow-ord = "56.2"
-arrow-schema = "56.2"
-arrow-select = "56.2"
-arrow-cast = "56.2"
+arrow = { version = "55.1", optional = false }
+arrow-array = "55.1"
+arrow-data = "55.1"
+arrow-ipc = "55.1"
+arrow-ord = "55.1"
+arrow-schema = "55.1"
+arrow-arith = "55.1"
+arrow-cast = "55.1"
 async-trait = "0"
-datafusion = { version = "50.1", default-features = false }
-datafusion-catalog = "50.1"
-datafusion-common = { version = "50.1", default-features = false }
-datafusion-execution = "50.1"
-datafusion-expr = "50.1"
-datafusion-physical-plan = "50.1"
+datafusion = { version = "49.0", default-features = false }
+datafusion-catalog = "49.0"
+datafusion-common = { version = "49.0", default-features = false }
+datafusion-execution = "49.0"
+datafusion-expr = "49.0"
+datafusion-physical-plan = "49.0"
 env_logger = "0.11"
 half = { "version" = "2.6.0", default-features = false, features = [
    "num-traits",
@@ -55,14 +49,18 @@ log = "0.4"
 moka = { version = "0.12", features = ["future"] }
 object_store = "0.12.0"
 pin-project = "1.0.7"
-rand = "0.9"
 snafu = "0.8"
 url = "2"
 num-traits = "0.2"
+rand = "0.9"
 regex = "1.10"
 lazy_static = "1"
 semver = "1.0.25"
 crunchy = "0.2.4"
-chrono = "0.4"
+# Temporary pins to work around downstream issues
+# https://github.com/apache/arrow-rs/commit/2fddf85afcd20110ce783ed5b4cdeb82293da30b
+chrono = "=0.4.41"
+# https://github.com/RustCrypto/formats/issues/1684
+base64ct = "=1.6.0"
 # Workaround for: https://github.com/Lokathor/bytemuck/issues/306
 bytemuck_derive = ">=1.8.1, <1.9.0"
--- a/ci/run_with_test_connection.sh
+++ b/ci/run_with_test_connection.sh
@@ -16,47 +16,30 @@ check_command_exists() {
 }

 if [[ ! -e ./lancedb ]]; then
-    if [[ -v SOPHON_READ_TOKEN ]]; then
-        INPUT="lancedb-linux-x64"
-        gh release \
-            --repo lancedb/lancedb \
-            download ci-support-binaries \
-            --pattern "${INPUT}" \
-            || die "failed to fetch cli."
-        check_command_exists openssl
-        openssl enc -aes-256-cbc \
-            -d -pbkdf2 \
-            -pass "env:SOPHON_READ_TOKEN" \
-            -in "${INPUT}" \
-            -out ./lancedb-linux-x64.tar.gz \
-            || die "openssl failed"
-        TARGET="${INPUT}.tar.gz"
-    else
-        ARCH="x64"
-        if [[ $OSTYPE == 'darwin'* ]]; then
-            UNAME=$(uname -m)
-            if [[ $UNAME == 'arm64' ]]; then
-                ARCH='arm64'
-            fi
-            OSTYPE="macos"
-        elif [[ $OSTYPE == 'linux'* ]]; then
-            if [[ $UNAME == 'aarch64' ]]; then
-                ARCH='arm64'
-            fi
-            OSTYPE="linux"
-        else
-            die "unknown OSTYPE: $OSTYPE"
+    ARCH="x64"
+    if [[ $OSTYPE == 'darwin'* ]]; then
+        UNAME=$(uname -m)
+        if [[ $UNAME == 'arm64' ]]; then
+            ARCH='arm64'
        fi
-
-        check_command_exists gh
-        TARGET="lancedb-${OSTYPE}-${ARCH}.tar.gz"
-        gh release \
-            --repo lancedb/sophon \
-            download lancedb-cli-v0.0.3 \
-            --pattern "${TARGET}" \
-            || die "failed to fetch cli."
+        OSTYPE="macos"
+    elif [[ $OSTYPE == 'linux'* ]]; then
+        if [[ $UNAME == 'aarch64' ]]; then
+            ARCH='arm64'
+        fi
+        OSTYPE="linux"
+    else
+        die "unknown OSTYPE: $OSTYPE"
    fi

+    check_command_exists gh
+    TARGET="lancedb-${OSTYPE}-${ARCH}.tar.gz"
+    gh release \
+        --repo lancedb/sophon \
+        download lancedb-cli-v0.0.3 \
+        --pattern "${TARGET}" \
+        || die "failed to fetch cli."
+
    check_command_exists tar
    tar xvf "${TARGET}" || die "tar failed."
    [[ -e ./lancedb ]] || die "failed to extract lancedb."
--- a/ci/set_lance_version.py
+++ b/ci/set_lance_version.py
@@ -183,8 +183,10 @@ def set_preview_version(version: str):

    def line_updater(line: str) -> str:
        package_name = line.split("=", maxsplit=1)[0].strip()
+        base_version = version.split("-")[0]  # Get the base version without beta suffix
+
        # Build config in desired order: version, default-features, features, tag, git
-        config = {"version": f"={version}"}
+        config = {"version": f"={base_version}"}

        if extract_default_features(line):
            config["default-features"] = False
--- a/docs/mkdocs.yml
+++ b/docs/mkdocs.yml
@@ -84,7 +84,6 @@ plugins:
        'examples.md': 'https://lancedb.com/docs/tutorials/'
        'concepts/vector_search.md': 'https://lancedb.com/docs/search/vector-search/'
        'troubleshooting.md': 'https://lancedb.com/docs/troubleshooting/'
-        'guides/storage.md': 'https://lancedb.com/docs/storage/integrations'



@@ -403,4 +402,4 @@ extra:
    - icon: fontawesome/brands/x-twitter
      link: https://twitter.com/lancedb
    - icon: fontawesome/brands/linkedin
-      link: https://www.linkedin.com/company/lancedb
+      link: https://www.linkedin.com/company/lancedb
--- a/docs/src/embeddings/available_embedding_models/multimodal_embedding_functions/voyageai_multimodal_embedding.md
+++ b/docs/src/embeddings/available_embedding_models/multimodal_embedding_functions/voyageai_multimodal_embedding.md
@@ -1,97 +0,0 @@
-# VoyageAI Embeddings : Multimodal
-
-VoyageAI embeddings can also be used to embed both text and image data, only some of the models support image data and you can check the list
-under [https://docs.voyageai.com/docs/multimodal-embeddings](https://docs.voyageai.com/docs/multimodal-embeddings)
-
-Supported parameters (to be passed in `create` method) are:
-
-| Parameter | Type | Default Value           | Description                               |
-|---|---|-------------------------|-------------------------------------------|
-| `name` | `str` | `"voyage-multimodal-3"` | The model ID of the VoyageAI model to use |
-
-Usage Example:
-
-```python
-import base64
-import os
-from io import BytesIO
-
-import requests
-import lancedb
-from lancedb.pydantic import LanceModel, Vector
-from lancedb.embeddings import get_registry
-import pandas as pd
-
-os.environ['VOYAGE_API_KEY'] = 'YOUR_VOYAGE_API_KEY'
-
-db = lancedb.connect(".lancedb")
-func = get_registry().get("voyageai").create(name="voyage-multimodal-3")
-
-
-def image_to_base64(image_bytes: bytes):
-    buffered = BytesIO(image_bytes)
-    img_str = base64.b64encode(buffered.getvalue())
-    return img_str.decode("utf-8")
-
-
-class Images(LanceModel):
-    label: str
-    image_uri: str = func.SourceField()  # image uri as the source
-    image_bytes: str = func.SourceField()  # image bytes base64 encoded as the source
-    vector: Vector(func.ndims()) = func.VectorField()  # vector column
-    vec_from_bytes: Vector(func.ndims()) = func.VectorField()  # Another vector column
-
-
-if "images" in db.table_names():
-    db.drop_table("images")
-table = db.create_table("images", schema=Images)
-labels = ["cat", "cat", "dog", "dog", "horse", "horse"]
-uris = [
-    "http://farm1.staticflickr.com/53/167798175_7c7845bbbd_z.jpg",
-    "http://farm1.staticflickr.com/134/332220238_da527d8140_z.jpg",
-    "http://farm9.staticflickr.com/8387/8602747737_2e5c2a45d4_z.jpg",
-    "http://farm5.staticflickr.com/4092/5017326486_1f46057f5f_z.jpg",
-    "http://farm9.staticflickr.com/8216/8434969557_d37882c42d_z.jpg",
-    "http://farm6.staticflickr.com/5142/5835678453_4f3a4edb45_z.jpg",
-]
-# get each uri as bytes
-images_bytes = [image_to_base64(requests.get(uri).content) for uri in uris]
-table.add(
-    pd.DataFrame({"label": labels, "image_uri": uris, "image_bytes": images_bytes})
-)
-```
-Now we can search using text from both the default vector column and the custom vector column
-```python
-
-# text search
-actual = table.search("man's best friend", "vec_from_bytes").limit(1).to_pydantic(Images)[0]
-print(actual.label) # prints "dog"
-
-frombytes = (
-    table.search("man's best friend", vector_column_name="vec_from_bytes")
-    .limit(1)
-    .to_pydantic(Images)[0]
-)
-print(frombytes.label)
-
-```
-
-Because we're using a multi-modal embedding function, we can also search using images
-
-```python
-# image search
-query_image_uri = "http://farm1.staticflickr.com/200/467715466_ed4a31801f_z.jpg"
-image_bytes = requests.get(query_image_uri).content
-query_image = Image.open(BytesIO(image_bytes))
-actual = table.search(query_image, "vec_from_bytes").limit(1).to_pydantic(Images)[0]
-print(actual.label == "dog")
-
-# image search using a custom vector column
-other = (
-    table.search(query_image, vector_column_name="vec_from_bytes")
-    .limit(1)
-    .to_pydantic(Images)[0]
-)
-print(actual.label)
-
-```
--- a/docs/src/guides/storage.md
+++ b/docs/src/guides/storage.md
@@ -397,6 +397,117 @@ For **read-only access**, LanceDB will need a policy such as:
 }
 ```

+#### DynamoDB Commit Store for concurrent writes
+
+By default, S3 does not support concurrent writes. Having two or more processes
+writing to the same table at the same time can lead to data corruption. This is
+because S3, unlike other object stores, does not have any atomic put or copy
+operation.
+
+To enable concurrent writes, you can configure LanceDB to use a DynamoDB table
+as a commit store. This table will be used to coordinate writes between
+different processes. To enable this feature, you must modify your connection
+URI to use the `s3+ddb` scheme and add a query parameter `ddbTableName` with the
+name of the table to use.
+
+=== "Python"
+
+    === "Sync API"
+
+        ```python
+        import lancedb
+        db = lancedb.connect(
+            "s3+ddb://bucket/path?ddbTableName=my-dynamodb-table",
+        )
+        ```
+    === "Async API"
+
+        ```python
+        import lancedb
+        async_db = await lancedb.connect_async(
+            "s3+ddb://bucket/path?ddbTableName=my-dynamodb-table",
+        )
+        ```
+
+=== "JavaScript"
+
+    ```javascript
+    const lancedb = require("lancedb");
+
+    const db = await lancedb.connect(
+        "s3+ddb://bucket/path?ddbTableName=my-dynamodb-table",
+    );
+    ```
+
+The DynamoDB table must be created with the following schema:
+
+- Hash key: `base_uri` (string)
+- Range key: `version` (number)
+
+You can create this programmatically with:
+
+=== "Python"
+
+    <!-- skip-test -->
+    ```python
+    import boto3
+
+    dynamodb = boto3.client("dynamodb")
+    table = dynamodb.create_table(
+        TableName=table_name,
+        KeySchema=[
+            {"AttributeName": "base_uri", "KeyType": "HASH"},
+            {"AttributeName": "version", "KeyType": "RANGE"},
+        ],
+        AttributeDefinitions=[
+            {"AttributeName": "base_uri", "AttributeType": "S"},
+            {"AttributeName": "version", "AttributeType": "N"},
+        ],
+        ProvisionedThroughput={"ReadCapacityUnits": 1, "WriteCapacityUnits": 1},
+    )
+    ```
+
+=== "JavaScript"
+
+    <!-- skip-test -->
+    ```javascript
+    import {
+      CreateTableCommand,
+      DynamoDBClient,
+    } from "@aws-sdk/client-dynamodb";
+
+    const dynamodb = new DynamoDBClient({
+      region: CONFIG.awsRegion,
+      credentials: {
+        accessKeyId: CONFIG.awsAccessKeyId,
+        secretAccessKey: CONFIG.awsSecretAccessKey,
+      },
+      endpoint: CONFIG.awsEndpoint,
+    });
+    const command = new CreateTableCommand({
+      TableName: table_name,
+      AttributeDefinitions: [
+        {
+          AttributeName: "base_uri",
+          AttributeType: "S",
+        },
+        {
+          AttributeName: "version",
+          AttributeType: "N",
+        },
+      ],
+      KeySchema: [
+        { AttributeName: "base_uri", KeyType: "HASH" },
+        { AttributeName: "version", KeyType: "RANGE" },
+      ],
+      ProvisionedThroughput: {
+        ReadCapacityUnits: 1,
+        WriteCapacityUnits: 1,
+      },
+    });
+    await dynamodb.send(command);
+    ```
+

 #### S3-compatible stores

--- a/docs/src/js/classes/PermutationBuilder.md
+++ b/docs/src/js/classes/PermutationBuilder.md
@@ -1,220 +0,0 @@
-[**@lancedb/lancedb**](../README.md) • **Docs**
-
-***
-
-[@lancedb/lancedb](../globals.md) / PermutationBuilder
-
-# Class: PermutationBuilder
-
-A PermutationBuilder for creating data permutations with splits, shuffling, and filtering.
-
-This class provides a TypeScript wrapper around the native Rust PermutationBuilder,
-offering methods to configure data splits, shuffling, and filtering before executing
-the permutation to create a new table.
-
-## Methods
-
-### execute()
-
-```ts
-execute(): Promise<Table>
-```
-
-Execute the permutation and create the destination table.
-
-#### Returns
-
-`Promise`&lt;[`Table`](Table.md)&gt;
-
-A Promise that resolves to the new Table instance
-
-#### Example
-
-```ts
-const permutationTable = await builder.execute();
-console.log(`Created table: ${permutationTable.name}`);
-```
-
-***
-
-### filter()
-
-```ts
-filter(filter): PermutationBuilder
-```
-
-Configure filtering for the permutation.
-
-#### Parameters
-
-* **filter**: `string`
-    SQL filter expression
-
-#### Returns
-
-[`PermutationBuilder`](PermutationBuilder.md)
-
-A new PermutationBuilder instance
-
-#### Example
-
-```ts
-builder.filter("age > 18 AND status = 'active'");
-```
-
-***
-
-### shuffle()
-
-```ts
-shuffle(options): PermutationBuilder
-```
-
-Configure shuffling for the permutation.
-
-#### Parameters
-
-* **options**: [`ShuffleOptions`](../interfaces/ShuffleOptions.md)
-    Configuration for shuffling
-
-#### Returns
-
-[`PermutationBuilder`](PermutationBuilder.md)
-
-A new PermutationBuilder instance
-
-#### Example
-
-```ts
-// Basic shuffle
-builder.shuffle({ seed: 42 });
-
-// Shuffle with clump size
-builder.shuffle({ seed: 42, clumpSize: 10 });
-```
-
-***
-
-### splitCalculated()
-
-```ts
-splitCalculated(calculation): PermutationBuilder
-```
-
-Configure calculated splits for the permutation.
-
-#### Parameters
-
-* **calculation**: `string`
-    SQL expression for calculating splits
-
-#### Returns
-
-[`PermutationBuilder`](PermutationBuilder.md)
-
-A new PermutationBuilder instance
-
-#### Example
-
-```ts
-builder.splitCalculated("user_id % 3");
-```
-
-***
-
-### splitHash()
-
-```ts
-splitHash(options): PermutationBuilder
-```
-
-Configure hash-based splits for the permutation.
-
-#### Parameters
-
-* **options**: [`SplitHashOptions`](../interfaces/SplitHashOptions.md)
-    Configuration for hash-based splitting
-
-#### Returns
-
-[`PermutationBuilder`](PermutationBuilder.md)
-
-A new PermutationBuilder instance
-
-#### Example
-
-```ts
-builder.splitHash({
-  columns: ["user_id"],
-  splitWeights: [70, 30],
-  discardWeight: 0
-});
-```
-
-***
-
-### splitRandom()
-
-```ts
-splitRandom(options): PermutationBuilder
-```
-
-Configure random splits for the permutation.
-
-#### Parameters
-
-* **options**: [`SplitRandomOptions`](../interfaces/SplitRandomOptions.md)
-    Configuration for random splitting
-
-#### Returns
-
-[`PermutationBuilder`](PermutationBuilder.md)
-
-A new PermutationBuilder instance
-
-#### Example
-
-```ts
-// Split by ratios
-builder.splitRandom({ ratios: [0.7, 0.3], seed: 42 });
-
-// Split by counts
-builder.splitRandom({ counts: [1000, 500], seed: 42 });
-
-// Split with fixed size
-builder.splitRandom({ fixed: 100, seed: 42 });
-```
-
-***
-
-### splitSequential()
-
-```ts
-splitSequential(options): PermutationBuilder
-```
-
-Configure sequential splits for the permutation.
-
-#### Parameters
-
-* **options**: [`SplitSequentialOptions`](../interfaces/SplitSequentialOptions.md)
-    Configuration for sequential splitting
-
-#### Returns
-
-[`PermutationBuilder`](PermutationBuilder.md)
-
-A new PermutationBuilder instance
-
-#### Example
-
-```ts
-// Split by ratios
-builder.splitSequential({ ratios: [0.8, 0.2] });
-
-// Split by counts
-builder.splitSequential({ counts: [800, 200] });
-
-// Split with fixed size
-builder.splitSequential({ fixed: 1000 });
-```
--- a/docs/src/js/classes/Query.md
+++ b/docs/src/js/classes/Query.md
@@ -80,7 +80,7 @@ AnalyzeExec verbose=true, metrics=[]
 ### execute()

 ```ts
-protected execute(options?): AsyncGenerator<RecordBatch<any>, void, unknown>
+protected execute(options?): RecordBatchIterator
 ```

 Execute the query and return the results as an
@@ -91,7 +91,7 @@ Execute the query and return the results as an

 #### Returns

-`AsyncGenerator`&lt;`RecordBatch`&lt;`any`&gt;, `void`, `unknown`&gt;
+[`RecordBatchIterator`](RecordBatchIterator.md)

 #### See

@@ -343,29 +343,6 @@ This is useful for pagination.

 ***

-### outputSchema()
-
-```ts
-outputSchema(): Promise<Schema<any>>
-```
-
-Returns the schema of the output that will be returned by this query.
-
-This can be used to inspect the types and names of the columns that will be
-returned by the query before executing it.
-
-#### Returns
-
-`Promise`&lt;`Schema`&lt;`any`&gt;&gt;
-
-An Arrow Schema describing the output columns.
-
-#### Inherited from
-
-`StandardQueryBase.outputSchema`
-
-***
-
 ### select()

 ```ts
--- a/docs/src/js/classes/QueryBase.md
+++ b/docs/src/js/classes/QueryBase.md
@@ -81,7 +81,7 @@ AnalyzeExec verbose=true, metrics=[]
 ### execute()

 ```ts
-protected execute(options?): AsyncGenerator<RecordBatch<any>, void, unknown>
+protected execute(options?): RecordBatchIterator
 ```

 Execute the query and return the results as an
@@ -92,7 +92,7 @@ Execute the query and return the results as an

 #### Returns

-`AsyncGenerator`&lt;`RecordBatch`&lt;`any`&gt;, `void`, `unknown`&gt;
+[`RecordBatchIterator`](RecordBatchIterator.md)

 #### See

@@ -140,25 +140,6 @@ const plan = await table.query().nearestTo([0.5, 0.2]).explainPlan();

 ***

-### outputSchema()
-
-```ts
-outputSchema(): Promise<Schema<any>>
-```
-
-Returns the schema of the output that will be returned by this query.
-
-This can be used to inspect the types and names of the columns that will be
-returned by the query before executing it.
-
-#### Returns
-
-`Promise`&lt;`Schema`&lt;`any`&gt;&gt;
-
-An Arrow Schema describing the output columns.
-
-***
-
 ### select()

 ```ts
--- a/docs/src/js/classes/RecordBatchIterator.md
+++ b/docs/src/js/classes/RecordBatchIterator.md
@@ -0,0 +1,43 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / RecordBatchIterator
+
+# Class: RecordBatchIterator
+
+## Implements
+
+- `AsyncIterator`&lt;`RecordBatch`&gt;
+
+## Constructors
+
+### new RecordBatchIterator()
+
+```ts
+new RecordBatchIterator(promise?): RecordBatchIterator
+```
+
+#### Parameters
+
+* **promise?**: `Promise`&lt;`RecordBatchIterator`&gt;
+
+#### Returns
+
+[`RecordBatchIterator`](RecordBatchIterator.md)
+
+## Methods
+
+### next()
+
+```ts
+next(): Promise<IteratorResult<RecordBatch<any>, any>>
+```
+
+#### Returns
+
+`Promise`&lt;`IteratorResult`&lt;`RecordBatch`&lt;`any`&gt;, `any`&gt;&gt;
+
+#### Implementation of
+
+`AsyncIterator.next`
--- a/docs/src/js/classes/TakeQuery.md
+++ b/docs/src/js/classes/TakeQuery.md
@@ -76,7 +76,7 @@ AnalyzeExec verbose=true, metrics=[]
 ### execute()

 ```ts
-protected execute(options?): AsyncGenerator<RecordBatch<any>, void, unknown>
+protected execute(options?): RecordBatchIterator
 ```

 Execute the query and return the results as an
@@ -87,7 +87,7 @@ Execute the query and return the results as an

 #### Returns

-`AsyncGenerator`&lt;`RecordBatch`&lt;`any`&gt;, `void`, `unknown`&gt;
+[`RecordBatchIterator`](RecordBatchIterator.md)

 #### See

@@ -143,29 +143,6 @@ const plan = await table.query().nearestTo([0.5, 0.2]).explainPlan();

 ***

-### outputSchema()
-
-```ts
-outputSchema(): Promise<Schema<any>>
-```
-
-Returns the schema of the output that will be returned by this query.
-
-This can be used to inspect the types and names of the columns that will be
-returned by the query before executing it.
-
-#### Returns
-
-`Promise`&lt;`Schema`&lt;`any`&gt;&gt;
-
-An Arrow Schema describing the output columns.
-
-#### Inherited from
-
-[`QueryBase`](QueryBase.md).[`outputSchema`](QueryBase.md#outputschema)
-
-***
-
 ### select()

 ```ts
--- a/docs/src/js/classes/VectorQuery.md
+++ b/docs/src/js/classes/VectorQuery.md
@@ -221,7 +221,7 @@ also increase the latency of your query. The default value is 1.5*limit.
 ### execute()

 ```ts
-protected execute(options?): AsyncGenerator<RecordBatch<any>, void, unknown>
+protected execute(options?): RecordBatchIterator
 ```

 Execute the query and return the results as an
@@ -232,7 +232,7 @@ Execute the query and return the results as an

 #### Returns

-`AsyncGenerator`&lt;`RecordBatch`&lt;`any`&gt;, `void`, `unknown`&gt;
+[`RecordBatchIterator`](RecordBatchIterator.md)

 #### See

@@ -498,29 +498,6 @@ This is useful for pagination.

 ***

-### outputSchema()
-
-```ts
-outputSchema(): Promise<Schema<any>>
-```
-
-Returns the schema of the output that will be returned by this query.
-
-This can be used to inspect the types and names of the columns that will be
-returned by the query before executing it.
-
-#### Returns
-
-`Promise`&lt;`Schema`&lt;`any`&gt;&gt;
-
-An Arrow Schema describing the output columns.
-
-#### Inherited from
-
-`StandardQueryBase.outputSchema`
-
-***
-
 ### postfilter()

 ```ts
--- a/docs/src/js/functions/RecordBatchIterator.md
+++ b/docs/src/js/functions/RecordBatchIterator.md
@@ -1,19 +0,0 @@
-[**@lancedb/lancedb**](../README.md) • **Docs**
-
-***
-
-[@lancedb/lancedb](../globals.md) / RecordBatchIterator
-
-# Function: RecordBatchIterator()
-
-```ts
-function RecordBatchIterator(promisedInner): AsyncGenerator<RecordBatch<any>, void, unknown>
-```
-
-## Parameters
-
-* **promisedInner**: `Promise`&lt;`RecordBatchIterator`&gt;
-
-## Returns
-
-`AsyncGenerator`&lt;`RecordBatch`&lt;`any`&gt;, `void`, `unknown`&gt;
--- a/docs/src/js/functions/permutationBuilder.md
+++ b/docs/src/js/functions/permutationBuilder.md
@@ -1,34 +0,0 @@
-[**@lancedb/lancedb**](../README.md) • **Docs**
-
-***
-
-[@lancedb/lancedb](../globals.md) / permutationBuilder
-
-# Function: permutationBuilder()
-
-```ts
-function permutationBuilder(table): PermutationBuilder
-```
-
-Create a permutation builder for the given table.
-
-## Parameters
-
-* **table**: [`Table`](../classes/Table.md)
-    The source table to create a permutation from
-
-## Returns
-
-[`PermutationBuilder`](../classes/PermutationBuilder.md)
-
-A PermutationBuilder instance
-
-## Example
-
-```ts
-const builder = permutationBuilder(sourceTable, "training_data")
-  .splitRandom({ ratios: [0.8, 0.2], seed: 42 })
-  .shuffle({ seed: 123 });
-
-const trainingTable = await builder.execute();
-```
--- a/docs/src/js/globals.md
+++ b/docs/src/js/globals.md
@@ -28,10 +28,10 @@
 - [MultiMatchQuery](classes/MultiMatchQuery.md)
 - [NativeJsHeaderProvider](classes/NativeJsHeaderProvider.md)
 - [OAuthHeaderProvider](classes/OAuthHeaderProvider.md)
-- [PermutationBuilder](classes/PermutationBuilder.md)
 - [PhraseQuery](classes/PhraseQuery.md)
 - [Query](classes/Query.md)
 - [QueryBase](classes/QueryBase.md)
+- [RecordBatchIterator](classes/RecordBatchIterator.md)
 - [Session](classes/Session.md)
 - [StaticHeaderProvider](classes/StaticHeaderProvider.md)
 - [Table](classes/Table.md)
@@ -76,10 +76,6 @@
 - [QueryExecutionOptions](interfaces/QueryExecutionOptions.md)
 - [RemovalStats](interfaces/RemovalStats.md)
 - [RetryConfig](interfaces/RetryConfig.md)
-- [ShuffleOptions](interfaces/ShuffleOptions.md)
-- [SplitHashOptions](interfaces/SplitHashOptions.md)
-- [SplitRandomOptions](interfaces/SplitRandomOptions.md)
-- [SplitSequentialOptions](interfaces/SplitSequentialOptions.md)
 - [TableNamesOptions](interfaces/TableNamesOptions.md)
 - [TableStatistics](interfaces/TableStatistics.md)
 - [TimeoutConfig](interfaces/TimeoutConfig.md)
@@ -104,8 +100,6 @@

 ## Functions

-- [RecordBatchIterator](functions/RecordBatchIterator.md)
 - [connect](functions/connect.md)
 - [makeArrowTable](functions/makeArrowTable.md)
 - [packBits](functions/packBits.md)
-- [permutationBuilder](functions/permutationBuilder.md)
--- a/docs/src/js/interfaces/IvfRqOptions.md
+++ b/docs/src/js/interfaces/IvfRqOptions.md
@@ -1,101 +0,0 @@
-[**@lancedb/lancedb**](../README.md) • **Docs**
-
-***
-
-[@lancedb/lancedb](../globals.md) / IvfRqOptions
-
-# Interface: IvfRqOptions
-
-## Properties
-
-### distanceType?
-
-```ts
-optional distanceType: "l2" | "cosine" | "dot";
-```
-
-Distance type to use to build the index.
-
-Default value is "l2".
-
-This is used when training the index to calculate the IVF partitions
-(vectors are grouped in partitions with similar vectors according to this
-distance type) and during quantization.
-
-The distance type used to train an index MUST match the distance type used
-to search the index. Failure to do so will yield inaccurate results.
-
-The following distance types are available:
-
-"l2" - Euclidean distance.
-"cosine" - Cosine distance.
-"dot" - Dot product.
-
-***
-
-### maxIterations?
-
-```ts
-optional maxIterations: number;
-```
-
-Max iterations to train IVF kmeans.
-
-When training an IVF index we use kmeans to calculate the partitions. This parameter
-controls how many iterations of kmeans to run.
-
-The default value is 50.
-
-***
-
-### numBits?
-
-```ts
-optional numBits: number;
-```
-
-Number of bits per dimension for residual quantization.
-
-This value controls how much each residual component is compressed. The more
-bits, the more accurate the index will be but the slower search. Typical values
-are small integers; the default is 1 bit per dimension.
-
-***
-
-### numPartitions?
-
-```ts
-optional numPartitions: number;
-```
-
-The number of IVF partitions to create.
-
-This value should generally scale with the number of rows in the dataset.
-By default the number of partitions is the square root of the number of
-rows.
-
-If this value is too large then the first part of the search (picking the
-right partition) will be slow. If this value is too small then the second
-part of the search (searching within a partition) will be slow.
-
-***
-
-### sampleRate?
-
-```ts
-optional sampleRate: number;
-```
-
-The number of vectors, per partition, to sample when training IVF kmeans.
-
-When an IVF index is trained, we need to calculate partitions. These are groups
-of vectors that are similar to each other. To do this we use an algorithm called kmeans.
-
-Running kmeans on a large dataset can be slow. To speed this up we run kmeans on a
-random sample of the data. This parameter controls the size of the sample. The total
-number of vectors used to train the index is `sample_rate * num_partitions`.
-
-Increasing this value might improve the quality of the index but in most cases the
-default should be sufficient.
-
-The default value is 256.
--- a/docs/src/js/interfaces/ShuffleOptions.md
+++ b/docs/src/js/interfaces/ShuffleOptions.md
@@ -1,23 +0,0 @@
-[**@lancedb/lancedb**](../README.md) • **Docs**
-
-***
-
-[@lancedb/lancedb](../globals.md) / ShuffleOptions
-
-# Interface: ShuffleOptions
-
-## Properties
-
-### clumpSize?
-
-```ts
-optional clumpSize: number;
-```
-
-***
-
-### seed?
-
-```ts
-optional seed: number;
-```
--- a/docs/src/js/interfaces/SplitHashOptions.md
+++ b/docs/src/js/interfaces/SplitHashOptions.md
@@ -1,31 +0,0 @@
-[**@lancedb/lancedb**](../README.md) • **Docs**
-
-***
-
-[@lancedb/lancedb](../globals.md) / SplitHashOptions
-
-# Interface: SplitHashOptions
-
-## Properties
-
-### columns
-
-```ts
-columns: string[];
-```
-
-***
-
-### discardWeight?
-
-```ts
-optional discardWeight: number;
-```
-
-***
-
-### splitWeights
-
-```ts
-splitWeights: number[];
-```
--- a/docs/src/js/interfaces/SplitRandomOptions.md
+++ b/docs/src/js/interfaces/SplitRandomOptions.md
@@ -1,39 +0,0 @@
-[**@lancedb/lancedb**](../README.md) • **Docs**
-
-***
-
-[@lancedb/lancedb](../globals.md) / SplitRandomOptions
-
-# Interface: SplitRandomOptions
-
-## Properties
-
-### counts?
-
-```ts
-optional counts: number[];
-```
-
-***
-
-### fixed?
-
-```ts
-optional fixed: number;
-```
-
-***
-
-### ratios?
-
-```ts
-optional ratios: number[];
-```
-
-***
-
-### seed?
-
-```ts
-optional seed: number;
-```
--- a/docs/src/js/interfaces/SplitSequentialOptions.md
+++ b/docs/src/js/interfaces/SplitSequentialOptions.md
@@ -1,31 +0,0 @@
-[**@lancedb/lancedb**](../README.md) • **Docs**
-
-***
-
-[@lancedb/lancedb](../globals.md) / SplitSequentialOptions
-
-# Interface: SplitSequentialOptions
-
-## Properties
-
-### counts?
-
-```ts
-optional counts: number[];
-```
-
-***
-
-### fixed?
-
-```ts
-optional fixed: number;
-```
-
-***
-
-### ratios?
-
-```ts
-optional ratios: number[];
-```
--- a/java/core/pom.xml
+++ b/java/core/pom.xml
@@ -8,7 +8,7 @@
    <parent>
        <groupId>com.lancedb</groupId>
        <artifactId>lancedb-parent</artifactId>
-        <version>0.22.3-beta.3</version>
+        <version>0.22.2-beta.0</version>
        <relativePath>../pom.xml</relativePath>
    </parent>

--- a/java/lance-namespace/pom.xml
+++ b/java/lance-namespace/pom.xml
@@ -8,7 +8,7 @@
    <parent>
        <groupId>com.lancedb</groupId>
        <artifactId>lancedb-parent</artifactId>
-        <version>0.22.3-beta.3</version>
+        <version>0.22.2-beta.0</version>
        <relativePath>../pom.xml</relativePath>
    </parent>

--- a/java/pom.xml
+++ b/java/pom.xml
@@ -6,7 +6,7 @@

    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-parent</artifactId>
-    <version>0.22.3-beta.3</version>
+    <version>0.22.2-beta.0</version>
    <packaging>pom</packaging>
    <name>${project.artifactId}</name>
    <description>LanceDB Java SDK Parent POM</description>
--- a/nodejs/AGENTS.md
+++ b/nodejs/AGENTS.md
@@ -1,13 +0,0 @@
-These are the typescript bindings of LanceDB.
-The core Rust library is in the `../rust/lancedb` directory, the rust binding
-code is in the `src/` directory and the typescript bindings are in
-the `lancedb/` directory.
-
-Whenever you change the Rust code, you will need to recompile: `npm run build`.
-
-Common commands:
-* Build: `npm run build`
-* Lint: `npm run lint`
-* Fix lints: `npm run lint-fix`
-* Test: `npm test`
-* Run single test file: `npm test __test__/arrow.test.ts`
--- a/nodejs/CLAUDE.md
+++ b/nodejs/CLAUDE.md
@@ -1 +0,0 @@
-AGENTS.md
--- a/nodejs/CLAUDE.md
+++ b/nodejs/CLAUDE.md
@@ -0,0 +1,13 @@
+These are the typescript bindings of LanceDB.
+The core Rust library is in the `../rust/lancedb` directory, the rust binding
+code is in the `src/` directory and the typescript bindings are in
+the `lancedb/` directory.
+
+Whenever you change the Rust code, you will need to recompile: `npm run build`.
+
+Common commands:
+* Build: `npm run build`
+* Lint: `npm run lint`
+* Fix lints: `npm run lint-fix`
+* Test: `npm test`
+* Run single test file: `npm test __test__/arrow.test.ts`
--- a/nodejs/Cargo.toml
+++ b/nodejs/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "lancedb-nodejs"
 edition.workspace = true
-version = "0.22.3-beta.3"
+version = "0.22.2-beta.0"
 license.workspace = true
 description.workspace = true
 repository.workspace = true
--- a/nodejs/test/permutation.test.ts
+++ b/nodejs/test/permutation.test.ts
@@ -1,227 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The LanceDB Authors
-
-import * as tmp from "tmp";
-import { Table, connect, permutationBuilder } from "../lancedb";
-import { makeArrowTable } from "../lancedb/arrow";
-
-describe("PermutationBuilder", () => {
-  let tmpDir: tmp.DirResult;
-  let table: Table;
-
-  beforeEach(async () => {
-    tmpDir = tmp.dirSync({ unsafeCleanup: true });
-    const db = await connect(tmpDir.name);
-
-    // Create test data
-    const data = makeArrowTable(
-      [
-        { id: 1, value: 10 },
-        { id: 2, value: 20 },
-        { id: 3, value: 30 },
-        { id: 4, value: 40 },
-        { id: 5, value: 50 },
-        { id: 6, value: 60 },
-        { id: 7, value: 70 },
-        { id: 8, value: 80 },
-        { id: 9, value: 90 },
-        { id: 10, value: 100 },
-      ],
-      { vectorColumns: {} },
-    );
-
-    table = await db.createTable("test_table", data);
-  });
-
-  afterEach(() => {
-    tmpDir.removeCallback();
-  });
-
-  test("should create permutation builder", () => {
-    const builder = permutationBuilder(table);
-    expect(builder).toBeDefined();
-  });
-
-  test("should execute basic permutation", async () => {
-    const builder = permutationBuilder(table);
-    const permutationTable = await builder.execute();
-
-    expect(permutationTable).toBeDefined();
-
-    const rowCount = await permutationTable.countRows();
-    expect(rowCount).toBe(10);
-  });
-
-  test("should create permutation with random splits", async () => {
-    const builder = permutationBuilder(table).splitRandom({
-      ratios: [1.0],
-      seed: 42,
-    });
-
-    const permutationTable = await builder.execute();
-    const rowCount = await permutationTable.countRows();
-    expect(rowCount).toBe(10);
-  });
-
-  test("should create permutation with percentage splits", async () => {
-    const builder = permutationBuilder(table).splitRandom({
-      ratios: [0.3, 0.7],
-      seed: 42,
-    });
-
-    const permutationTable = await builder.execute();
-    const rowCount = await permutationTable.countRows();
-    expect(rowCount).toBe(10);
-
-    // Check split distribution
-    const split0Count = await permutationTable.countRows("split_id = 0");
-    const split1Count = await permutationTable.countRows("split_id = 1");
-
-    expect(split0Count).toBeGreaterThan(0);
-    expect(split1Count).toBeGreaterThan(0);
-    expect(split0Count + split1Count).toBe(10);
-  });
-
-  test("should create permutation with count splits", async () => {
-    const builder = permutationBuilder(table).splitRandom({
-      counts: [3, 7],
-      seed: 42,
-    });
-
-    const permutationTable = await builder.execute();
-    const rowCount = await permutationTable.countRows();
-    expect(rowCount).toBe(10);
-
-    // Check split distribution
-    const split0Count = await permutationTable.countRows("split_id = 0");
-    const split1Count = await permutationTable.countRows("split_id = 1");
-
-    expect(split0Count).toBe(3);
-    expect(split1Count).toBe(7);
-  });
-
-  test("should create permutation with hash splits", async () => {
-    const builder = permutationBuilder(table).splitHash({
-      columns: ["id"],
-      splitWeights: [50, 50],
-      discardWeight: 0,
-    });
-
-    const permutationTable = await builder.execute();
-    const rowCount = await permutationTable.countRows();
-    expect(rowCount).toBe(10);
-
-    // Check that splits exist
-    const split0Count = await permutationTable.countRows("split_id = 0");
-    const split1Count = await permutationTable.countRows("split_id = 1");
-
-    expect(split0Count).toBeGreaterThan(0);
-    expect(split1Count).toBeGreaterThan(0);
-    expect(split0Count + split1Count).toBe(10);
-  });
-
-  test("should create permutation with sequential splits", async () => {
-    const builder = permutationBuilder(table).splitSequential({
-      ratios: [0.5, 0.5],
-    });
-
-    const permutationTable = await builder.execute();
-    const rowCount = await permutationTable.countRows();
-    expect(rowCount).toBe(10);
-
-    // Check split distribution - sequential should give exactly 5 and 5
-    const split0Count = await permutationTable.countRows("split_id = 0");
-    const split1Count = await permutationTable.countRows("split_id = 1");
-
-    expect(split0Count).toBe(5);
-    expect(split1Count).toBe(5);
-  });
-
-  test("should create permutation with calculated splits", async () => {
-    const builder = permutationBuilder(table).splitCalculated("id % 2");
-
-    const permutationTable = await builder.execute();
-    const rowCount = await permutationTable.countRows();
-    expect(rowCount).toBe(10);
-
-    // Check split distribution
-    const split0Count = await permutationTable.countRows("split_id = 0");
-    const split1Count = await permutationTable.countRows("split_id = 1");
-
-    expect(split0Count).toBeGreaterThan(0);
-    expect(split1Count).toBeGreaterThan(0);
-    expect(split0Count + split1Count).toBe(10);
-  });
-
-  test("should create permutation with shuffle", async () => {
-    const builder = permutationBuilder(table).shuffle({
-      seed: 42,
-    });
-
-    const permutationTable = await builder.execute();
-    const rowCount = await permutationTable.countRows();
-    expect(rowCount).toBe(10);
-  });
-
-  test("should create permutation with shuffle and clump size", async () => {
-    const builder = permutationBuilder(table).shuffle({
-      seed: 42,
-      clumpSize: 2,
-    });
-
-    const permutationTable = await builder.execute();
-    const rowCount = await permutationTable.countRows();
-    expect(rowCount).toBe(10);
-  });
-
-  test("should create permutation with filter", async () => {
-    const builder = permutationBuilder(table).filter("value > 50");
-
-    const permutationTable = await builder.execute();
-    const rowCount = await permutationTable.countRows();
-    expect(rowCount).toBe(5); // Values 60, 70, 80, 90, 100
-  });
-
-  test("should chain multiple operations", async () => {
-    const builder = permutationBuilder(table)
-      .filter("value <= 80")
-      .splitRandom({ ratios: [0.5, 0.5], seed: 42 })
-      .shuffle({ seed: 123 });
-
-    const permutationTable = await builder.execute();
-    const rowCount = await permutationTable.countRows();
-    expect(rowCount).toBe(8); // Values 10, 20, 30, 40, 50, 60, 70, 80
-
-    // Check split distribution
-    const split0Count = await permutationTable.countRows("split_id = 0");
-    const split1Count = await permutationTable.countRows("split_id = 1");
-
-    expect(split0Count).toBeGreaterThan(0);
-    expect(split1Count).toBeGreaterThan(0);
-    expect(split0Count + split1Count).toBe(8);
-  });
-
-  test("should throw error for invalid split arguments", () => {
-    const builder = permutationBuilder(table);
-
-    // Test no arguments provided
-    expect(() => builder.splitRandom({})).toThrow(
-      "Exactly one of 'ratios', 'counts', or 'fixed' must be provided",
-    );
-
-    // Test multiple arguments provided
-    expect(() =>
-      builder.splitRandom({ ratios: [0.5, 0.5], counts: [3, 7], seed: 42 }),
-    ).toThrow("Exactly one of 'ratios', 'counts', or 'fixed' must be provided");
-  });
-
-  test("should throw error when builder is consumed", async () => {
-    const builder = permutationBuilder(table);
-
-    // Execute once
-    await builder.execute();
-
-    // Should throw error on second execution
-    await expect(builder.execute()).rejects.toThrow("Builder already consumed");
-  });
-});
--- a/nodejs/test/query.test.ts
+++ b/nodejs/test/query.test.ts
@@ -1,111 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The LanceDB Authors
-
-import * as tmp from "tmp";
-
-import { type Table, connect } from "../lancedb";
-import {
-  Field,
-  FixedSizeList,
-  Float32,
-  Int64,
-  Schema,
-  Utf8,
-  makeArrowTable,
-} from "../lancedb/arrow";
-import { Index } from "../lancedb/indices";
-
-describe("Query outputSchema", () => {
-  let tmpDir: tmp.DirResult;
-  let table: Table;
-
-  beforeEach(async () => {
-    tmpDir = tmp.dirSync({ unsafeCleanup: true });
-    const db = await connect(tmpDir.name);
-
-    // Create table with explicit schema to ensure proper types
-    const schema = new Schema([
-      new Field("a", new Int64(), true),
-      new Field("text", new Utf8(), true),
-      new Field(
-        "vec",
-        new FixedSizeList(2, new Field("item", new Float32())),
-        true,
-      ),
-    ]);
-
-    const data = makeArrowTable(
-      [
-        { a: 1n, text: "foo", vec: [1, 2] },
-        { a: 2n, text: "bar", vec: [3, 4] },
-        { a: 3n, text: "baz", vec: [5, 6] },
-      ],
-      { schema },
-    );
-    table = await db.createTable("test", data);
-  });
-
-  afterEach(() => {
-    tmpDir.removeCallback();
-  });
-
-  it("should return schema for plain query", async () => {
-    const schema = await table.query().outputSchema();
-
-    expect(schema.fields.length).toBe(3);
-    expect(schema.fields.map((f) => f.name)).toEqual(["a", "text", "vec"]);
-    expect(schema.fields[0].type.toString()).toBe("Int64");
-    expect(schema.fields[1].type.toString()).toBe("Utf8");
-  });
-
-  it("should return schema with dynamic projection", async () => {
-    const schema = await table.query().select({ bl: "a * 2" }).outputSchema();
-
-    expect(schema.fields.length).toBe(1);
-    expect(schema.fields[0].name).toBe("bl");
-    expect(schema.fields[0].type.toString()).toBe("Int64");
-  });
-
-  it("should return schema for vector search with _distance column", async () => {
-    const schema = await table
-      .vectorSearch([1, 2])
-      .select(["a"])
-      .outputSchema();
-
-    expect(schema.fields.length).toBe(2);
-    expect(schema.fields.map((f) => f.name)).toEqual(["a", "_distance"]);
-    expect(schema.fields[0].type.toString()).toBe("Int64");
-    expect(schema.fields[1].type.toString()).toBe("Float32");
-  });
-
-  it("should return schema for FTS search", async () => {
-    await table.createIndex("text", { config: Index.fts() });
-
-    const schema = await table
-      .search("foo", "fts")
-      .select(["a"])
-      .outputSchema();
-
-    // FTS search includes _score column in addition to selected columns
-    expect(schema.fields.length).toBe(2);
-    expect(schema.fields.map((f) => f.name)).toContain("a");
-    expect(schema.fields.map((f) => f.name)).toContain("_score");
-    const aField = schema.fields.find((f) => f.name === "a");
-    expect(aField?.type.toString()).toBe("Int64");
-  });
-
-  it("should return schema for take query", async () => {
-    const schema = await table.takeOffsets([0]).select(["text"]).outputSchema();
-
-    expect(schema.fields.length).toBe(1);
-    expect(schema.fields[0].name).toBe("text");
-    expect(schema.fields[0].type.toString()).toBe("Utf8");
-  });
-
-  it("should return full schema when no select is specified", async () => {
-    const schema = await table.query().outputSchema();
-
-    // Should return all columns
-    expect(schema.fields.length).toBe(3);
-  });
-});
--- a/nodejs/test/sanitize.test.ts
+++ b/nodejs/test/sanitize.test.ts
@@ -1,184 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The LanceDB Authors
-
-import * as arrow from "../lancedb/arrow";
-import { sanitizeField, sanitizeType } from "../lancedb/sanitize";
-
-describe("sanitize", function () {
-  describe("sanitizeType function", function () {
-    it("should handle type objects", function () {
-      const type = new arrow.Int32();
-      const result = sanitizeType(type);
-
-      expect(result.typeId).toBe(arrow.Type.Int);
-      expect((result as arrow.Int).bitWidth).toBe(32);
-      expect((result as arrow.Int).isSigned).toBe(true);
-
-      const floatType = {
-        typeId: 3, // Type.Float = 3
-        precision: 2,
-        toString: () => "Float",
-        isFloat: true,
-        isFixedWidth: true,
-      };
-
-      const floatResult = sanitizeType(floatType);
-      expect(floatResult).toBeInstanceOf(arrow.DataType);
-      expect(floatResult.typeId).toBe(arrow.Type.Float);
-
-      const floatResult2 = sanitizeType({ ...floatType, typeId: () => 3 });
-      expect(floatResult2).toBeInstanceOf(arrow.DataType);
-      expect(floatResult2.typeId).toBe(arrow.Type.Float);
-    });
-
-    const allTypeNameTestCases = [
-      ["null", new arrow.Null()],
-      ["binary", new arrow.Binary()],
-      ["utf8", new arrow.Utf8()],
-      ["bool", new arrow.Bool()],
-      ["int8", new arrow.Int8()],
-      ["int16", new arrow.Int16()],
-      ["int32", new arrow.Int32()],
-      ["int64", new arrow.Int64()],
-      ["uint8", new arrow.Uint8()],
-      ["uint16", new arrow.Uint16()],
-      ["uint32", new arrow.Uint32()],
-      ["uint64", new arrow.Uint64()],
-      ["float16", new arrow.Float16()],
-      ["float32", new arrow.Float32()],
-      ["float64", new arrow.Float64()],
-      ["datemillisecond", new arrow.DateMillisecond()],
-      ["dateday", new arrow.DateDay()],
-      ["timenanosecond", new arrow.TimeNanosecond()],
-      ["timemicrosecond", new arrow.TimeMicrosecond()],
-      ["timemillisecond", new arrow.TimeMillisecond()],
-      ["timesecond", new arrow.TimeSecond()],
-      ["intervaldaytime", new arrow.IntervalDayTime()],
-      ["intervalyearmonth", new arrow.IntervalYearMonth()],
-      ["durationnanosecond", new arrow.DurationNanosecond()],
-      ["durationmicrosecond", new arrow.DurationMicrosecond()],
-      ["durationmillisecond", new arrow.DurationMillisecond()],
-      ["durationsecond", new arrow.DurationSecond()],
-    ] as const;
-
-    it.each(allTypeNameTestCases)(
-      'should map type name "%s" to %s',
-      function (name, expected) {
-        const result = sanitizeType(name);
-        expect(result).toBeInstanceOf(expected.constructor);
-      },
-    );
-
-    const caseVariationTestCases = [
-      ["NULL", new arrow.Null()],
-      ["Utf8", new arrow.Utf8()],
-      ["FLOAT32", new arrow.Float32()],
-      ["DaTedAy", new arrow.DateDay()],
-    ] as const;
-
-    it.each(caseVariationTestCases)(
-      'should be case insensitive for type name "%s" mapped to %s',
-      function (name, expected) {
-        const result = sanitizeType(name);
-        expect(result).toBeInstanceOf(expected.constructor);
-      },
-    );
-
-    it("should throw error for unrecognized type name", function () {
-      expect(() => sanitizeType("invalid_type")).toThrow(
-        "Unrecognized type name in schema: invalid_type",
-      );
-    });
-  });
-
-  describe("sanitizeField function", function () {
-    it("should handle field with string type name", function () {
-      const field = sanitizeField({
-        name: "string_field",
-        type: "utf8",
-        nullable: true,
-        metadata: new Map([["key", "value"]]),
-      });
-
-      expect(field).toBeInstanceOf(arrow.Field);
-      expect(field.name).toBe("string_field");
-      expect(field.type).toBeInstanceOf(arrow.Utf8);
-      expect(field.nullable).toBe(true);
-      expect(field.metadata?.get("key")).toBe("value");
-    });
-
-    it("should handle field with type object", function () {
-      const floatType = {
-        typeId: 3, // Float
-        precision: 32,
-      };
-
-      const field = sanitizeField({
-        name: "float_field",
-        type: floatType,
-        nullable: false,
-      });
-
-      expect(field).toBeInstanceOf(arrow.Field);
-      expect(field.name).toBe("float_field");
-      expect(field.type).toBeInstanceOf(arrow.DataType);
-      expect(field.type.typeId).toBe(arrow.Type.Float);
-      expect((field.type as arrow.Float64).precision).toBe(32);
-      expect(field.nullable).toBe(false);
-    });
-
-    it("should handle field with direct Type instance", function () {
-      const field = sanitizeField({
-        name: "bool_field",
-        type: new arrow.Bool(),
-        nullable: true,
-      });
-
-      expect(field).toBeInstanceOf(arrow.Field);
-      expect(field.name).toBe("bool_field");
-      expect(field.type).toBeInstanceOf(arrow.Bool);
-      expect(field.nullable).toBe(true);
-    });
-
-    it("should throw error for invalid field object", function () {
-      expect(() =>
-        sanitizeField({
-          type: "int32",
-          nullable: true,
-        }),
-      ).toThrow(
-        "The field passed in is missing a `type`/`name`/`nullable` property",
-      );
-
-      // Invalid type
-      expect(() =>
-        sanitizeField({
-          name: "invalid",
-          type: { invalid: true },
-          nullable: true,
-        }),
-      ).toThrow("Expected a Type to have a typeId property");
-
-      // Invalid nullable
-      expect(() =>
-        sanitizeField({
-          name: "invalid_nullable",
-          type: "int32",
-          nullable: "not a boolean",
-        }),
-      ).toThrow("The field passed in had a non-boolean `nullable` property");
-    });
-
-    it("should report error for invalid type name", function () {
-      expect(() =>
-        sanitizeField({
-          name: "invalid_field",
-          type: "invalid_type",
-          nullable: true,
-        }),
-      ).toThrow(
-        "Unable to sanitize type for field: invalid_field due to error: Error: Unrecognized type name in schema: invalid_type",
-      );
-    });
-  });
-});
--- a/nodejs/test/table.test.ts
+++ b/nodejs/test/table.test.ts
@@ -10,13 +10,7 @@ import * as arrow16 from "apache-arrow-16";
 import * as arrow17 from "apache-arrow-17";
 import * as arrow18 from "apache-arrow-18";

-import {
-  Connection,
-  MatchQuery,
-  PhraseQuery,
-  Table,
-  connect,
-} from "../lancedb";
+import { MatchQuery, PhraseQuery, Table, connect } from "../lancedb";
 import {
  Table as ArrowTable,
  Field,
@@ -27,8 +21,6 @@ import {
  Int64,
  List,
  Schema,
-  SchemaLike,
-  Type,
  Uint8,
  Utf8,
  makeArrowTable,
@@ -219,7 +211,8 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
      },
    );

-    it("should be able to omit nullable fields", async () => {
+    // TODO: https://github.com/lancedb/lancedb/issues/1832
+    it.skip("should be able to omit nullable fields", async () => {
      const db = await connect(tmpDir.name);
      const schema = new arrow.Schema([
        new arrow.Field(
@@ -243,36 +236,23 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
      await table.add([data3]);

      let res = await table.query().limit(10).toArray();
-      const resVector = res.map((r) =>
-        r.vector ? Array.from(r.vector) : null,
-      );
+      const resVector = res.map((r) => r.get("vector").toArray());
      expect(resVector).toEqual([null, data2.vector, data3.vector]);
-      const resItem = res.map((r) => r.item);
+      const resItem = res.map((r) => r.get("item").toArray());
      expect(resItem).toEqual(["foo", null, "bar"]);
-      const resPrice = res.map((r) => r.price);
+      const resPrice = res.map((r) => r.get("price").toArray());
      expect(resPrice).toEqual([10.0, 2.0, 3.0]);

      const data4 = { item: "foo" };
      // We can't omit a column if it's not nullable
-      await expect(table.add([data4])).rejects.toThrow(
-        "Append with different schema",
-      );
+      await expect(table.add([data4])).rejects.toThrow("Invalid user input");

      // But we can alter columns to make them nullable
      await table.alterColumns([{ path: "price", nullable: true }]);
      await table.add([data4]);

-      res = (await table.query().limit(10).toArray()).map((r) => ({
-        ...r.toJSON(),
-        vector: r.vector ? Array.from(r.vector) : null,
-      }));
-      // Rust fills missing nullable fields with null
-      expect(res).toEqual([
-        { ...data1, vector: null },
-        { ...data2, item: null },
-        data3,
-        { ...data4, price: null, vector: null },
-      ]);
+      res = (await table.query().limit(10).toArray()).map((r) => r.toJSON());
+      expect(res).toEqual([data1, data2, data3, data4]);
    });

    it("should be able to insert nullable data for non-nullable fields", async () => {
@@ -350,43 +330,6 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
      const table = await db.createTable("my_table", data);
      expect(await table.countRows()).toEqual(2);
    });
-
-    it("should allow undefined and omitted nullable vector fields", async () => {
-      // Test for the bug: can't pass undefined or omit vector column
-      const db = await connect("memory://");
-      const schema = new arrow.Schema([
-        new arrow.Field("id", new arrow.Int32(), true),
-        new arrow.Field(
-          "vector",
-          new arrow.FixedSizeList(
-            32,
-            new arrow.Field("item", new arrow.Float32(), true),
-          ),
-          true, // nullable = true
-        ),
-      ]);
-      const table = await db.createEmptyTable("test_table", schema);
-
-      // Should not throw error for undefined value
-      await table.add([{ id: 0, vector: undefined }]);
-
-      // Should not throw error for omitted field
-      await table.add([{ id: 1 }]);
-
-      // Should still work for null
-      await table.add([{ id: 2, vector: null }]);
-
-      // Should still work for actual vector
-      const testVector = new Array(32).fill(0.5);
-      await table.add([{ id: 3, vector: testVector }]);
-      expect(await table.countRows()).toEqual(4);
-
-      const res = await table.query().limit(10).toArray();
-      const resVector = res.map((r) =>
-        r.vector ? Array.from(r.vector) : null,
-      );
-      expect(resVector).toEqual([null, null, null, testVector]);
-    });
  },
 );

@@ -1520,9 +1463,7 @@ describe("when optimizing a dataset", () => {

  it("delete unverified", async () => {
    const version = await table.version();
-    const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${
-      version - 1
-    }.manifest`;
+    const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${version - 1}.manifest`;
    fs.rmSync(versionFile);

    let stats = await table.optimize({ deleteUnverified: false });
@@ -2036,52 +1977,3 @@ describe("column name options", () => {
    expect(results2.length).toBe(10);
  });
 });
-
-describe("when creating an empty table", () => {
-  let con: Connection;
-  beforeEach(async () => {
-    const tmpDir = tmp.dirSync({ unsafeCleanup: true });
-    con = await connect(tmpDir.name);
-  });
-  afterEach(() => {
-    con.close();
-  });
-
-  it("can create an empty table from an arrow Schema", async () => {
-    const schema = new Schema([
-      new Field("id", new Int64()),
-      new Field("vector", new Float64()),
-    ]);
-    const table = await con.createEmptyTable("test", schema);
-    const actualSchema = await table.schema();
-    expect(actualSchema.fields[0].type.typeId).toBe(Type.Int);
-    expect((actualSchema.fields[0].type as Int64).bitWidth).toBe(64);
-    expect(actualSchema.fields[1].type.typeId).toBe(Type.Float);
-    expect((actualSchema.fields[1].type as Float64).precision).toBe(2);
-  });
-
-  it("can create an empty table from schema that specifies field types by name", async () => {
-    const schemaLike = {
-      fields: [
-        {
-          name: "id",
-          type: "int64",
-          nullable: true,
-        },
-        {
-          name: "vector",
-          type: "float64",
-          nullable: true,
-        },
-      ],
-      metadata: new Map(),
-      names: ["id", "vector"],
-    } satisfies SchemaLike;
-    const table = await con.createEmptyTable("test", schemaLike);
-    const actualSchema = await table.schema();
-    expect(actualSchema.fields[0].type.typeId).toBe(Type.Int);
-    expect((actualSchema.fields[0].type as Int64).bitWidth).toBe(64);
-    expect(actualSchema.fields[1].type.typeId).toBe(Type.Float);
-    expect((actualSchema.fields[1].type as Float64).precision).toBe(2);
-  });
-});
--- a/nodejs/lancedb/arrow.ts
+++ b/nodejs/lancedb/arrow.ts
@@ -73,7 +73,7 @@ export type FieldLike =
  | {
      type: string;
      name: string;
-      nullable: boolean;
+      nullable?: boolean;
      metadata?: Map<string, string>;
    };

@@ -1285,36 +1285,19 @@ function validateSchemaEmbeddings(
    if (isFixedSizeList(field.type)) {
      field = sanitizeField(field);
      if (data.length !== 0 && data?.[0]?.[field.name] === undefined) {
-        // Check if there's an embedding function registered for this field
-        let hasEmbeddingFunction = false;
-
-        // Check schema metadata for embedding functions
        if (schema.metadata.has("embedding_functions")) {
          const embeddings = JSON.parse(
            schema.metadata.get("embedding_functions")!,
          );
-          // biome-ignore lint/suspicious/noExplicitAny: we don't know the type of `f`
-          if (embeddings.find((f: any) => f["vectorColumn"] === field.name)) {
-            hasEmbeddingFunction = true;
-          }
-        }
-
-        // Check passed embedding function parameter
-        if (embeddings && embeddings.vectorColumn === field.name) {
-          hasEmbeddingFunction = true;
-        }
-
-        // If the field is nullable AND there's no embedding function, allow undefined/omitted values
-        if (field.nullable && !hasEmbeddingFunction) {
-          fields.push(field);
-        } else {
-          // Either not nullable OR has embedding function - require explicit values
-          if (hasEmbeddingFunction) {
-            // Don't add to missingEmbeddingFields since this is expected to be filled by embedding function
-            fields.push(field);
-          } else {
+          if (
+            // biome-ignore lint/suspicious/noExplicitAny: we don't know the type of `f`
+            embeddings.find((f: any) => f["vectorColumn"] === field.name) ===
+            undefined
+          ) {
            missingEmbeddingFields.push(field);
          }
+        } else {
+          missingEmbeddingFields.push(field);
        }
      } else {
        fields.push(field);
--- a/nodejs/lancedb/index.ts
+++ b/nodejs/lancedb/index.ts
@@ -43,10 +43,6 @@ export {
  DeleteResult,
  DropColumnsResult,
  UpdateResult,
-  SplitRandomOptions,
-  SplitHashOptions,
-  SplitSequentialOptions,
-  ShuffleOptions,
 } from "./native.js";

 export {
@@ -115,7 +111,6 @@ export {
 export { MergeInsertBuilder, WriteExecutionOptions } from "./merge";

 export * as embedding from "./embedding";
-export { permutationBuilder, PermutationBuilder } from "./permutation";
 export * as rerankers from "./rerankers";
 export {
  SchemaLike,
--- a/nodejs/lancedb/permutation.ts
+++ b/nodejs/lancedb/permutation.ts
@@ -1,183 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The LanceDB Authors
-
-import {
-  PermutationBuilder as NativePermutationBuilder,
-  Table as NativeTable,
-  ShuffleOptions,
-  SplitHashOptions,
-  SplitRandomOptions,
-  SplitSequentialOptions,
-  permutationBuilder as nativePermutationBuilder,
-} from "./native.js";
-import { LocalTable, Table } from "./table";
-
-/**
- * A PermutationBuilder for creating data permutations with splits, shuffling, and filtering.
- *
- * This class provides a TypeScript wrapper around the native Rust PermutationBuilder,
- * offering methods to configure data splits, shuffling, and filtering before executing
- * the permutation to create a new table.
- */
-export class PermutationBuilder {
-  private inner: NativePermutationBuilder;
-
-  /**
-   * @hidden
-   */
-  constructor(inner: NativePermutationBuilder) {
-    this.inner = inner;
-  }
-
-  /**
-   * Configure random splits for the permutation.
-   *
-   * @param options - Configuration for random splitting
-   * @returns A new PermutationBuilder instance
-   * @example
-   * ```ts
-   * // Split by ratios
-   * builder.splitRandom({ ratios: [0.7, 0.3], seed: 42 });
-   *
-   * // Split by counts
-   * builder.splitRandom({ counts: [1000, 500], seed: 42 });
-   *
-   * // Split with fixed size
-   * builder.splitRandom({ fixed: 100, seed: 42 });
-   * ```
-   */
-  splitRandom(options: SplitRandomOptions): PermutationBuilder {
-    const newInner = this.inner.splitRandom(options);
-    return new PermutationBuilder(newInner);
-  }
-
-  /**
-   * Configure hash-based splits for the permutation.
-   *
-   * @param options - Configuration for hash-based splitting
-   * @returns A new PermutationBuilder instance
-   * @example
-   * ```ts
-   * builder.splitHash({
-   *   columns: ["user_id"],
-   *   splitWeights: [70, 30],
-   *   discardWeight: 0
-   * });
-   * ```
-   */
-  splitHash(options: SplitHashOptions): PermutationBuilder {
-    const newInner = this.inner.splitHash(options);
-    return new PermutationBuilder(newInner);
-  }
-
-  /**
-   * Configure sequential splits for the permutation.
-   *
-   * @param options - Configuration for sequential splitting
-   * @returns A new PermutationBuilder instance
-   * @example
-   * ```ts
-   * // Split by ratios
-   * builder.splitSequential({ ratios: [0.8, 0.2] });
-   *
-   * // Split by counts
-   * builder.splitSequential({ counts: [800, 200] });
-   *
-   * // Split with fixed size
-   * builder.splitSequential({ fixed: 1000 });
-   * ```
-   */
-  splitSequential(options: SplitSequentialOptions): PermutationBuilder {
-    const newInner = this.inner.splitSequential(options);
-    return new PermutationBuilder(newInner);
-  }
-
-  /**
-   * Configure calculated splits for the permutation.
-   *
-   * @param calculation - SQL expression for calculating splits
-   * @returns A new PermutationBuilder instance
-   * @example
-   * ```ts
-   * builder.splitCalculated("user_id % 3");
-   * ```
-   */
-  splitCalculated(calculation: string): PermutationBuilder {
-    const newInner = this.inner.splitCalculated(calculation);
-    return new PermutationBuilder(newInner);
-  }
-
-  /**
-   * Configure shuffling for the permutation.
-   *
-   * @param options - Configuration for shuffling
-   * @returns A new PermutationBuilder instance
-   * @example
-   * ```ts
-   * // Basic shuffle
-   * builder.shuffle({ seed: 42 });
-   *
-   * // Shuffle with clump size
-   * builder.shuffle({ seed: 42, clumpSize: 10 });
-   * ```
-   */
-  shuffle(options: ShuffleOptions): PermutationBuilder {
-    const newInner = this.inner.shuffle(options);
-    return new PermutationBuilder(newInner);
-  }
-
-  /**
-   * Configure filtering for the permutation.
-   *
-   * @param filter - SQL filter expression
-   * @returns A new PermutationBuilder instance
-   * @example
-   * ```ts
-   * builder.filter("age > 18 AND status = 'active'");
-   * ```
-   */
-  filter(filter: string): PermutationBuilder {
-    const newInner = this.inner.filter(filter);
-    return new PermutationBuilder(newInner);
-  }
-
-  /**
-   * Execute the permutation and create the destination table.
-   *
-   * @returns A Promise that resolves to the new Table instance
-   * @example
-   * ```ts
-   * const permutationTable = await builder.execute();
-   * console.log(`Created table: ${permutationTable.name}`);
-   * ```
-   */
-  async execute(): Promise<Table> {
-    const nativeTable: NativeTable = await this.inner.execute();
-    return new LocalTable(nativeTable);
-  }
-}
-
-/**
- * Create a permutation builder for the given table.
- *
- * @param table - The source table to create a permutation from
- * @returns A PermutationBuilder instance
- * @example
- * ```ts
- * const builder = permutationBuilder(sourceTable, "training_data")
- *   .splitRandom({ ratios: [0.8, 0.2], seed: 42 })
- *   .shuffle({ seed: 123 });
- *
- * const trainingTable = await builder.execute();
- * ```
- */
-export function permutationBuilder(table: Table): PermutationBuilder {
-  // Extract the inner native table from the TypeScript wrapper
-  const localTable = table as LocalTable;
-  // Access inner through type assertion since it's private
-  const nativeBuilder = nativePermutationBuilder(
-    // biome-ignore lint/suspicious/noExplicitAny: need access to private variable
-    (localTable as any).inner,
-  );
-  return new PermutationBuilder(nativeBuilder);
-}
--- a/nodejs/lancedb/query.ts
+++ b/nodejs/lancedb/query.ts
@@ -20,25 +20,35 @@ import {
 } from "./native";
 import { Reranker } from "./rerankers";

-export async function* RecordBatchIterator(
-  promisedInner: Promise<NativeBatchIterator>,
-) {
-  const inner = await promisedInner;
+export class RecordBatchIterator implements AsyncIterator<RecordBatch> {
+  private promisedInner?: Promise<NativeBatchIterator>;
+  private inner?: NativeBatchIterator;

-  if (inner === undefined) {
-    throw new Error("Invalid iterator state");
+  constructor(promise?: Promise<NativeBatchIterator>) {
+    // TODO: check promise reliably so we dont need to pass two arguments.
+    this.promisedInner = promise;
  }

-  for (let buffer = await inner.next(); buffer; buffer = await inner.next()) {
-    const { batches } = tableFromIPC(buffer);
-
-    if (batches.length !== 1) {
+  // biome-ignore lint/suspicious/noExplicitAny: skip
+  async next(): Promise<IteratorResult<RecordBatch<any>>> {
+    if (this.inner === undefined) {
+      this.inner = await this.promisedInner;
+    }
+    if (this.inner === undefined) {
+      throw new Error("Invalid iterator state state");
+    }
+    const n = await this.inner.next();
+    if (n == null) {
+      return Promise.resolve({ done: true, value: null });
+    }
+    const tbl = tableFromIPC(n);
+    if (tbl.batches.length != 1) {
      throw new Error("Expected only one batch");
    }
-
-    yield batches[0];
+    return Promise.resolve({ done: false, value: tbl.batches[0] });
  }
 }
+/* eslint-enable */

 class RecordBatchIterable<
  NativeQueryType extends NativeQuery | NativeVectorQuery | NativeTakeQuery,
@@ -54,7 +64,7 @@ class RecordBatchIterable<

  // biome-ignore lint/suspicious/noExplicitAny: skip
  [Symbol.asyncIterator](): AsyncIterator<RecordBatch<any>, any, undefined> {
-    return RecordBatchIterator(
+    return new RecordBatchIterator(
      this.inner.execute(this.options?.maxBatchLength, this.options?.timeoutMs),
    );
  }
@@ -221,8 +231,10 @@ export class QueryBase<
   * single query)
   *
   */
-  protected execute(options?: Partial<QueryExecutionOptions>) {
-    return RecordBatchIterator(this.nativeExecute(options));
+  protected execute(
+    options?: Partial<QueryExecutionOptions>,
+  ): RecordBatchIterator {
+    return new RecordBatchIterator(this.nativeExecute(options));
  }

  /**
@@ -230,7 +242,8 @@ export class QueryBase<
   */
  // biome-ignore lint/suspicious/noExplicitAny: skip
  [Symbol.asyncIterator](): AsyncIterator<RecordBatch<any>> {
-    return RecordBatchIterator(this.nativeExecute());
+    const promise = this.nativeExecute();
+    return new RecordBatchIterator(promise);
  }

  /** Collect the results as an Arrow @see {@link ArrowTable}. */
@@ -313,25 +326,6 @@ export class QueryBase<
      return this.inner.analyzePlan();
    }
  }
-
-  /**
-   * Returns the schema of the output that will be returned by this query.
-   *
-   * This can be used to inspect the types and names of the columns that will be
-   * returned by the query before executing it.
-   *
-   * @returns An Arrow Schema describing the output columns.
-   */
-  async outputSchema(): Promise<import("./arrow").Schema> {
-    let schemaBuffer: Buffer;
-    if (this.inner instanceof Promise) {
-      schemaBuffer = await this.inner.then((inner) => inner.outputSchema());
-    } else {
-      schemaBuffer = await this.inner.outputSchema();
-    }
-    const schema = tableFromIPC(schemaBuffer).schema;
-    return schema;
-  }
 }

 export class StandardQueryBase<
--- a/nodejs/lancedb/sanitize.ts
+++ b/nodejs/lancedb/sanitize.ts
@@ -326,9 +326,6 @@ export function sanitizeDictionary(typeLike: object) {

 // biome-ignore lint/suspicious/noExplicitAny: skip
 export function sanitizeType(typeLike: unknown): DataType<any> {
-  if (typeof typeLike === "string") {
-    return dataTypeFromName(typeLike);
-  }
  if (typeof typeLike !== "object" || typeLike === null) {
    throw Error("Expected a Type but object was null/undefined");
  }
@@ -450,7 +447,7 @@ export function sanitizeType(typeLike: unknown): DataType<any> {
    case Type.DurationSecond:
      return new DurationSecond();
    default:
-      throw new Error("Unrecognized type id in schema: " + typeId);
+      throw new Error("Unrecoginized type id in schema: " + typeId);
  }
 }

@@ -470,15 +467,7 @@ export function sanitizeField(fieldLike: unknown): Field {
      "The field passed in is missing a `type`/`name`/`nullable` property",
    );
  }
-  let type: DataType;
-  try {
-    type = sanitizeType(fieldLike.type);
-  } catch (error: unknown) {
-    throw Error(
-      `Unable to sanitize type for field: ${fieldLike.name} due to error: ${error}`,
-      { cause: error },
-    );
-  }
+  const type = sanitizeType(fieldLike.type);
  const name = fieldLike.name;
  if (!(typeof name === "string")) {
    throw Error("The field passed in had a non-string `name` property");
@@ -592,46 +581,3 @@ function sanitizeData(
    },
  );
 }
-
-const constructorsByTypeName = {
-  null: () => new Null(),
-  binary: () => new Binary(),
-  utf8: () => new Utf8(),
-  bool: () => new Bool(),
-  int8: () => new Int8(),
-  int16: () => new Int16(),
-  int32: () => new Int32(),
-  int64: () => new Int64(),
-  uint8: () => new Uint8(),
-  uint16: () => new Uint16(),
-  uint32: () => new Uint32(),
-  uint64: () => new Uint64(),
-  float16: () => new Float16(),
-  float32: () => new Float32(),
-  float64: () => new Float64(),
-  datemillisecond: () => new DateMillisecond(),
-  dateday: () => new DateDay(),
-  timenanosecond: () => new TimeNanosecond(),
-  timemicrosecond: () => new TimeMicrosecond(),
-  timemillisecond: () => new TimeMillisecond(),
-  timesecond: () => new TimeSecond(),
-  intervaldaytime: () => new IntervalDayTime(),
-  intervalyearmonth: () => new IntervalYearMonth(),
-  durationnanosecond: () => new DurationNanosecond(),
-  durationmicrosecond: () => new DurationMicrosecond(),
-  durationmillisecond: () => new DurationMillisecond(),
-  durationsecond: () => new DurationSecond(),
-} as const;
-
-type MappableTypeName = keyof typeof constructorsByTypeName;
-
-export function dataTypeFromName(typeName: string): DataType {
-  const normalizedTypeName = typeName.toLowerCase() as MappableTypeName;
-  const _constructor = constructorsByTypeName[normalizedTypeName];
-
-  if (!_constructor) {
-    throw new Error("Unrecognized type name in schema: " + typeName);
-  }
-
-  return _constructor();
-}
--- a/nodejs/npm/darwin-arm64/package.json
+++ b/nodejs/npm/darwin-arm64/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-darwin-arm64",
-	"version": "0.22.3-beta.3",
+	"version": "0.22.2-beta.0",
 	"os": ["darwin"],
 	"cpu": ["arm64"],
 	"main": "lancedb.darwin-arm64.node",
--- a/nodejs/npm/darwin-x64/package.json
+++ b/nodejs/npm/darwin-x64/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-darwin-x64",
-	"version": "0.22.3-beta.3",
+	"version": "0.22.2-beta.0",
 	"os": ["darwin"],
 	"cpu": ["x64"],
 	"main": "lancedb.darwin-x64.node",
--- a/nodejs/npm/linux-arm64-gnu/package.json
+++ b/nodejs/npm/linux-arm64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-gnu",
-	"version": "0.22.3-beta.3",
+	"version": "0.22.2-beta.0",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-gnu.node",
--- a/nodejs/npm/linux-arm64-musl/package.json
+++ b/nodejs/npm/linux-arm64-musl/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-musl",
-	"version": "0.22.3-beta.3",
+	"version": "0.22.2-beta.0",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-musl.node",
--- a/nodejs/npm/linux-x64-gnu/package.json
+++ b/nodejs/npm/linux-x64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-gnu",
-	"version": "0.22.3-beta.3",
+	"version": "0.22.2-beta.0",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-gnu.node",
--- a/nodejs/npm/linux-x64-musl/package.json
+++ b/nodejs/npm/linux-x64-musl/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-musl",
-	"version": "0.22.3-beta.3",
+	"version": "0.22.2-beta.0",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-musl.node",
--- a/nodejs/npm/win32-arm64-msvc/package.json
+++ b/nodejs/npm/win32-arm64-msvc/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@lancedb/lancedb-win32-arm64-msvc",
-  "version": "0.22.3-beta.3",
+  "version": "0.22.2-beta.0",
  "os": [
    "win32"
  ],
--- a/nodejs/npm/win32-x64-msvc/package.json
+++ b/nodejs/npm/win32-x64-msvc/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-win32-x64-msvc",
-	"version": "0.22.3-beta.3",
+	"version": "0.22.2-beta.0",
 	"os": ["win32"],
 	"cpu": ["x64"],
 	"main": "lancedb.win32-x64-msvc.node",
--- a/nodejs/package-lock.json
+++ b/nodejs/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "@lancedb/lancedb",
-  "version": "0.22.3-beta.3",
+  "version": "0.22.2-beta.0",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "@lancedb/lancedb",
-      "version": "0.22.3-beta.3",
+      "version": "0.22.2-beta.0",
      "cpu": [
        "x64",
        "arm64"
--- a/nodejs/package.json
+++ b/nodejs/package.json
@@ -11,7 +11,7 @@
    "ann"
  ],
  "private": false,
-  "version": "0.22.3-beta.3",
+  "version": "0.22.2-beta.0",
  "main": "dist/index.js",
  "exports": {
    ".": "./dist/index.js",
--- a/nodejs/src/lib.rs
+++ b/nodejs/src/lib.rs
@@ -12,7 +12,6 @@ mod header;
 mod index;
 mod iterator;
 pub mod merge;
-pub mod permutation;
 mod query;
 pub mod remote;
 mod rerankers;
--- a/nodejs/src/permutation.rs
+++ b/nodejs/src/permutation.rs
@@ -1,214 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The LanceDB Authors
-
-use std::sync::{Arc, Mutex};
-
-use crate::{error::NapiErrorExt, table::Table};
-use lancedb::dataloader::{
-    permutation::builder::{PermutationBuilder as LancePermutationBuilder, ShuffleStrategy},
-    permutation::split::{SplitSizes, SplitStrategy},
-};
-use napi_derive::napi;
-
-#[napi(object)]
-pub struct SplitRandomOptions {
-    pub ratios: Option<Vec<f64>>,
-    pub counts: Option<Vec<i64>>,
-    pub fixed: Option<i64>,
-    pub seed: Option<i64>,
-}
-
-#[napi(object)]
-pub struct SplitHashOptions {
-    pub columns: Vec<String>,
-    pub split_weights: Vec<i64>,
-    pub discard_weight: Option<i64>,
-}
-
-#[napi(object)]
-pub struct SplitSequentialOptions {
-    pub ratios: Option<Vec<f64>>,
-    pub counts: Option<Vec<i64>>,
-    pub fixed: Option<i64>,
-}
-
-#[napi(object)]
-pub struct ShuffleOptions {
-    pub seed: Option<i64>,
-    pub clump_size: Option<i64>,
-}
-
-pub struct PermutationBuilderState {
-    pub builder: Option<LancePermutationBuilder>,
-}
-
-#[napi]
-pub struct PermutationBuilder {
-    state: Arc<Mutex<PermutationBuilderState>>,
-}
-
-impl PermutationBuilder {
-    pub fn new(builder: LancePermutationBuilder) -> Self {
-        Self {
-            state: Arc::new(Mutex::new(PermutationBuilderState {
-                builder: Some(builder),
-            })),
-        }
-    }
-}
-
-impl PermutationBuilder {
-    fn modify(
-        &self,
-        func: impl FnOnce(LancePermutationBuilder) -> LancePermutationBuilder,
-    ) -> napi::Result<Self> {
-        let mut state = self.state.lock().unwrap();
-        let builder = state
-            .builder
-            .take()
-            .ok_or_else(|| napi::Error::from_reason("Builder already consumed"))?;
-        state.builder = Some(func(builder));
-        Ok(Self {
-            state: self.state.clone(),
-        })
-    }
-}
-
-#[napi]
-impl PermutationBuilder {
-    /// Configure random splits
-    #[napi]
-    pub fn split_random(&self, options: SplitRandomOptions) -> napi::Result<Self> {
-        // Check that exactly one split type is provided
-        let split_args_count = [
-            options.ratios.is_some(),
-            options.counts.is_some(),
-            options.fixed.is_some(),
-        ]
-        .iter()
-        .filter(|&&x| x)
-        .count();
-
-        if split_args_count != 1 {
-            return Err(napi::Error::from_reason(
-                "Exactly one of 'ratios', 'counts', or 'fixed' must be provided",
-            ));
-        }
-
-        let sizes = if let Some(ratios) = options.ratios {
-            SplitSizes::Percentages(ratios)
-        } else if let Some(counts) = options.counts {
-            SplitSizes::Counts(counts.into_iter().map(|c| c as u64).collect())
-        } else if let Some(fixed) = options.fixed {
-            SplitSizes::Fixed(fixed as u64)
-        } else {
-            unreachable!("One of the split arguments must be provided");
-        };
-
-        let seed = options.seed.map(|s| s as u64);
-
-        self.modify(|builder| builder.with_split_strategy(SplitStrategy::Random { seed, sizes }))
-    }
-
-    /// Configure hash-based splits
-    #[napi]
-    pub fn split_hash(&self, options: SplitHashOptions) -> napi::Result<Self> {
-        let split_weights = options
-            .split_weights
-            .into_iter()
-            .map(|w| w as u64)
-            .collect();
-        let discard_weight = options.discard_weight.unwrap_or(0) as u64;
-
-        self.modify(|builder| {
-            builder.with_split_strategy(SplitStrategy::Hash {
-                columns: options.columns,
-                split_weights,
-                discard_weight,
-            })
-        })
-    }
-
-    /// Configure sequential splits
-    #[napi]
-    pub fn split_sequential(&self, options: SplitSequentialOptions) -> napi::Result<Self> {
-        // Check that exactly one split type is provided
-        let split_args_count = [
-            options.ratios.is_some(),
-            options.counts.is_some(),
-            options.fixed.is_some(),
-        ]
-        .iter()
-        .filter(|&&x| x)
-        .count();
-
-        if split_args_count != 1 {
-            return Err(napi::Error::from_reason(
-                "Exactly one of 'ratios', 'counts', or 'fixed' must be provided",
-            ));
-        }
-
-        let sizes = if let Some(ratios) = options.ratios {
-            SplitSizes::Percentages(ratios)
-        } else if let Some(counts) = options.counts {
-            SplitSizes::Counts(counts.into_iter().map(|c| c as u64).collect())
-        } else if let Some(fixed) = options.fixed {
-            SplitSizes::Fixed(fixed as u64)
-        } else {
-            unreachable!("One of the split arguments must be provided");
-        };
-
-        self.modify(|builder| builder.with_split_strategy(SplitStrategy::Sequential { sizes }))
-    }
-
-    /// Configure calculated splits
-    #[napi]
-    pub fn split_calculated(&self, calculation: String) -> napi::Result<Self> {
-        self.modify(|builder| {
-            builder.with_split_strategy(SplitStrategy::Calculated { calculation })
-        })
-    }
-
-    /// Configure shuffling
-    #[napi]
-    pub fn shuffle(&self, options: ShuffleOptions) -> napi::Result<Self> {
-        let seed = options.seed.map(|s| s as u64);
-        let clump_size = options.clump_size.map(|c| c as u64);
-
-        self.modify(|builder| {
-            builder.with_shuffle_strategy(ShuffleStrategy::Random { seed, clump_size })
-        })
-    }
-
-    /// Configure filtering
-    #[napi]
-    pub fn filter(&self, filter: String) -> napi::Result<Self> {
-        self.modify(|builder| builder.with_filter(filter))
-    }
-
-    /// Execute the permutation builder and create the table
-    #[napi]
-    pub async fn execute(&self) -> napi::Result<Table> {
-        let builder = {
-            let mut state = self.state.lock().unwrap();
-            state
-                .builder
-                .take()
-                .ok_or_else(|| napi::Error::from_reason("Builder already consumed"))?
-        };
-
-        let table = builder.build().await.default_error()?;
-        Ok(Table::new(table))
-    }
-}
-
-/// Create a permutation builder for the given table
-#[napi]
-pub fn permutation_builder(table: &crate::table::Table) -> napi::Result<PermutationBuilder> {
-    use lancedb::dataloader::permutation::builder::PermutationBuilder as LancePermutationBuilder;
-
-    let inner_table = table.inner_ref()?.clone();
-    let inner_builder = LancePermutationBuilder::new(inner_table);
-
-    Ok(PermutationBuilder::new(inner_builder))
-}
--- a/nodejs/src/query.rs
+++ b/nodejs/src/query.rs
@@ -22,7 +22,7 @@ use crate::error::NapiErrorExt;
 use crate::iterator::RecordBatchIterator;
 use crate::rerankers::Reranker;
 use crate::rerankers::RerankerCallbacks;
-use crate::util::{parse_distance_type, schema_to_buffer};
+use crate::util::parse_distance_type;

 #[napi]
 pub struct Query {
@@ -88,12 +88,6 @@ impl Query {
        self.inner = self.inner.clone().with_row_id();
    }

-    #[napi(catch_unwind)]
-    pub async fn output_schema(&self) -> napi::Result<Buffer> {
-        let schema = self.inner.output_schema().await.default_error()?;
-        schema_to_buffer(&schema)
-    }
-
    #[napi(catch_unwind)]
    pub async fn execute(
        &self,
@@ -279,12 +273,6 @@ impl VectorQuery {
            .rerank(Arc::new(Reranker::new(callbacks)));
    }

-    #[napi(catch_unwind)]
-    pub async fn output_schema(&self) -> napi::Result<Buffer> {
-        let schema = self.inner.output_schema().await.default_error()?;
-        schema_to_buffer(&schema)
-    }
-
    #[napi(catch_unwind)]
    pub async fn execute(
        &self,
@@ -358,12 +346,6 @@ impl TakeQuery {
        self.inner = self.inner.clone().with_row_id();
    }

-    #[napi(catch_unwind)]
-    pub async fn output_schema(&self) -> napi::Result<Buffer> {
-        let schema = self.inner.output_schema().await.default_error()?;
-        schema_to_buffer(&schema)
-    }
-
    #[napi(catch_unwind)]
    pub async fn execute(
        &self,
--- a/nodejs/src/table.rs
+++ b/nodejs/src/table.rs
@@ -3,6 +3,7 @@

 use std::collections::HashMap;

+use arrow_ipc::writer::FileWriter;
 use lancedb::ipc::ipc_file_to_batches;
 use lancedb::table::{
    AddDataMode, ColumnAlteration as LanceColumnAlteration, Duration, NewColumnTransform,
@@ -15,7 +16,6 @@ use crate::error::NapiErrorExt;
 use crate::index::Index;
 use crate::merge::NativeMergeInsertBuilder;
 use crate::query::{Query, TakeQuery, VectorQuery};
-use crate::util::schema_to_buffer;

 #[napi]
 pub struct Table {
@@ -26,7 +26,7 @@ pub struct Table {
 }

 impl Table {
-    pub(crate) fn inner_ref(&self) -> napi::Result<&LanceDbTable> {
+    fn inner_ref(&self) -> napi::Result<&LanceDbTable> {
        self.inner
            .as_ref()
            .ok_or_else(|| napi::Error::from_reason(format!("Table {} is closed", self.name)))
@@ -64,7 +64,14 @@ impl Table {
    #[napi(catch_unwind)]
    pub async fn schema(&self) -> napi::Result<Buffer> {
        let schema = self.inner_ref()?.schema().await.default_error()?;
-        schema_to_buffer(&schema)
+        let mut writer = FileWriter::try_new(vec![], &schema)
+            .map_err(|e| napi::Error::from_reason(format!("Failed to create IPC file: {}", e)))?;
+        writer
+            .finish()
+            .map_err(|e| napi::Error::from_reason(format!("Failed to finish IPC file: {}", e)))?;
+        Ok(Buffer::from(writer.into_inner().map_err(|e| {
+            napi::Error::from_reason(format!("Failed to get IPC file: {}", e))
+        })?))
    }

    #[napi(catch_unwind)]
--- a/nodejs/src/util.rs
+++ b/nodejs/src/util.rs
@@ -1,10 +1,7 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors

-use arrow_ipc::writer::FileWriter;
-use arrow_schema::Schema;
 use lancedb::DistanceType;
-use napi::bindgen_prelude::Buffer;

 pub fn parse_distance_type(distance_type: impl AsRef<str>) -> napi::Result<DistanceType> {
    match distance_type.as_ref().to_lowercase().as_str() {
@@ -18,15 +15,3 @@ pub fn parse_distance_type(distance_type: impl AsRef<str>) -> napi::Result<Dista
        ))),
    }
 }
-
-/// Convert an Arrow Schema to an Arrow IPC file buffer
-pub fn schema_to_buffer(schema: &Schema) -> napi::Result<Buffer> {
-    let mut writer = FileWriter::try_new(vec![], schema)
-        .map_err(|e| napi::Error::from_reason(format!("Failed to create IPC file: {}", e)))?;
-    writer
-        .finish()
-        .map_err(|e| napi::Error::from_reason(format!("Failed to finish IPC file: {}", e)))?;
-    Ok(Buffer::from(writer.into_inner().map_err(|e| {
-        napi::Error::from_reason(format!("Failed to get IPC file: {}", e))
-    })?))
-}
--- a/python/.bumpversion.toml
+++ b/python/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.25.3-beta.4"
+current_version = "0.25.2-beta.0"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
@@ -24,19 +24,6 @@ commit = true
 message = "Bump version: {current_version} → {new_version}"
 commit_args = ""

-# Update Cargo.lock after version bump
-pre_commit_hooks = [
-  """
-    cd python && cargo update -p lancedb-python
-    if git diff --quiet ../Cargo.lock; then
-        echo "Cargo.lock unchanged"
-    else
-        git add ../Cargo.lock
-        echo "Updated and staged Cargo.lock"
-    fi
-    """,
-]
-
 [tool.bumpversion.parts.pre_l]
 values = ["beta", "final"]
 optional_value = "final"
--- a/python/AGENTS.md
+++ b/python/AGENTS.md
@@ -1,19 +0,0 @@
-These are the Python bindings of LanceDB.
-The core Rust library is in the `../rust/lancedb` directory, the rust binding
-code is in the `src/` directory and the Python bindings are in the `lancedb/` directory.
-
-Common commands:
-
-* Build: `make develop`
-* Format: `make format`
-* Lint: `make check`
-* Fix lints: `make fix`
-* Test: `make test`
-* Doc test: `make doctest`
-
-Before committing changes, run lints and then formatting.
-
-When you change the Rust code, you will need to recompile the Python bindings: `make develop`.
-
-When you export new types from Rust to Python, you must manually update `python/lancedb/_lancedb.pyi`
-with the corresponding type hints. You can run `pyright` to check for type errors in the Python code.
--- a/python/CLAUDE.md
+++ b/python/CLAUDE.md
@@ -1 +0,0 @@
-AGENTS.md
--- a/python/CLAUDE.md
+++ b/python/CLAUDE.md
@@ -0,0 +1,19 @@
+These are the Python bindings of LanceDB.
+The core Rust library is in the `../rust/lancedb` directory, the rust binding
+code is in the `src/` directory and the Python bindings are in the `lancedb/` directory.
+
+Common commands:
+
+* Build: `make develop`
+* Format: `make format`
+* Lint: `make check`
+* Fix lints: `make fix`
+* Test: `make test`
+* Doc test: `make doctest`
+
+Before committing changes, run lints and then formatting.
+
+When you change the Rust code, you will need to recompile the Python bindings: `make develop`.
+
+When you export new types from Rust to Python, you must manually update `python/lancedb/_lancedb.pyi`
+with the corresponding type hints. You can run `pyright` to check for type errors in the Python code.
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-python"
-version = "0.25.3-beta.4"
+version = "0.25.2-beta.0"
 edition.workspace = true
 description = "Python bindings for LanceDB"
 license.workspace = true
@@ -14,12 +14,12 @@ name = "_lancedb"
 crate-type = ["cdylib"]

 [dependencies]
-arrow = { version = "56.2", features = ["pyarrow"] }
+arrow = { version = "55.1", features = ["pyarrow"] }
 async-trait = "0.1"
 lancedb = { path = "../rust/lancedb", default-features = false }
 env_logger.workspace = true
-pyo3 = { version = "0.25", features = ["extension-module", "abi3-py39"] }
-pyo3-async-runtimes = { version = "0.25", features = [
+pyo3 = { version = "0.24", features = ["extension-module", "abi3-py39"] }
+pyo3-async-runtimes = { version = "0.24", features = [
    "attributes",
    "tokio-runtime",
 ] }
@@ -28,7 +28,7 @@ futures.workspace = true
 tokio = { version = "1.40", features = ["sync"] }

 [build-dependencies]
-pyo3-build-config = { version = "0.25", features = [
+pyo3-build-config = { version = "0.24", features = [
    "extension-module",
    "abi3-py39",
 ] }
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -5,12 +5,12 @@ dynamic = ["version"]
 dependencies = [
    "deprecation",
    "numpy",
-    "overrides>=0.7; python_version<'3.12'",
+    "overrides>=0.7",
    "packaging",
    "pyarrow>=16",
    "pydantic>=1.10",
    "tqdm>=4.27.0",
-    "lance-namespace>=0.0.16"
+    "lance-namespace==0.0.6"
 ]
 description = "lancedb"
 authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]
--- a/python/python/lancedb/_lancedb.pyi
+++ b/python/python/lancedb/_lancedb.pyi
@@ -123,8 +123,6 @@ class Table:
    @property
    def tags(self) -> Tags: ...
    def query(self) -> Query: ...
-    def take_offsets(self, offsets: list[int]) -> TakeQuery: ...
-    def take_row_ids(self, row_ids: list[int]) -> TakeQuery: ...
    def vector_search(self) -> VectorQuery: ...

 class Tags:
@@ -135,7 +133,6 @@ class Tags:
    async def update(self, tag: str, version: int): ...

 class IndexConfig:
-    name: str
    index_type: str
    columns: List[str]

@@ -167,7 +164,6 @@ class Query:
    def postfilter(self): ...
    def nearest_to(self, query_vec: pa.Array) -> VectorQuery: ...
    def nearest_to_text(self, query: dict) -> FTSQuery: ...
-    async def output_schema(self) -> pa.Schema: ...
    async def execute(
        self, max_batch_length: Optional[int], timeout: Optional[timedelta]
    ) -> RecordBatchStream: ...
@@ -175,13 +171,6 @@ class Query:
    async def analyze_plan(self) -> str: ...
    def to_query_request(self) -> PyQueryRequest: ...

-class TakeQuery:
-    def select(self, columns: List[str]): ...
-    def with_row_id(self): ...
-    async def output_schema(self) -> pa.Schema: ...
-    async def execute(self) -> RecordBatchStream: ...
-    def to_query_request(self) -> PyQueryRequest: ...
-
 class FTSQuery:
    def where(self, filter: str): ...
    def select(self, columns: List[str]): ...
@@ -193,14 +182,12 @@ class FTSQuery:
    def get_query(self) -> str: ...
    def add_query_vector(self, query_vec: pa.Array) -> None: ...
    def nearest_to(self, query_vec: pa.Array) -> HybridQuery: ...
-    async def output_schema(self) -> pa.Schema: ...
    async def execute(
        self, max_batch_length: Optional[int], timeout: Optional[timedelta]
    ) -> RecordBatchStream: ...
    def to_query_request(self) -> PyQueryRequest: ...

 class VectorQuery:
-    async def output_schema(self) -> pa.Schema: ...
    async def execute(self) -> RecordBatchStream: ...
    def where(self, filter: str): ...
    def select(self, columns: List[str]): ...
@@ -308,34 +295,3 @@ class AlterColumnsResult:

 class DropColumnsResult:
    version: int
-
-class AsyncPermutationBuilder:
-    def select(self, projections: Dict[str, str]) -> "AsyncPermutationBuilder": ...
-    def split_random(
-        self,
-        *,
-        ratios: Optional[List[float]] = None,
-        counts: Optional[List[int]] = None,
-        fixed: Optional[int] = None,
-        seed: Optional[int] = None,
-    ) -> "AsyncPermutationBuilder": ...
-    def split_hash(
-        self, columns: List[str], split_weights: List[int], *, discard_weight: int = 0
-    ) -> "AsyncPermutationBuilder": ...
-    def split_sequential(
-        self,
-        *,
-        ratios: Optional[List[float]] = None,
-        counts: Optional[List[int]] = None,
-        fixed: Optional[int] = None,
-    ) -> "AsyncPermutationBuilder": ...
-    def split_calculated(self, calculation: str) -> "AsyncPermutationBuilder": ...
-    def shuffle(
-        self, seed: Optional[int], clump_size: Optional[int]
-    ) -> "AsyncPermutationBuilder": ...
-    def filter(self, filter: str) -> "AsyncPermutationBuilder": ...
-    async def execute(self) -> Table: ...
-
-def async_permutation_builder(
-    table: Table, dest_table_name: str
-) -> AsyncPermutationBuilder: ...
--- a/python/python/lancedb/db.py
+++ b/python/python/lancedb/db.py
@@ -5,20 +5,11 @@
 from __future__ import annotations

 from abc import abstractmethod
-from datetime import timedelta
 from pathlib import Path
-import sys
 from typing import TYPE_CHECKING, Dict, Iterable, List, Literal, Optional, Union

-if sys.version_info >= (3, 12):
-    from typing import override
-
-    class EnforceOverrides:
-        pass
-else:
-    from overrides import EnforceOverrides, override  # type: ignore
-
 from lancedb.embeddings.registry import EmbeddingFunctionRegistry
+from overrides import EnforceOverrides, override  # type: ignore

 from lancedb.common import data_to_reader, sanitize_uri, validate_schema
 from lancedb.background_loop import LOOP
@@ -41,6 +32,7 @@ import deprecation
 if TYPE_CHECKING:
    import pyarrow as pa
    from .pydantic import LanceModel
+    from datetime import timedelta

    from ._lancedb import Connection as LanceDbConnection
    from .common import DATA, URI
@@ -452,12 +444,7 @@ class LanceDBConnection(DBConnection):
        read_consistency_interval: Optional[timedelta] = None,
        storage_options: Optional[Dict[str, str]] = None,
        session: Optional[Session] = None,
-        _inner: Optional[LanceDbConnection] = None,
    ):
-        if _inner is not None:
-            self._conn = _inner
-            return
-
        if not isinstance(uri, Path):
            scheme = get_uri_scheme(uri)
        is_local = isinstance(uri, Path) or scheme == "file"
@@ -466,6 +453,11 @@ class LanceDBConnection(DBConnection):
                uri = Path(uri)
            uri = uri.expanduser().absolute()
            Path(uri).mkdir(parents=True, exist_ok=True)
+        self._uri = str(uri)
+        self._entered = False
+        self.read_consistency_interval = read_consistency_interval
+        self.storage_options = storage_options
+        self.session = session

        if read_consistency_interval is not None:
            read_consistency_interval_secs = read_consistency_interval.total_seconds()
@@ -484,32 +476,10 @@ class LanceDBConnection(DBConnection):
                session,
            )

-        # TODO: It would be nice if we didn't store self.storage_options but it is
-        # currently used by the LanceTable.to_lance method.  This doesn't _really_
-        # work because some paths like LanceDBConnection.from_inner will lose the
-        # storage_options.  Also, this class really shouldn't be holding any state
-        # beyond _conn.
-        self.storage_options = storage_options
        self._conn = AsyncConnection(LOOP.run(do_connect()))

-    @property
-    def read_consistency_interval(self) -> Optional[timedelta]:
-        return LOOP.run(self._conn.get_read_consistency_interval())
-
-    @property
-    def session(self) -> Optional[Session]:
-        return self._conn.session
-
-    @property
-    def uri(self) -> str:
-        return self._conn.uri
-
-    @classmethod
-    def from_inner(cls, inner: LanceDbConnection):
-        return cls(None, _inner=inner)
-
    def __repr__(self) -> str:
-        val = f"{self.__class__.__name__}(uri={self._conn.uri!r}"
+        val = f"{self.__class__.__name__}(uri={self._uri!r}"
        if self.read_consistency_interval is not None:
            val += f", read_consistency_interval={repr(self.read_consistency_interval)}"
        val += ")"
@@ -519,10 +489,6 @@ class LanceDBConnection(DBConnection):
        conn = AsyncConnection(await lancedb_connect(self.uri))
        return await conn.table_names(start_after=start_after, limit=limit)

-    @property
-    def _inner(self) -> LanceDbConnection:
-        return self._conn._inner
-
    @override
    def list_namespaces(
        self,
@@ -882,13 +848,6 @@ class AsyncConnection(object):
    def uri(self) -> str:
        return self._inner.uri

-    async def get_read_consistency_interval(self) -> Optional[timedelta]:
-        interval_secs = await self._inner.get_read_consistency_interval()
-        if interval_secs is not None:
-            return timedelta(seconds=interval_secs)
-        else:
-            return None
-
    async def list_namespaces(
        self,
        namespace: List[str] = [],
--- a/python/python/lancedb/embeddings/colpali.py
+++ b/python/python/lancedb/embeddings/colpali.py
@@ -3,11 +3,9 @@


 from functools import lru_cache
-from logging import warning
-from typing import List, Union, Optional, Any, Callable
+from typing import List, Union, Optional, Any
 import numpy as np
 import io
-import warnings

 from ..util import attempt_import_or_raise
 from .base import EmbeddingFunction
@@ -21,52 +19,35 @@ class ColPaliEmbeddings(EmbeddingFunction):
    An embedding function that uses the ColPali engine for
    multimodal multi-vector embeddings.

-    This embedding function supports ColPali models, producing multivector outputs
-    for both text and image inputs.
+    This embedding function supports ColQwen2.5 models, producing multivector outputs
+    for both text and image inputs. The output embeddings are lists of vectors, each
+    vector being 128-dimensional by default, represented as List[List[float]].

    Parameters
    ----------
    model_name : str
        The name of the model to use (e.g., "Metric-AI/ColQwen2.5-3b-multilingual-v1.0")
-        Supports models based on these engines:
-        - ColPali: "vidore/colpali-v1.3" and others
-        - ColQwen2.5: "Metric-AI/ColQwen2.5-3b-multilingual-v1.0" and others
-        - ColQwen2: "vidore/colqwen2-v1.0" and others
-        - ColSmol: "vidore/colSmol-256M" and others
-
    device : str
-        The device for inference (default "auto").
+        The device for inference (default "cuda:0").
    dtype : str
        Data type for model weights (default "bfloat16").
    use_token_pooling : bool
-        DEPRECATED. Whether to use token pooling. Use `pooling_strategy` instead.
-    pooling_strategy : str, optional
-        The token pooling strategy to use, by default "hierarchical".
-        - "hierarchical": Progressively pools tokens to reduce sequence length.
-        - "lambda": A simpler pooling that uses a custom `pooling_func`.
-    pooling_func: typing.Callable, optional
-        A function to use for pooling when `pooling_strategy` is "lambda".
+        Whether to use token pooling to reduce embedding size (default True).
    pool_factor : int
        Factor to reduce sequence length if token pooling is enabled (default 2).
    quantization_config : Optional[BitsAndBytesConfig]
        Quantization configuration for the model. (default None, bitsandbytes needed)
    batch_size : int
        Batch size for processing inputs (default 2).
-    offload_folder: str, optional
-        Folder to offload model weights if using CPU offloading (default None). This is
-        useful for large models that do not fit in memory.
    """

    model_name: str = "Metric-AI/ColQwen2.5-3b-multilingual-v1.0"
    device: str = "auto"
    dtype: str = "bfloat16"
    use_token_pooling: bool = True
-    pooling_strategy: Optional[str] = "hierarchical"
-    pooling_func: Optional[Any] = None
    pool_factor: int = 2
    quantization_config: Optional[Any] = None
    batch_size: int = 2
-    offload_folder: Optional[str] = None

    _model = None
    _processor = None
@@ -75,43 +56,15 @@ class ColPaliEmbeddings(EmbeddingFunction):

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
-        torch = attempt_import_or_raise("torch", "torch")
-
-        if not self.use_token_pooling:
-            warnings.warn(
-                "use_token_pooling is deprecated, use pooling_strategy=None instead",
-                DeprecationWarning,
-            )
-            self.pooling_strategy = None
-
-        if self.pooling_strategy == "lambda" and self.pooling_func is None:
-            raise ValueError(
-                "pooling_func must be provided when pooling_strategy is 'lambda'"
-            )
-
-        device = self.device
-        if device == "auto":
-            if torch.cuda.is_available():
-                device = "cuda"
-            elif torch.backends.mps.is_available():
-                device = "mps"
-            else:
-                device = "cpu"
-
-        dtype = self.dtype
-        if device == "mps" and dtype == "bfloat16":
-            dtype = "float32"  # Avoid NaNs on MPS
-
        (
            self._model,
            self._processor,
            self._token_pooler,
        ) = self._load_model(
            self.model_name,
-            dtype,
-            device,
-            self.pooling_strategy,
-            self.pooling_func,
+            self.dtype,
+            self.device,
+            self.use_token_pooling,
            self.quantization_config,
        )

@@ -121,26 +74,16 @@ class ColPaliEmbeddings(EmbeddingFunction):
        model_name: str,
        dtype: str,
        device: str,
-        pooling_strategy: Optional[str],
-        pooling_func: Optional[Callable],
+        use_token_pooling: bool,
        quantization_config: Optional[Any],
    ):
        """
        Initialize and cache the ColPali model, processor, and token pooler.
        """
-        if device.startswith("mps"):
-            # warn some torch ops in late interaction architecture result in nans on mps
-            warning(
-                "MPS device detected. Some operations may result in NaNs. "
-                "If you encounter issues, consider using 'cpu' or 'cuda' devices."
-            )
        torch = attempt_import_or_raise("torch", "torch")
        transformers = attempt_import_or_raise("transformers", "transformers")
        colpali_engine = attempt_import_or_raise("colpali_engine", "colpali_engine")
-        from colpali_engine.compression.token_pooling import (
-            HierarchicalTokenPooler,
-            LambdaTokenPooler,
-        )
+        from colpali_engine.compression.token_pooling import HierarchicalTokenPooler

        if quantization_config is not None:
            if not isinstance(quantization_config, transformers.BitsAndBytesConfig):
@@ -155,45 +98,21 @@ class ColPaliEmbeddings(EmbeddingFunction):
        else:
            torch_dtype = torch.float32

-        model_class, processor_class = None, None
-        model_name_lower = model_name.lower()
-        if "colqwen2.5" in model_name_lower:
-            model_class = colpali_engine.models.ColQwen2_5
-            processor_class = colpali_engine.models.ColQwen2_5_Processor
-        elif "colsmol" in model_name_lower or "colidefics3" in model_name_lower:
-            model_class = colpali_engine.models.ColIdefics3
-            processor_class = colpali_engine.models.ColIdefics3Processor
-        elif "colqwen" in model_name_lower:
-            model_class = colpali_engine.models.ColQwen2
-            processor_class = colpali_engine.models.ColQwen2Processor
-        elif "colpali" in model_name_lower:
-            model_class = colpali_engine.models.ColPali
-            processor_class = colpali_engine.models.ColPaliProcessor
-
-        if model_class is None:
-            raise ValueError(f"Unsupported model: {model_name}")
-
-        model = model_class.from_pretrained(
+        model = colpali_engine.models.ColQwen2_5.from_pretrained(
            model_name,
            torch_dtype=torch_dtype,
+            device_map=device,
            quantization_config=quantization_config
            if quantization_config is not None
            else None,
            attn_implementation="flash_attention_2"
            if is_flash_attn_2_available()
            else None,
-            low_cpu_mem_usage=True,
        ).eval()
-        model = model.to(device)
-        model = model.to(torch_dtype)  # Force cast after moving to device
-        processor = processor_class.from_pretrained(model_name)
-
-        token_pooler = None
-        if pooling_strategy == "hierarchical":
-            token_pooler = HierarchicalTokenPooler()
-        elif pooling_strategy == "lambda":
-            token_pooler = LambdaTokenPooler(pool_func=pooling_func)
-
+        processor = colpali_engine.models.ColQwen2_5_Processor.from_pretrained(
+            model_name
+        )
+        token_pooler = HierarchicalTokenPooler() if use_token_pooling else None
        return model, processor, token_pooler

    def ndims(self):
@@ -209,7 +128,7 @@ class ColPaliEmbeddings(EmbeddingFunction):
            with torch.no_grad():
                query_embeddings = self._model(**batch_queries)

-            if self.pooling_strategy and self._token_pooler is not None:
+            if self.use_token_pooling and self._token_pooler is not None:
                query_embeddings = self._token_pooler.pool_embeddings(
                    query_embeddings,
                    pool_factor=self.pool_factor,
@@ -226,20 +145,13 @@ class ColPaliEmbeddings(EmbeddingFunction):
        Use token pooling if enabled.
        """
        torch = attempt_import_or_raise("torch", "torch")
-        if self.pooling_strategy and self._token_pooler is not None:
-            if self.pooling_strategy == "hierarchical":
-                embeddings = self._token_pooler.pool_embeddings(
-                    embeddings,
-                    pool_factor=self.pool_factor,
-                    padding=True,
-                    padding_side=self._processor.tokenizer.padding_side,
-                )
-            elif self.pooling_strategy == "lambda":
-                embeddings = self._token_pooler.pool_embeddings(
-                    embeddings,
-                    padding=True,
-                    padding_side=self._processor.tokenizer.padding_side,
-                )
+        if self.use_token_pooling and self._token_pooler is not None:
+            embeddings = self._token_pooler.pool_embeddings(
+                embeddings,
+                pool_factor=self.pool_factor,
+                padding=True,
+                padding_side=self._processor.tokenizer.padding_side,
+            )

        if isinstance(embeddings, torch.Tensor):
            tensors = embeddings.detach().cpu()
@@ -267,7 +179,6 @@ class ColPaliEmbeddings(EmbeddingFunction):
            )
            with torch.no_grad():
                query_embeddings = self._model(**batch_queries)
-            query_embeddings = torch.nan_to_num(query_embeddings)
            all_embeddings.extend(self._process_embeddings(query_embeddings))
        return all_embeddings

@@ -314,7 +225,6 @@ class ColPaliEmbeddings(EmbeddingFunction):
            )
            with torch.no_grad():
                image_embeddings = self._model(**batch_images)
-            image_embeddings = torch.nan_to_num(image_embeddings)
            all_embeddings.extend(self._process_embeddings(image_embeddings))
        return all_embeddings

--- a/python/python/lancedb/embeddings/voyageai.py
+++ b/python/python/lancedb/embeddings/voyageai.py
@@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: Copyright The LanceDB Authors
 import base64
 import os
-from typing import ClassVar, TYPE_CHECKING, List, Union, Any, Generator
+from typing import ClassVar, TYPE_CHECKING, List, Union, Any

 from pathlib import Path
 from urllib.parse import urlparse
@@ -19,23 +19,6 @@ from .utils import api_key_not_found_help, IMAGES, TEXT
 if TYPE_CHECKING:
    import PIL

-# Token limits for different VoyageAI models
-VOYAGE_TOTAL_TOKEN_LIMITS = {
-    "voyage-context-3": 32_000,
-    "voyage-3.5-lite": 1_000_000,
-    "voyage-3.5": 320_000,
-    "voyage-3-lite": 120_000,
-    "voyage-3": 120_000,
-    "voyage-multimodal-3": 120_000,
-    "voyage-finance-2": 120_000,
-    "voyage-multilingual-2": 120_000,
-    "voyage-law-2": 120_000,
-    "voyage-code-2": 120_000,
-}
-
-# Batch size for embedding requests (max number of items per batch)
-BATCH_SIZE = 1000
-

 def is_valid_url(text):
    try:
@@ -137,9 +120,6 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
    name: str
        The name of the model to use. List of acceptable models:

-            * voyage-context-3
-            * voyage-3.5
-            * voyage-3.5-lite
            * voyage-3
            * voyage-3-lite
            * voyage-multimodal-3
@@ -177,35 +157,25 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
    name: str
    client: ClassVar = None
    text_embedding_models: list = [
-        "voyage-3.5",
-        "voyage-3.5-lite",
        "voyage-3",
        "voyage-3-lite",
        "voyage-finance-2",
-        "voyage-multilingual-2",
        "voyage-law-2",
        "voyage-code-2",
    ]
    multimodal_embedding_models: list = ["voyage-multimodal-3"]
-    contextual_embedding_models: list = ["voyage-context-3"]

    def _is_multimodal_model(self, model_name: str):
        return (
            model_name in self.multimodal_embedding_models or "multimodal" in model_name
        )

-    def _is_contextual_model(self, model_name: str):
-        return model_name in self.contextual_embedding_models or "context" in model_name
-
    def ndims(self):
        if self.name == "voyage-3-lite":
            return 512
        elif self.name == "voyage-code-2":
            return 1536
        elif self.name in [
-            "voyage-context-3",
-            "voyage-3.5",
-            "voyage-3.5-lite",
            "voyage-3",
            "voyage-multimodal-3",
            "voyage-finance-2",
@@ -237,11 +207,6 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
            result = client.multimodal_embed(
                inputs=[[query]], model=self.name, input_type="query", **kwargs
            )
-        elif self._is_contextual_model(self.name):
-            result = client.contextualized_embed(
-                inputs=[[query]], model=self.name, input_type="query", **kwargs
-            )
-            result = result.results[0]
        else:
            result = client.embed(
                texts=[query], model=self.name, input_type="query", **kwargs
@@ -266,164 +231,18 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
            List[np.array]: the list of embeddings
        """
        client = VoyageAIEmbeddingFunction._get_client()
-
-        # For multimodal models, check if inputs contain images
        if self._is_multimodal_model(self.name):
-            sanitized = sanitize_multimodal_input(inputs)
-            has_images = any(
-                inp["content"][0].get("type") != "text" for inp in sanitized
+            inputs = sanitize_multimodal_input(inputs)
+            result = client.multimodal_embed(
+                inputs=inputs, model=self.name, input_type="document", **kwargs
            )
-            if has_images:
-                # Use non-batched API for images
-                result = client.multimodal_embed(
-                    inputs=sanitized, model=self.name, input_type="document", **kwargs
-                )
-                return result.embeddings
-            # Extract texts for batching
-            inputs = [inp["content"][0]["text"] for inp in sanitized]
        else:
            inputs = sanitize_text_input(inputs)
+            result = client.embed(
+                texts=inputs, model=self.name, input_type="document", **kwargs
+            )

-        # Use batching for all text inputs
-        return self._embed_with_batching(
-            client, inputs, input_type="document", **kwargs
-        )
-
-    def _build_batches(
-        self, client, texts: List[str]
-    ) -> Generator[List[str], None, None]:
-        """
-        Generate batches of texts based on token limits using a generator.
-
-        Parameters
-        ----------
-        client : voyageai.Client
-            The VoyageAI client instance.
-        texts : List[str]
-            List of texts to batch.
-
-        Yields
-        ------
-            List[str]: Batches of texts.
-        """
-        if not texts:
-            return
-
-        max_tokens_per_batch = VOYAGE_TOTAL_TOKEN_LIMITS.get(self.name, 120_000)
-        current_batch: List[str] = []
-        current_batch_tokens = 0
-
-        # Tokenize all texts in one API call
-        token_lists = client.tokenize(texts, model=self.name)
-        token_counts = [len(token_list) for token_list in token_lists]
-
-        for i, text in enumerate(texts):
-            n_tokens = token_counts[i]
-
-            # Check if adding this text would exceed limits
-            if current_batch and (
-                len(current_batch) >= BATCH_SIZE
-                or (current_batch_tokens + n_tokens > max_tokens_per_batch)
-            ):
-                # Yield the current batch and start a new one
-                yield current_batch
-                current_batch = []
-                current_batch_tokens = 0
-
-            current_batch.append(text)
-            current_batch_tokens += n_tokens
-
-        # Yield the last batch (always has at least one text)
-        if current_batch:
-            yield current_batch
-
-    def _get_embed_function(
-        self, client, input_type: str = "document", **kwargs
-    ) -> callable:
-        """
-        Get the appropriate embedding function based on model type.
-
-        Parameters
-        ----------
-        client : voyageai.Client
-            The VoyageAI client instance.
-        input_type : str
-            Either "query" or "document"
-        **kwargs
-            Additional arguments to pass to the embedding API
-
-        Returns
-        -------
-            callable: A function that takes a batch of texts and returns embeddings.
-        """
-        if self._is_multimodal_model(self.name):
-
-            def embed_batch(batch: List[str]) -> List[np.array]:
-                batch_inputs = sanitize_multimodal_input(batch)
-                result = client.multimodal_embed(
-                    inputs=batch_inputs,
-                    model=self.name,
-                    input_type=input_type,
-                    **kwargs,
-                )
-                return result.embeddings
-
-            return embed_batch
-
-        elif self._is_contextual_model(self.name):
-
-            def embed_batch(batch: List[str]) -> List[np.array]:
-                result = client.contextualized_embed(
-                    inputs=[batch], model=self.name, input_type=input_type, **kwargs
-                )
-                return result.results[0].embeddings
-
-            return embed_batch
-
-        else:
-
-            def embed_batch(batch: List[str]) -> List[np.array]:
-                result = client.embed(
-                    texts=batch, model=self.name, input_type=input_type, **kwargs
-                )
-                return result.embeddings
-
-            return embed_batch
-
-    def _embed_with_batching(
-        self, client, texts: List[str], input_type: str = "document", **kwargs
-    ) -> List[np.array]:
-        """
-        Embed texts with automatic batching based on token limits.
-
-        Parameters
-        ----------
-        client : voyageai.Client
-            The VoyageAI client instance.
-        texts : List[str]
-            List of texts to embed.
-        input_type : str
-            Either "query" or "document"
-        **kwargs
-            Additional arguments to pass to the embedding API
-
-        Returns
-        -------
-            List[np.array]: List of embeddings.
-        """
-        if not texts:
-            return []
-
-        # Get the appropriate embedding function for this model type
-        embed_fn = self._get_embed_function(client, input_type=input_type, **kwargs)
-
-        # Process each batch
-        all_embeddings = []
-        for batch in self._build_batches(client, texts):
-            batch_embeddings = embed_fn(batch)
-            all_embeddings.extend(batch_embeddings)
-
-        return all_embeddings
+        return result.embeddings

    @staticmethod
    def _get_client():
--- a/python/python/lancedb/namespace.py
+++ b/python/python/lancedb/namespace.py
@@ -12,18 +12,13 @@ from __future__ import annotations

 from typing import Dict, Iterable, List, Optional, Union
 import os
-import sys
-
-if sys.version_info >= (3, 12):
-    from typing import override
-else:
-    from overrides import override

 from lancedb.db import DBConnection
 from lancedb.table import LanceTable, Table
 from lancedb.util import validate_table_name
 from lancedb.common import validate_schema
 from lancedb.table import sanitize_create_table
+from overrides import override

 from lance_namespace import LanceNamespace, connect as namespace_connect
 from lance_namespace_urllib3_client.models import (
--- a/python/python/lancedb/permutation.py
+++ b/python/python/lancedb/permutation.py
@@ -1,72 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright The LanceDB Authors
-
-from ._lancedb import async_permutation_builder
-from .table import LanceTable
-from .background_loop import LOOP
-from typing import Optional
-
-
-class PermutationBuilder:
-    def __init__(self, table: LanceTable):
-        self._async = async_permutation_builder(table)
-
-    def select(self, projections: dict[str, str]) -> "PermutationBuilder":
-        self._async.select(projections)
-        return self
-
-    def split_random(
-        self,
-        *,
-        ratios: Optional[list[float]] = None,
-        counts: Optional[list[int]] = None,
-        fixed: Optional[int] = None,
-        seed: Optional[int] = None,
-    ) -> "PermutationBuilder":
-        self._async.split_random(ratios=ratios, counts=counts, fixed=fixed, seed=seed)
-        return self
-
-    def split_hash(
-        self,
-        columns: list[str],
-        split_weights: list[int],
-        *,
-        discard_weight: Optional[int] = None,
-    ) -> "PermutationBuilder":
-        self._async.split_hash(columns, split_weights, discard_weight=discard_weight)
-        return self
-
-    def split_sequential(
-        self,
-        *,
-        ratios: Optional[list[float]] = None,
-        counts: Optional[list[int]] = None,
-        fixed: Optional[int] = None,
-    ) -> "PermutationBuilder":
-        self._async.split_sequential(ratios=ratios, counts=counts, fixed=fixed)
-        return self
-
-    def split_calculated(self, calculation: str) -> "PermutationBuilder":
-        self._async.split_calculated(calculation)
-        return self
-
-    def shuffle(
-        self, *, seed: Optional[int] = None, clump_size: Optional[int] = None
-    ) -> "PermutationBuilder":
-        self._async.shuffle(seed=seed, clump_size=clump_size)
-        return self
-
-    def filter(self, filter: str) -> "PermutationBuilder":
-        self._async.filter(filter)
-        return self
-
-    def execute(self) -> LanceTable:
-        async def do_execute():
-            inner_tbl = await self._async.execute()
-            return LanceTable.from_inner(inner_tbl)
-
-        return LOOP.run(do_execute())
-
-
-def permutation_builder(table: LanceTable) -> PermutationBuilder:
-    return PermutationBuilder(table)
--- a/python/python/lancedb/query.py
+++ b/python/python/lancedb/query.py
@@ -1237,14 +1237,6 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
        self._refine_factor = refine_factor
        return self

-    def output_schema(self) -> pa.Schema:
-        """
-        Return the output schema for the query
-
-        This does not execute the query.
-        """
-        return self._table._output_schema(self.to_query_object())
-
    def to_arrow(self, *, timeout: Optional[timedelta] = None) -> pa.Table:
        """
        Execute the query and return the results as an
@@ -1460,14 +1452,6 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
            offset=self._offset,
        )

-    def output_schema(self) -> pa.Schema:
-        """
-        Return the output schema for the query
-
-        This does not execute the query.
-        """
-        return self._table._output_schema(self.to_query_object())
-
    def to_arrow(self, *, timeout: Optional[timedelta] = None) -> pa.Table:
        path, fs, exist = self._table._get_fts_index_path()
        if exist:
@@ -1611,10 +1595,6 @@ class LanceEmptyQueryBuilder(LanceQueryBuilder):
            offset=self._offset,
        )

-    def output_schema(self) -> pa.Schema:
-        query = self.to_query_object()
-        return self._table._output_schema(query)
-
    def to_batches(
        self, /, batch_size: Optional[int] = None, timeout: Optional[timedelta] = None
    ) -> pa.RecordBatchReader:
@@ -2258,14 +2238,6 @@ class AsyncQueryBase(object):
            )
        )

-    async def output_schema(self) -> pa.Schema:
-        """
-        Return the output schema for the query
-
-        This does not execute the query.
-        """
-        return await self._inner.output_schema()
-
    async def to_arrow(self, timeout: Optional[timedelta] = None) -> pa.Table:
        """
        Execute the query and collect the results into an Apache Arrow Table.
@@ -3221,14 +3193,6 @@ class BaseQueryBuilder(object):
        self._inner.with_row_id()
        return self

-    def output_schema(self) -> pa.Schema:
-        """
-        Return the output schema for the query
-
-        This does not execute the query.
-        """
-        return LOOP.run(self._inner.output_schema())
-
    def to_batches(
        self,
        *,
--- a/python/python/lancedb/remote/db.py
+++ b/python/python/lancedb/remote/db.py
@@ -5,20 +5,15 @@
 from datetime import timedelta
 import logging
 from concurrent.futures import ThreadPoolExecutor
-import sys
 from typing import Any, Dict, Iterable, List, Optional, Union
 from urllib.parse import urlparse
 import warnings

-if sys.version_info >= (3, 12):
-    from typing import override
-else:
-    from overrides import override
-
 # Remove this import to fix circular dependency
 # from lancedb import connect_async
 from lancedb.remote import ClientConfig
 import pyarrow as pa
+from overrides import override

 from ..common import DATA
 from ..db import DBConnection, LOOP
--- a/python/python/lancedb/remote/table.py
+++ b/python/python/lancedb/remote/table.py
@@ -114,7 +114,7 @@ class RemoteTable(Table):
        index_type: Literal["BTREE", "BITMAP", "LABEL_LIST", "scalar"] = "scalar",
        *,
        replace: bool = False,
-        wait_timeout: Optional[timedelta] = None,
+        wait_timeout: timedelta = None,
        name: Optional[str] = None,
    ):
        """Creates a scalar index
@@ -153,7 +153,7 @@ class RemoteTable(Table):
        column: str,
        *,
        replace: bool = False,
-        wait_timeout: Optional[timedelta] = None,
+        wait_timeout: timedelta = None,
        with_position: bool = False,
        # tokenizer configs:
        base_tokenizer: str = "simple",
@@ -436,9 +436,6 @@ class RemoteTable(Table):
    def _analyze_plan(self, query: Query) -> str:
        return LOOP.run(self._table._analyze_plan(query))

-    def _output_schema(self, query: Query) -> pa.Schema:
-        return LOOP.run(self._table._output_schema(query))
-
    def merge_insert(self, on: Union[str, Iterable[str]]) -> LanceMergeInsertBuilder:
        """Returns a [`LanceMergeInsertBuilder`][lancedb.merge.LanceMergeInsertBuilder]
        that can be used to create a "merge insert" operation.
--- a/python/python/lancedb/rerankers/voyageai.py
+++ b/python/python/lancedb/rerankers/voyageai.py
@@ -21,8 +21,6 @@ class VoyageAIReranker(Reranker):
    ----------
    model_name : str, default "rerank-english-v2.0"
        The name of the cross encoder model to use. Available voyageai models are:
-        - rerank-2.5
-        - rerank-2.5-lite
        - rerank-2
        - rerank-2-lite
    column : str, default "text"
--- a/python/python/lancedb/table.py
+++ b/python/python/lancedb/table.py
@@ -74,7 +74,6 @@ from .index import lang_mapping


 if TYPE_CHECKING:
-    from .db import LanceDBConnection
    from ._lancedb import (
        Table as LanceDBTable,
        OptimizeStats,
@@ -89,6 +88,7 @@ if TYPE_CHECKING:
        MergeResult,
        UpdateResult,
    )
+    from .db import LanceDBConnection
    from .index import IndexConfig
    import pandas
    import PIL
@@ -1248,9 +1248,6 @@ class Table(ABC):
    @abstractmethod
    def _analyze_plan(self, query: Query) -> str: ...

-    @abstractmethod
-    def _output_schema(self, query: Query) -> pa.Schema: ...
-
    @abstractmethod
    def _do_merge(
        self,
@@ -1710,38 +1707,22 @@ class LanceTable(Table):
        namespace: List[str] = [],
        storage_options: Optional[Dict[str, str]] = None,
        index_cache_size: Optional[int] = None,
-        _async: AsyncTable = None,
    ):
        self._conn = connection
        self._namespace = namespace
-        if _async is not None:
-            self._table = _async
-        else:
-            self._table = LOOP.run(
-                connection._conn.open_table(
-                    name,
-                    namespace=namespace,
-                    storage_options=storage_options,
-                    index_cache_size=index_cache_size,
-                )
+        self._table = LOOP.run(
+            connection._conn.open_table(
+                name,
+                namespace=namespace,
+                storage_options=storage_options,
+                index_cache_size=index_cache_size,
            )
+        )

    @property
    def name(self) -> str:
        return self._table.name

-    @classmethod
-    def from_inner(cls, tbl: LanceDBTable):
-        from .db import LanceDBConnection
-
-        async_tbl = AsyncTable(tbl)
-        conn = LanceDBConnection.from_inner(tbl.database())
-        return cls(
-            conn,
-            async_tbl.name,
-            _async=async_tbl,
-        )
-
    @classmethod
    def open(cls, db, name, *, namespace: List[str] = [], **kwargs):
        tbl = cls(db, name, namespace=namespace, **kwargs)
@@ -2764,9 +2745,6 @@ class LanceTable(Table):
    def _analyze_plan(self, query: Query) -> str:
        return LOOP.run(self._table._analyze_plan(query))

-    def _output_schema(self, query: Query) -> pa.Schema:
-        return LOOP.run(self._table._output_schema(query))
-
    def _do_merge(
        self,
        merge: LanceMergeInsertBuilder,
@@ -2778,10 +2756,6 @@ class LanceTable(Table):
            self._table._do_merge(merge, new_data, on_bad_vectors, fill_value)
        )

-    @property
-    def _inner(self) -> LanceDBTable:
-        return self._table._inner
-
    @deprecation.deprecated(
        deprecated_in="0.21.0",
        current_version=__version__,
@@ -3924,10 +3898,6 @@ class AsyncTable:
        async_query = self._sync_query_to_async(query)
        return await async_query.analyze_plan()

-    async def _output_schema(self, query: Query) -> pa.Schema:
-        async_query = self._sync_query_to_async(query)
-        return await async_query.output_schema()
-
    async def _do_merge(
        self,
        merge: LanceMergeInsertBuilder,
--- a/python/python/tests/test_embeddings_slow.py
+++ b/python/python/tests/test_embeddings_slow.py
@@ -532,27 +532,6 @@ def test_voyageai_embedding_function():
    assert len(tbl.to_pandas()["vector"][0]) == voyageai.ndims()


-@pytest.mark.slow
-@pytest.mark.skipif(
-    os.environ.get("VOYAGE_API_KEY") is None, reason="VOYAGE_API_KEY not set"
-)
-def test_voyageai_embedding_function_contextual_model():
-    voyageai = (
-        get_registry().get("voyageai").create(name="voyage-context-3", max_retries=0)
-    )
-
-    class TextModel(LanceModel):
-        text: str = voyageai.SourceField()
-        vector: Vector(voyageai.ndims()) = voyageai.VectorField()
-
-    df = pd.DataFrame({"text": ["hello world", "goodbye world"]})
-    db = lancedb.connect("~/lancedb")
-    tbl = db.create_table("test", schema=TextModel, mode="overwrite")
-
-    tbl.add(df)
-    assert len(tbl.to_pandas()["vector"][0]) == voyageai.ndims()
-
-
@pytest.mark.slow
@pytest.mark.skipif(
    os.environ.get("VOYAGE_API_KEY") is None, reason="VOYAGE_API_KEY not set"
@@ -677,106 +656,6 @@ def test_colpali(tmp_path):
    )


-@pytest.mark.slow
-@pytest.mark.skipif(
-    importlib.util.find_spec("colpali_engine") is None,
-    reason="colpali_engine not installed",
-)
-@pytest.mark.parametrize(
-    "model_name",
-    [
-        "vidore/colSmol-256M",
-        "vidore/colqwen2.5-v0.2",
-        "vidore/colpali-v1.3",
-        "vidore/colqwen2-v1.0",
-    ],
-)
-def test_colpali_models(tmp_path, model_name):
-    import requests
-    from lancedb.pydantic import LanceModel
-
-    db = lancedb.connect(tmp_path)
-    registry = get_registry()
-    func = registry.get("colpali").create(model_name=model_name)
-
-    class MediaItems(LanceModel):
-        text: str
-        image_uri: str = func.SourceField()
-        image_bytes: bytes = func.SourceField()
-        image_vectors: MultiVector(func.ndims()) = func.VectorField()
-
-    table = db.create_table(f"media_{model_name.replace('/', '_')}", schema=MediaItems)
-
-    texts = [
-        "a cute cat playing with yarn",
-    ]
-
-    uris = [
-        "http://farm1.staticflickr.com/53/167798175_7c7845bbbd_z.jpg",
-    ]
-
-    image_bytes = [requests.get(uri).content for uri in uris]
-
-    table.add(
-        pd.DataFrame({"text": texts, "image_uri": uris, "image_bytes": image_bytes})
-    )
-
-    image_results = (
-        table.search("fluffy companion", vector_column_name="image_vectors")
-        .limit(1)
-        .to_pydantic(MediaItems)[0]
-    )
-    assert "cat" in image_results.text.lower() or "puppy" in image_results.text.lower()
-
-    first_row = table.to_arrow().to_pylist()[0]
-    assert len(first_row["image_vectors"]) > 1, "Should have multiple image vectors"
-    assert len(first_row["image_vectors"][0]) == func.ndims(), (
-        "Vector dimension mismatch"
-    )
-
-
-@pytest.mark.slow
-@pytest.mark.skipif(
-    importlib.util.find_spec("colpali_engine") is None,
-    reason="colpali_engine not installed",
-)
-def test_colpali_pooling(tmp_path):
-    registry = get_registry()
-    model_name = "vidore/colSmol-256M"
-    test_sentence = "a test sentence for pooling"
-
-    # 1. Get embeddings with no pooling
-    func_no_pool = registry.get("colpali").create(
-        model_name=model_name, pooling_strategy=None
-    )
-    unpooled_embeddings = func_no_pool.generate_text_embeddings([test_sentence])[0]
-    original_length = len(unpooled_embeddings)
-    assert original_length > 1
-
-    # 2. Test hierarchical pooling
-    func_hierarchical = registry.get("colpali").create(
-        model_name=model_name, pooling_strategy="hierarchical", pool_factor=2
-    )
-    hierarchical_embeddings = func_hierarchical.generate_text_embeddings(
-        [test_sentence]
-    )[0]
-    expected_hierarchical_length = (original_length + 1) // 2
-    assert len(hierarchical_embeddings) == expected_hierarchical_length
-
-    # 3. Test lambda pooling
-    def simple_pool_func(tensor):
-        return tensor[::2]
-
-    func_lambda = registry.get("colpali").create(
-        model_name=model_name,
-        pooling_strategy="lambda",
-        pooling_func=simple_pool_func,
-    )
-    lambda_embeddings = func_lambda.generate_text_embeddings([test_sentence])[0]
-    expected_lambda_length = (original_length + 1) // 2
-    assert len(lambda_embeddings) == expected_lambda_length
-
-
@pytest.mark.slow
 def test_siglip(tmp_path, test_images, query_image_bytes):
    from PIL import Image
--- a/python/python/tests/test_namespace.py
+++ b/python/python/tests/test_namespace.py
@@ -59,14 +59,6 @@ class TempNamespace(LanceNamespace):
            root
        ]  # Reference to shared namespaces

-    def namespace_id(self) -> str:
-        """Return a human-readable unique identifier for this namespace instance.
-
-        Returns:
-            A unique identifier string based on the root directory
-        """
-        return f"TempNamespace {{ root: '{self.config.root}' }}"
-
    def list_tables(self, request: ListTablesRequest) -> ListTablesResponse:
        """List all tables in the namespace."""
        if not request.id:
--- a/python/python/tests/test_permutation.py
+++ b/python/python/tests/test_permutation.py
@@ -1,462 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright The LanceDB Authors
-
-import pyarrow as pa
-import pytest
-
-from lancedb.permutation import permutation_builder
-
-
-def test_split_random_ratios(mem_db):
-    """Test random splitting with ratios."""
-    tbl = mem_db.create_table(
-        "test_table", pa.table({"x": range(100), "y": range(100)})
-    )
-    permutation_tbl = permutation_builder(tbl).split_random(ratios=[0.3, 0.7]).execute()
-
-    # Check that the table was created and has data
-    assert permutation_tbl.count_rows() == 100
-
-    # Check that split_id column exists and has correct values
-    data = permutation_tbl.search(None).to_arrow().to_pydict()
-    split_ids = data["split_id"]
-    assert set(split_ids) == {0, 1}
-
-    # Check approximate split sizes (allowing for rounding)
-    split_0_count = split_ids.count(0)
-    split_1_count = split_ids.count(1)
-    assert 25 <= split_0_count <= 35  # ~30% ± tolerance
-    assert 65 <= split_1_count <= 75  # ~70% ± tolerance
-
-
-def test_split_random_counts(mem_db):
-    """Test random splitting with absolute counts."""
-    tbl = mem_db.create_table(
-        "test_table", pa.table({"x": range(100), "y": range(100)})
-    )
-    permutation_tbl = permutation_builder(tbl).split_random(counts=[20, 30]).execute()
-
-    # Check that we have exactly the requested counts
-    assert permutation_tbl.count_rows() == 50
-
-    data = permutation_tbl.search(None).to_arrow().to_pydict()
-    split_ids = data["split_id"]
-    assert split_ids.count(0) == 20
-    assert split_ids.count(1) == 30
-
-
-def test_split_random_fixed(mem_db):
-    """Test random splitting with fixed number of splits."""
-    tbl = mem_db.create_table(
-        "test_table", pa.table({"x": range(100), "y": range(100)})
-    )
-    permutation_tbl = permutation_builder(tbl).split_random(fixed=4).execute()
-
-    # Check that we have 4 splits with 25 rows each
-    assert permutation_tbl.count_rows() == 100
-
-    data = permutation_tbl.search(None).to_arrow().to_pydict()
-    split_ids = data["split_id"]
-    assert set(split_ids) == {0, 1, 2, 3}
-
-    for split_id in range(4):
-        assert split_ids.count(split_id) == 25
-
-
-def test_split_random_with_seed(mem_db):
-    """Test that seeded random splits are reproducible."""
-    tbl = mem_db.create_table("test_table", pa.table({"x": range(50), "y": range(50)}))
-
-    # Create two identical permutations with same seed
-    perm1 = permutation_builder(tbl).split_random(ratios=[0.6, 0.4], seed=42).execute()
-
-    perm2 = permutation_builder(tbl).split_random(ratios=[0.6, 0.4], seed=42).execute()
-
-    # Results should be identical
-    data1 = perm1.search(None).to_arrow().to_pydict()
-    data2 = perm2.search(None).to_arrow().to_pydict()
-
-    assert data1["row_id"] == data2["row_id"]
-    assert data1["split_id"] == data2["split_id"]
-
-
-def test_split_hash(mem_db):
-    """Test hash-based splitting."""
-    tbl = mem_db.create_table(
-        "test_table",
-        pa.table(
-            {
-                "id": range(100),
-                "category": (["A", "B", "C"] * 34)[:100],  # Repeating pattern
-                "value": range(100),
-            }
-        ),
-    )
-
-    permutation_tbl = (
-        permutation_builder(tbl)
-        .split_hash(["category"], [1, 1], discard_weight=0)
-        .execute()
-    )
-
-    # Should have all 100 rows (no discard)
-    assert permutation_tbl.count_rows() == 100
-
-    data = permutation_tbl.search(None).to_arrow().to_pydict()
-    split_ids = data["split_id"]
-    assert set(split_ids) == {0, 1}
-
-    # Verify that each split has roughly 50 rows (allowing for hash variance)
-    split_0_count = split_ids.count(0)
-    split_1_count = split_ids.count(1)
-    assert 30 <= split_0_count <= 70  # ~50 ± 20 tolerance for hash distribution
-    assert 30 <= split_1_count <= 70  # ~50 ± 20 tolerance for hash distribution
-
-    # Hash splits should be deterministic - same category should go to same split
-    # Let's verify by creating another permutation and checking consistency
-    perm2 = (
-        permutation_builder(tbl)
-        .split_hash(["category"], [1, 1], discard_weight=0)
-        .execute()
-    )
-
-    data2 = perm2.search(None).to_arrow().to_pydict()
-    assert data["split_id"] == data2["split_id"]  # Should be identical
-
-
-def test_split_hash_with_discard(mem_db):
-    """Test hash-based splitting with discard weight."""
-    tbl = mem_db.create_table(
-        "test_table",
-        pa.table({"id": range(100), "category": ["A", "B"] * 50, "value": range(100)}),
-    )
-
-    permutation_tbl = (
-        permutation_builder(tbl)
-        .split_hash(["category"], [1, 1], discard_weight=2)  # Should discard ~50%
-        .execute()
-    )
-
-    # Should have fewer than 100 rows due to discard
-    row_count = permutation_tbl.count_rows()
-    assert row_count < 100
-    assert row_count > 0  # But not empty
-
-
-def test_split_sequential(mem_db):
-    """Test sequential splitting."""
-    tbl = mem_db.create_table(
-        "test_table", pa.table({"x": range(100), "y": range(100)})
-    )
-
-    permutation_tbl = (
-        permutation_builder(tbl).split_sequential(counts=[30, 40]).execute()
-    )
-
-    assert permutation_tbl.count_rows() == 70
-
-    data = permutation_tbl.search(None).to_arrow().to_pydict()
-    row_ids = data["row_id"]
-    split_ids = data["split_id"]
-
-    # Sequential should maintain order
-    assert row_ids == sorted(row_ids)
-
-    # First 30 should be split 0, next 40 should be split 1
-    assert split_ids[:30] == [0] * 30
-    assert split_ids[30:] == [1] * 40
-
-
-def test_split_calculated(mem_db):
-    """Test calculated splitting."""
-    tbl = mem_db.create_table(
-        "test_table", pa.table({"id": range(100), "value": range(100)})
-    )
-
-    permutation_tbl = (
-        permutation_builder(tbl)
-        .split_calculated("id % 3")  # Split based on id modulo 3
-        .execute()
-    )
-
-    assert permutation_tbl.count_rows() == 100
-
-    data = permutation_tbl.search(None).to_arrow().to_pydict()
-    row_ids = data["row_id"]
-    split_ids = data["split_id"]
-
-    # Verify the calculation: each row's split_id should equal row_id % 3
-    for i, (row_id, split_id) in enumerate(zip(row_ids, split_ids)):
-        assert split_id == row_id % 3
-
-
-def test_split_error_cases(mem_db):
-    """Test error handling for invalid split parameters."""
-    tbl = mem_db.create_table("test_table", pa.table({"x": range(10), "y": range(10)}))
-
-    # Test split_random with no parameters
-    with pytest.raises(Exception):
-        permutation_builder(tbl).split_random().execute()
-
-    # Test split_random with multiple parameters
-    with pytest.raises(Exception):
-        permutation_builder(tbl).split_random(
-            ratios=[0.5, 0.5], counts=[5, 5]
-        ).execute()
-
-    # Test split_sequential with no parameters
-    with pytest.raises(Exception):
-        permutation_builder(tbl).split_sequential().execute()
-
-    # Test split_sequential with multiple parameters
-    with pytest.raises(Exception):
-        permutation_builder(tbl).split_sequential(ratios=[0.5, 0.5], fixed=2).execute()
-
-
-def test_shuffle_no_seed(mem_db):
-    """Test shuffling without a seed."""
-    tbl = mem_db.create_table(
-        "test_table", pa.table({"id": range(100), "value": range(100)})
-    )
-
-    # Create a permutation with shuffling (no seed)
-    permutation_tbl = permutation_builder(tbl).shuffle().execute()
-
-    assert permutation_tbl.count_rows() == 100
-
-    data = permutation_tbl.search(None).to_arrow().to_pydict()
-    row_ids = data["row_id"]
-
-    # Row IDs should not be in sequential order due to shuffling
-    # This is probabilistic but with 100 rows, it's extremely unlikely they'd stay
-    # in order
-    assert row_ids != list(range(100))
-
-
-def test_shuffle_with_seed(mem_db):
-    """Test that shuffling with a seed is reproducible."""
-    tbl = mem_db.create_table(
-        "test_table", pa.table({"id": range(50), "value": range(50)})
-    )
-
-    # Create two identical permutations with same shuffle seed
-    perm1 = permutation_builder(tbl).shuffle(seed=42).execute()
-
-    perm2 = permutation_builder(tbl).shuffle(seed=42).execute()
-
-    # Results should be identical due to same seed
-    data1 = perm1.search(None).to_arrow().to_pydict()
-    data2 = perm2.search(None).to_arrow().to_pydict()
-
-    assert data1["row_id"] == data2["row_id"]
-    assert data1["split_id"] == data2["split_id"]
-
-
-def test_shuffle_with_clump_size(mem_db):
-    """Test shuffling with clump size."""
-    tbl = mem_db.create_table(
-        "test_table", pa.table({"id": range(100), "value": range(100)})
-    )
-
-    # Create a permutation with shuffling using clumps
-    permutation_tbl = (
-        permutation_builder(tbl)
-        .shuffle(clump_size=10)  # 10-row clumps
-        .execute()
-    )
-
-    assert permutation_tbl.count_rows() == 100
-
-    data = permutation_tbl.search(None).to_arrow().to_pydict()
-    row_ids = data["row_id"]
-
-    for i in range(10):
-        start = row_ids[i * 10]
-        assert row_ids[i * 10 : (i + 1) * 10] == list(range(start, start + 10))
-
-
-def test_shuffle_different_seeds(mem_db):
-    """Test that different seeds produce different shuffle orders."""
-    tbl = mem_db.create_table(
-        "test_table", pa.table({"id": range(50), "value": range(50)})
-    )
-
-    # Create two permutations with different shuffle seeds
-    perm1 = permutation_builder(tbl).split_random(fixed=2).shuffle(seed=42).execute()
-
-    perm2 = permutation_builder(tbl).split_random(fixed=2).shuffle(seed=123).execute()
-
-    # Results should be different due to different seeds
-    data1 = perm1.search(None).to_arrow().to_pydict()
-    data2 = perm2.search(None).to_arrow().to_pydict()
-
-    # Row order should be different
-    assert data1["row_id"] != data2["row_id"]
-
-
-def test_shuffle_combined_with_splits(mem_db):
-    """Test shuffling combined with different split strategies."""
-    tbl = mem_db.create_table(
-        "test_table",
-        pa.table(
-            {
-                "id": range(100),
-                "category": (["A", "B", "C"] * 34)[:100],
-                "value": range(100),
-            }
-        ),
-    )
-
-    # Test shuffle with random splits
-    perm_random = (
-        permutation_builder(tbl)
-        .split_random(ratios=[0.6, 0.4], seed=42)
-        .shuffle(seed=123, clump_size=None)
-        .execute()
-    )
-
-    # Test shuffle with hash splits
-    perm_hash = (
-        permutation_builder(tbl)
-        .split_hash(["category"], [1, 1], discard_weight=0)
-        .shuffle(seed=456, clump_size=5)
-        .execute()
-    )
-
-    # Test shuffle with sequential splits
-    perm_sequential = (
-        permutation_builder(tbl)
-        .split_sequential(counts=[40, 35])
-        .shuffle(seed=789, clump_size=None)
-        .execute()
-    )
-
-    # Verify all permutations work and have expected properties
-    assert perm_random.count_rows() == 100
-    assert perm_hash.count_rows() == 100
-    assert perm_sequential.count_rows() == 75
-
-    # Verify shuffle affected the order
-    data_random = perm_random.search(None).to_arrow().to_pydict()
-    data_sequential = perm_sequential.search(None).to_arrow().to_pydict()
-
-    assert data_random["row_id"] != list(range(100))
-    assert data_sequential["row_id"] != list(range(75))
-
-
-def test_no_shuffle_maintains_order(mem_db):
-    """Test that not calling shuffle maintains the original order."""
-    tbl = mem_db.create_table(
-        "test_table", pa.table({"id": range(50), "value": range(50)})
-    )
-
-    # Create permutation without shuffle (should maintain some order)
-    permutation_tbl = (
-        permutation_builder(tbl)
-        .split_sequential(counts=[25, 25])  # Sequential maintains order
-        .execute()
-    )
-
-    assert permutation_tbl.count_rows() == 50
-
-    data = permutation_tbl.search(None).to_arrow().to_pydict()
-    row_ids = data["row_id"]
-
-    # With sequential splits and no shuffle, should maintain order
-    assert row_ids == list(range(50))
-
-
-def test_filter_basic(mem_db):
-    """Test basic filtering functionality."""
-    tbl = mem_db.create_table(
-        "test_table", pa.table({"id": range(100), "value": range(100, 200)})
-    )
-
-    # Filter to only include rows where id < 50
-    permutation_tbl = permutation_builder(tbl).filter("id < 50").execute()
-
-    assert permutation_tbl.count_rows() == 50
-
-    data = permutation_tbl.search(None).to_arrow().to_pydict()
-    row_ids = data["row_id"]
-
-    # All row_ids should be less than 50
-    assert all(row_id < 50 for row_id in row_ids)
-
-
-def test_filter_with_splits(mem_db):
-    """Test filtering combined with split strategies."""
-    tbl = mem_db.create_table(
-        "test_table",
-        pa.table(
-            {
-                "id": range(100),
-                "category": (["A", "B", "C"] * 34)[:100],
-                "value": range(100),
-            }
-        ),
-    )
-
-    # Filter to only category A and B, then split
-    permutation_tbl = (
-        permutation_builder(tbl)
-        .filter("category IN ('A', 'B')")
-        .split_random(ratios=[0.5, 0.5])
-        .execute()
-    )
-
-    # Should have fewer than 100 rows due to filtering
-    row_count = permutation_tbl.count_rows()
-    assert row_count == 67
-
-    data = permutation_tbl.search(None).to_arrow().to_pydict()
-    categories = data["category"]
-
-    # All categories should be A or B
-    assert all(cat in ["A", "B"] for cat in categories)
-
-
-def test_filter_with_shuffle(mem_db):
-    """Test filtering combined with shuffling."""
-    tbl = mem_db.create_table(
-        "test_table",
-        pa.table(
-            {
-                "id": range(100),
-                "category": (["A", "B", "C", "D"] * 25)[:100],
-                "value": range(100),
-            }
-        ),
-    )
-
-    # Filter and shuffle
-    permutation_tbl = (
-        permutation_builder(tbl)
-        .filter("category IN ('A', 'C')")
-        .shuffle(seed=42)
-        .execute()
-    )
-
-    row_count = permutation_tbl.count_rows()
-    assert row_count == 50  # Should have 50 rows (A and C categories)
-
-    data = permutation_tbl.search(None).to_arrow().to_pydict()
-    row_ids = data["row_id"]
-
-    assert row_ids != sorted(row_ids)
-
-
-def test_filter_empty_result(mem_db):
-    """Test filtering that results in empty set."""
-    tbl = mem_db.create_table(
-        "test_table", pa.table({"id": range(10), "value": range(10)})
-    )
-
-    # Filter that matches nothing
-    permutation_tbl = (
-        permutation_builder(tbl)
-        .filter("value > 100")  # No values > 100 in our data
-        .execute()
-    )
-
-    assert permutation_tbl.count_rows() == 0
--- a/python/python/tests/test_query.py
+++ b/python/python/tests/test_query.py
@@ -1298,79 +1298,6 @@ async def test_query_serialization_async(table_async: AsyncTable):
    )


-def test_query_schema(tmp_path):
-    db = lancedb.connect(tmp_path)
-    tbl = db.create_table(
-        "test",
-        pa.table(
-            {
-                "a": [1, 2, 3],
-                "text": ["a", "b", "c"],
-                "vec": pa.array(
-                    [[1, 2], [3, 4], [5, 6]], pa.list_(pa.float32(), list_size=2)
-                ),
-            }
-        ),
-    )
-
-    assert tbl.search(None).output_schema() == pa.schema(
-        {
-            "a": pa.int64(),
-            "text": pa.string(),
-            "vec": pa.list_(pa.float32(), list_size=2),
-        }
-    )
-    assert tbl.search(None).select({"bl": "a * 2"}).output_schema() == pa.schema(
-        {"bl": pa.int64()}
-    )
-    assert tbl.search([1, 2]).select(["a"]).output_schema() == pa.schema(
-        {"a": pa.int64(), "_distance": pa.float32()}
-    )
-    assert tbl.search("blah").select(["a"]).output_schema() == pa.schema(
-        {"a": pa.int64()}
-    )
-    assert tbl.take_offsets([0]).select(["text"]).output_schema() == pa.schema(
-        {"text": pa.string()}
-    )
-
-
-@pytest.mark.asyncio
-async def test_query_schema_async(tmp_path):
-    db = await lancedb.connect_async(tmp_path)
-    tbl = await db.create_table(
-        "test",
-        pa.table(
-            {
-                "a": [1, 2, 3],
-                "text": ["a", "b", "c"],
-                "vec": pa.array(
-                    [[1, 2], [3, 4], [5, 6]], pa.list_(pa.float32(), list_size=2)
-                ),
-            }
-        ),
-    )
-
-    assert await tbl.query().output_schema() == pa.schema(
-        {
-            "a": pa.int64(),
-            "text": pa.string(),
-            "vec": pa.list_(pa.float32(), list_size=2),
-        }
-    )
-    assert await tbl.query().select({"bl": "a * 2"}).output_schema() == pa.schema(
-        {"bl": pa.int64()}
-    )
-    assert await tbl.vector_search([1, 2]).select(["a"]).output_schema() == pa.schema(
-        {"a": pa.int64(), "_distance": pa.float32()}
-    )
-    assert await (await tbl.search("blah")).select(["a"]).output_schema() == pa.schema(
-        {"a": pa.int64()}
-    )
-    assert await tbl.take_offsets([0]).select(["text"]).output_schema() == pa.schema(
-        {"text": pa.string()}
-    )
-
-
 def test_query_timeout(tmp_path):
    # Use local directory instead of memory:// to add a bit of latency to
    # operations so a timeout of zero will trigger exceptions.
--- a/python/python/tests/test_rerankers.py
+++ b/python/python/tests/test_rerankers.py
@@ -484,7 +484,7 @@ def test_jina_reranker(tmp_path, use_tantivy):
@pytest.mark.parametrize("use_tantivy", [True, False])
 def test_voyageai_reranker(tmp_path, use_tantivy):
    pytest.importorskip("voyageai")
-    reranker = VoyageAIReranker(model_name="rerank-2.5")
+    reranker = VoyageAIReranker(model_name="rerank-2")
    table, schema = get_test_table(tmp_path, use_tantivy)
    _run_test_reranker(reranker, table, "single player experience", None, schema)

--- a/python/src/connection.rs
+++ b/python/src/connection.rs
@@ -4,10 +4,7 @@
 use std::{collections::HashMap, sync::Arc, time::Duration};

 use arrow::{datatypes::Schema, ffi_stream::ArrowArrayStreamReader, pyarrow::FromPyArrow};
-use lancedb::{
-    connection::Connection as LanceConnection,
-    database::{CreateTableMode, ReadConsistency},
-};
+use lancedb::{connection::Connection as LanceConnection, database::CreateTableMode};
 use pyo3::{
    exceptions::{PyRuntimeError, PyValueError},
    pyclass, pyfunction, pymethods, Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
@@ -26,7 +23,7 @@ impl Connection {
        Self { inner: Some(inner) }
    }

-    pub(crate) fn get_inner(&self) -> PyResult<&LanceConnection> {
+    fn get_inner(&self) -> PyResult<&LanceConnection> {
        self.inner
            .as_ref()
            .ok_or_else(|| PyRuntimeError::new_err("Connection is closed"))
@@ -66,18 +63,6 @@ impl Connection {
        self.get_inner().map(|inner| inner.uri().to_string())
    }

-    #[pyo3(signature = ())]
-    pub fn get_read_consistency_interval(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
-        let inner = self_.get_inner()?.clone();
-        future_into_py(self_.py(), async move {
-            Ok(match inner.read_consistency().await.infer_error()? {
-                ReadConsistency::Manual => None,
-                ReadConsistency::Eventual(duration) => Some(duration.as_secs_f64()),
-                ReadConsistency::Strong => Some(0.0_f64),
-            })
-        })
-    }
-
    #[pyo3(signature = (namespace=vec![], start_after=None, limit=None))]
    pub fn table_names(
        self_: PyRef<'_, Self>,
--- a/python/src/lib.rs
+++ b/python/src/lib.rs
@@ -5,7 +5,6 @@ use arrow::RecordBatchStream;
 use connection::{connect, Connection};
 use env_logger::Env;
 use index::IndexConfig;
-use permutation::PyAsyncPermutationBuilder;
 use pyo3::{
    pymodule,
    types::{PyModule, PyModuleMethods},
@@ -23,7 +22,6 @@ pub mod connection;
 pub mod error;
 pub mod header;
 pub mod index;
-pub mod permutation;
 pub mod query;
 pub mod session;
 pub mod table;
@@ -51,9 +49,7 @@ pub fn _lancedb(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
    m.add_class::<DeleteResult>()?;
    m.add_class::<DropColumnsResult>()?;
    m.add_class::<UpdateResult>()?;
-    m.add_class::<PyAsyncPermutationBuilder>()?;
    m.add_function(wrap_pyfunction!(connect, m)?)?;
-    m.add_function(wrap_pyfunction!(permutation::async_permutation_builder, m)?)?;
    m.add_function(wrap_pyfunction!(util::validate_table_name, m)?)?;
    m.add("__version__", env!("CARGO_PKG_VERSION"))?;
    Ok(())
--- a/python/src/permutation.rs
+++ b/python/src/permutation.rs
@@ -1,170 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The LanceDB Authors
-
-use std::sync::{Arc, Mutex};
-
-use crate::{error::PythonErrorExt, table::Table};
-use lancedb::dataloader::{
-    permutation::builder::{PermutationBuilder as LancePermutationBuilder, ShuffleStrategy},
-    permutation::split::{SplitSizes, SplitStrategy},
-};
-use pyo3::{
-    exceptions::PyRuntimeError, pyclass, pymethods, types::PyAnyMethods, Bound, PyAny, PyRefMut,
-    PyResult,
-};
-use pyo3_async_runtimes::tokio::future_into_py;
-
-/// Create a permutation builder for the given table
-#[pyo3::pyfunction]
-pub fn async_permutation_builder(table: Bound<'_, PyAny>) -> PyResult<PyAsyncPermutationBuilder> {
-    let table = table.getattr("_inner")?.downcast_into::<Table>()?;
-    let inner_table = table.borrow().inner_ref()?.clone();
-    let inner_builder = LancePermutationBuilder::new(inner_table);
-
-    Ok(PyAsyncPermutationBuilder {
-        state: Arc::new(Mutex::new(PyAsyncPermutationBuilderState {
-            builder: Some(inner_builder),
-        })),
-    })
-}
-
-struct PyAsyncPermutationBuilderState {
-    builder: Option<LancePermutationBuilder>,
-}
-
-#[pyclass(name = "AsyncPermutationBuilder")]
-pub struct PyAsyncPermutationBuilder {
-    state: Arc<Mutex<PyAsyncPermutationBuilderState>>,
-}
-
-impl PyAsyncPermutationBuilder {
-    fn modify(
-        &self,
-        func: impl FnOnce(LancePermutationBuilder) -> LancePermutationBuilder,
-    ) -> PyResult<Self> {
-        let mut state = self.state.lock().unwrap();
-        let builder = state
-            .builder
-            .take()
-            .ok_or_else(|| PyRuntimeError::new_err("Builder already consumed"))?;
-        state.builder = Some(func(builder));
-        Ok(Self {
-            state: self.state.clone(),
-        })
-    }
-}
-
-#[pymethods]
-impl PyAsyncPermutationBuilder {
-    #[pyo3(signature = (*, ratios=None, counts=None, fixed=None, seed=None))]
-    pub fn split_random(
-        slf: PyRefMut<'_, Self>,
-        ratios: Option<Vec<f64>>,
-        counts: Option<Vec<u64>>,
-        fixed: Option<u64>,
-        seed: Option<u64>,
-    ) -> PyResult<Self> {
-        // Check that exactly one split type is provided
-        let split_args_count = [ratios.is_some(), counts.is_some(), fixed.is_some()]
-            .iter()
-            .filter(|&&x| x)
-            .count();
-
-        if split_args_count != 1 {
-            return Err(pyo3::exceptions::PyValueError::new_err(
-                "Exactly one of 'ratios', 'counts', or 'fixed' must be provided",
-            ));
-        }
-
-        let sizes = if let Some(ratios) = ratios {
-            SplitSizes::Percentages(ratios)
-        } else if let Some(counts) = counts {
-            SplitSizes::Counts(counts)
-        } else if let Some(fixed) = fixed {
-            SplitSizes::Fixed(fixed)
-        } else {
-            unreachable!("One of the split arguments must be provided");
-        };
-
-        slf.modify(|builder| builder.with_split_strategy(SplitStrategy::Random { seed, sizes }))
-    }
-
-    #[pyo3(signature = (columns, split_weights, *, discard_weight=0))]
-    pub fn split_hash(
-        slf: PyRefMut<'_, Self>,
-        columns: Vec<String>,
-        split_weights: Vec<u64>,
-        discard_weight: u64,
-    ) -> PyResult<Self> {
-        slf.modify(|builder| {
-            builder.with_split_strategy(SplitStrategy::Hash {
-                columns,
-                split_weights,
-                discard_weight,
-            })
-        })
-    }
-
-    #[pyo3(signature = (*, ratios=None, counts=None, fixed=None))]
-    pub fn split_sequential(
-        slf: PyRefMut<'_, Self>,
-        ratios: Option<Vec<f64>>,
-        counts: Option<Vec<u64>>,
-        fixed: Option<u64>,
-    ) -> PyResult<Self> {
-        // Check that exactly one split type is provided
-        let split_args_count = [ratios.is_some(), counts.is_some(), fixed.is_some()]
-            .iter()
-            .filter(|&&x| x)
-            .count();
-
-        if split_args_count != 1 {
-            return Err(pyo3::exceptions::PyValueError::new_err(
-                "Exactly one of 'ratios', 'counts', or 'fixed' must be provided",
-            ));
-        }
-
-        let sizes = if let Some(ratios) = ratios {
-            SplitSizes::Percentages(ratios)
-        } else if let Some(counts) = counts {
-            SplitSizes::Counts(counts)
-        } else if let Some(fixed) = fixed {
-            SplitSizes::Fixed(fixed)
-        } else {
-            unreachable!("One of the split arguments must be provided");
-        };
-
-        slf.modify(|builder| builder.with_split_strategy(SplitStrategy::Sequential { sizes }))
-    }
-
-    pub fn split_calculated(slf: PyRefMut<'_, Self>, calculation: String) -> PyResult<Self> {
-        slf.modify(|builder| builder.with_split_strategy(SplitStrategy::Calculated { calculation }))
-    }
-
-    pub fn shuffle(
-        slf: PyRefMut<'_, Self>,
-        seed: Option<u64>,
-        clump_size: Option<u64>,
-    ) -> PyResult<Self> {
-        slf.modify(|builder| {
-            builder.with_shuffle_strategy(ShuffleStrategy::Random { seed, clump_size })
-        })
-    }
-
-    pub fn filter(slf: PyRefMut<'_, Self>, filter: String) -> PyResult<Self> {
-        slf.modify(|builder| builder.with_filter(filter))
-    }
-
-    pub fn execute(slf: PyRefMut<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
-        let mut state = slf.state.lock().unwrap();
-        let builder = state
-            .builder
-            .take()
-            .ok_or_else(|| PyRuntimeError::new_err("Builder already consumed"))?;
-
-        future_into_py(slf.py(), async move {
-            let table = builder.build().await.infer_error()?;
-            Ok(Table::new(table))
-        })
-    }
-}
--- a/python/src/query.rs
+++ b/python/src/query.rs
@@ -9,7 +9,6 @@ use arrow::array::Array;
 use arrow::array::ArrayData;
 use arrow::pyarrow::FromPyArrow;
 use arrow::pyarrow::IntoPyArrow;
-use arrow::pyarrow::ToPyArrow;
 use lancedb::index::scalar::{
    BooleanQuery, BoostQuery, FtsQuery, FullTextSearchQuery, MatchQuery, MultiMatchQuery, Occur,
    Operator, PhraseQuery,
@@ -31,7 +30,6 @@ use pyo3::IntoPyObject;
 use pyo3::PyAny;
 use pyo3::PyRef;
 use pyo3::PyResult;
-use pyo3::Python;
 use pyo3::{exceptions::PyRuntimeError, FromPyObject};
 use pyo3::{
    exceptions::{PyNotImplementedError, PyValueError},
@@ -447,15 +445,6 @@ impl Query {
        })
    }

-    #[pyo3(signature = ())]
-    pub fn output_schema(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
-        let inner = self_.inner.clone();
-        future_into_py(self_.py(), async move {
-            let schema = inner.output_schema().await.infer_error()?;
-            Python::with_gil(|py| schema.to_pyarrow(py))
-        })
-    }
-
    #[pyo3(signature = (max_batch_length=None, timeout=None))]
    pub fn execute(
        self_: PyRef<'_, Self>,
@@ -526,15 +515,6 @@ impl TakeQuery {
        self.inner = self.inner.clone().with_row_id();
    }

-    #[pyo3(signature = ())]
-    pub fn output_schema(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
-        let inner = self_.inner.clone();
-        future_into_py(self_.py(), async move {
-            let schema = inner.output_schema().await.infer_error()?;
-            Python::with_gil(|py| schema.to_pyarrow(py))
-        })
-    }
-
    #[pyo3(signature = (max_batch_length=None, timeout=None))]
    pub fn execute(
        self_: PyRef<'_, Self>,
@@ -621,15 +601,6 @@ impl FTSQuery {
        self.inner = self.inner.clone().postfilter();
    }

-    #[pyo3(signature = ())]
-    pub fn output_schema(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
-        let inner = self_.inner.clone();
-        future_into_py(self_.py(), async move {
-            let schema = inner.output_schema().await.infer_error()?;
-            Python::with_gil(|py| schema.to_pyarrow(py))
-        })
-    }
-
    #[pyo3(signature = (max_batch_length=None, timeout=None))]
    pub fn execute(
        self_: PyRef<'_, Self>,
@@ -800,15 +771,6 @@ impl VectorQuery {
        self.inner = self.inner.clone().bypass_vector_index()
    }

-    #[pyo3(signature = ())]
-    pub fn output_schema(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
-        let inner = self_.inner.clone();
-        future_into_py(self_.py(), async move {
-            let schema = inner.output_schema().await.infer_error()?;
-            Python::with_gil(|py| schema.to_pyarrow(py))
-        })
-    }
-
    #[pyo3(signature = (max_batch_length=None, timeout=None))]
    pub fn execute(
        self_: PyRef<'_, Self>,
--- a/python/src/table.rs
+++ b/python/src/table.rs
@@ -3,7 +3,6 @@
 use std::{collections::HashMap, sync::Arc};

 use crate::{
-    connection::Connection,
    error::PythonErrorExt,
    index::{extract_index_params, IndexConfig},
    query::{Query, TakeQuery},
@@ -250,7 +249,7 @@ impl Table {
 }

 impl Table {
-    pub(crate) fn inner_ref(&self) -> PyResult<&LanceDbTable> {
+    fn inner_ref(&self) -> PyResult<&LanceDbTable> {
        self.inner
            .as_ref()
            .ok_or_else(|| PyRuntimeError::new_err(format!("Table {} is closed", self.name)))
@@ -273,13 +272,6 @@ impl Table {
        self.inner.take();
    }

-    pub fn database(&self) -> PyResult<Connection> {
-        let inner = self.inner_ref()?.clone();
-        let inner_connection =
-            lancedb::Connection::new(inner.database().clone(), inner.embedding_registry().clone());
-        Ok(Connection::new(inner_connection))
-    }
-
    pub fn schema(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
        let inner = self_.inner_ref()?.clone();
        future_into_py(self_.py(), async move {
--- a/rust-toolchain.toml
+++ b/rust-toolchain.toml
@@ -1,2 +1,2 @@
 [toolchain]
-channel = "1.90.0"
+channel = "1.86.0"
--- a/rust/lancedb/Cargo.toml
+++ b/rust/lancedb/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb"
-version = "0.22.3-beta.3"
+version = "0.22.2-beta.0"
 edition.workspace = true
 description = "LanceDB: A serverless, low-latency vector database for AI applications"
 license.workspace = true
@@ -11,12 +11,10 @@ rust-version.workspace = true

 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 [dependencies]
-ahash = { workspace = true }
 arrow = { workspace = true }
 arrow-array = { workspace = true }
 arrow-data = { workspace = true }
 arrow-schema = { workspace = true }
-arrow-select = { workspace = true }
 arrow-ord = { workspace = true }
 arrow-cast = { workspace = true }
 arrow-ipc.workspace = true
@@ -26,25 +24,19 @@ datafusion-common.workspace = true
 datafusion-execution.workspace = true
 datafusion-expr.workspace = true
 datafusion-physical-plan.workspace = true
-datafusion.workspace = true
 object_store = { workspace = true }
 snafu = { workspace = true }
 half = { workspace = true }
 lazy_static.workspace = true
 lance = { workspace = true }
-lance-core = { workspace = true }
 lance-datafusion.workspace = true
-lance-datagen = { workspace = true }
-lance-file = { workspace = true }
 lance-io = { workspace = true }
 lance-index = { workspace = true }
 lance-table = { workspace = true }
 lance-linalg = { workspace = true }
 lance-testing = { workspace = true }
 lance-encoding = { workspace = true }
-lance-arrow = { workspace = true }
 lance-namespace = { workspace = true }
-lance-namespace-impls = { workspace = true }
 moka = { workspace = true }
 pin-project = { workspace = true }
 tokio = { version = "1.23", features = ["rt-multi-thread"] }
@@ -54,13 +46,11 @@ bytes = "1"
 futures.workspace = true
 num-traits.workspace = true
 url.workspace = true
-rand.workspace = true
 regex.workspace = true
 serde = { version = "^1" }
 serde_json = { version = "1" }
 async-openai = { version = "0.20.0", optional = true }
 serde_with = { version = "3.8.1" }
-tempfile = "3.5.0"
 aws-sdk-bedrockruntime = { version = "1.27.0", optional = true }
 # For remote feature
 reqwest = { version = "0.12.0", default-features = false, features = [
@@ -71,8 +61,9 @@ reqwest = { version = "0.12.0", default-features = false, features = [
    "macos-system-configuration",
    "stream",
 ], optional = true }
+rand = { version = "0.9", features = ["small_rng"], optional = true }
 http = { version = "1", optional = true } # Matching what is in reqwest
-uuid = { version = "1.7.0", features = ["v4"] }
+uuid = { version = "1.7.0", features = ["v4"], optional = true }
 polars-arrow = { version = ">=0.37,<0.40.0", optional = true }
 polars = { version = ">=0.37,<0.40.0", optional = true }
 hf-hub = { version = "0.4.1", optional = true, default-features = false, features = [
@@ -93,6 +84,7 @@ bytemuck_derive.workspace = true
 [dev-dependencies]
 anyhow = "1"
 tempfile = "3.5.0"
+rand = { version = "0.9", features = ["small_rng"] }
 random_word = { version = "0.4.3", features = ["en"] }
 uuid = { version = "1.7.0", features = ["v4"] }
 walkdir = "2"
@@ -104,7 +96,6 @@ aws-smithy-runtime = { version = "1.9.1" }
 datafusion.workspace = true
 http-body = "1"                                        # Matching reqwest
 rstest = "0.23.0"
-test-log = "0.2"


 [features]
@@ -114,7 +105,7 @@ oss = ["lance/oss", "lance-io/oss"]
 gcs = ["lance/gcp", "lance-io/gcp"]
 azure = ["lance/azure", "lance-io/azure"]
 dynamodb = ["lance/dynamodb", "aws"]
-remote = ["dep:reqwest", "dep:http"]
+remote = ["dep:reqwest", "dep:http", "dep:rand", "dep:uuid"]
 fp16kernels = ["lance-linalg/fp16kernels"]
 s3-test = []
 bedrock = ["dep:aws-sdk-bedrockruntime"]
--- a/rust/lancedb/src/arrow.rs
+++ b/rust/lancedb/src/arrow.rs
@@ -7,7 +7,6 @@ pub use arrow_schema;
 use datafusion_common::DataFusionError;
 use datafusion_physical_plan::stream::RecordBatchStreamAdapter;
 use futures::{Stream, StreamExt, TryStreamExt};
-use lance_datagen::{BatchCount, BatchGeneratorBuilder, RowCount};

 #[cfg(feature = "polars")]
 use {crate::polars_arrow_convertors, polars::frame::ArrowChunk, polars::prelude::DataFrame};
@@ -162,26 +161,6 @@ impl IntoArrowStream for datafusion_physical_plan::SendableRecordBatchStream {
    }
 }

-pub trait LanceDbDatagenExt {
-    fn into_ldb_stream(
-        self,
-        batch_size: RowCount,
-        num_batches: BatchCount,
-    ) -> SendableRecordBatchStream;
-}
-
-impl LanceDbDatagenExt for BatchGeneratorBuilder {
-    fn into_ldb_stream(
-        self,
-        batch_size: RowCount,
-        num_batches: BatchCount,
-    ) -> SendableRecordBatchStream {
-        let (stream, schema) = self.into_reader_stream(batch_size, num_batches);
-        let stream = stream.map_err(|err| Error::Arrow { source: err });
-        Box::pin(SimpleRecordBatchStream::new(stream, schema))
-    }
-}
-
 #[cfg(feature = "polars")]
 /// An iterator of record batches formed from a Polars DataFrame.
 pub struct PolarsDataFrameRecordBatchReader {
--- a/rust/lancedb/src/connection.rs
+++ b/rust/lancedb/src/connection.rs
@@ -19,7 +19,7 @@ use crate::database::listing::{
 use crate::database::{
    CloneTableRequest, CreateNamespaceRequest, CreateTableData, CreateTableMode,
    CreateTableRequest, Database, DatabaseOptions, DropNamespaceRequest, ListNamespacesRequest,
-    OpenTableRequest, ReadConsistency, TableNamesRequest,
+    OpenTableRequest, TableNamesRequest,
 };
 use crate::embeddings::{
    EmbeddingDefinition, EmbeddingFunction, EmbeddingRegistry, MemoryRegistry, WithEmbeddings,
@@ -152,7 +152,6 @@ impl CreateTableBuilder<true> {
        let request = self.into_request()?;
        Ok(Table::new_with_embedding_registry(
            parent.create_table(request).await?,
-            parent,
            embedding_registry,
        ))
    }
@@ -212,9 +211,9 @@ impl CreateTableBuilder<false> {

    /// Execute the create table operation
    pub async fn execute(self) -> Result<Table> {
-        let parent = self.parent.clone();
-        let table = parent.create_table(self.request).await?;
-        Ok(Table::new(table, parent))
+        Ok(Table::new(
+            self.parent.clone().create_table(self.request).await?,
+        ))
    }
 }

@@ -463,10 +462,8 @@ impl OpenTableBuilder {

    /// Open the table
    pub async fn execute(self) -> Result<Table> {
-        let table = self.parent.open_table(self.request).await?;
        Ok(Table::new_with_embedding_registry(
-            table,
-            self.parent,
+            self.parent.clone().open_table(self.request).await?,
            self.embedding_registry,
        ))
    }
@@ -522,15 +519,16 @@ impl CloneTableBuilder {

    /// Execute the clone operation
    pub async fn execute(self) -> Result<Table> {
-        let parent = self.parent.clone();
-        let table = parent.clone_table(self.request).await?;
-        Ok(Table::new(table, parent))
+        Ok(Table::new(
+            self.parent.clone().clone_table(self.request).await?,
+        ))
    }
 }

 /// A connection to LanceDB
 #[derive(Clone)]
 pub struct Connection {
+    uri: String,
    internal: Arc<dyn Database>,
    embedding_registry: Arc<dyn EmbeddingRegistry>,
 }
@@ -542,19 +540,9 @@ impl std::fmt::Display for Connection {
 }

 impl Connection {
-    pub fn new(
-        internal: Arc<dyn Database>,
-        embedding_registry: Arc<dyn EmbeddingRegistry>,
-    ) -> Self {
-        Self {
-            internal,
-            embedding_registry,
-        }
-    }
-
    /// Get the URI of the connection
    pub fn uri(&self) -> &str {
-        self.internal.uri()
+        self.uri.as_str()
    }

    /// Get access to the underlying database
@@ -687,11 +675,6 @@ impl Connection {
            .await
    }

-    /// Get the read consistency of the connection
-    pub async fn read_consistency(&self) -> Result<ReadConsistency> {
-        self.internal.read_consistency().await
-    }
-
    /// Drop a table in the database.
    ///
    /// # Arguments
@@ -990,6 +973,7 @@ impl ConnectBuilder {
        )?);
        Ok(Connection {
            internal,
+            uri: self.request.uri,
            embedding_registry: self
                .embedding_registry
                .unwrap_or_else(|| Arc::new(MemoryRegistry::new())),
@@ -1012,6 +996,7 @@ impl ConnectBuilder {
            let internal = Arc::new(ListingDatabase::connect_with_options(&self.request).await?);
            Ok(Connection {
                internal,
+                uri: self.request.uri,
                embedding_registry: self
                    .embedding_registry
                    .unwrap_or_else(|| Arc::new(MemoryRegistry::new())),
@@ -1119,6 +1104,7 @@ impl ConnectNamespaceBuilder {

        Ok(Connection {
            internal,
+            uri: format!("namespace://{}", self.ns_impl),
            embedding_registry: self
                .embedding_registry
                .unwrap_or_else(|| Arc::new(MemoryRegistry::new())),
@@ -1153,6 +1139,7 @@ mod test_utils {
            let internal = Arc::new(crate::remote::db::RemoteDatabase::new_mock(handler));
            Self {
                internal,
+                uri: "db://test".to_string(),
                embedding_registry: Arc::new(MemoryRegistry::new()),
            }
        }
@@ -1169,6 +1156,7 @@ mod test_utils {
            ));
            Self {
                internal,
+                uri: "db://test".to_string(),
                embedding_registry: Arc::new(MemoryRegistry::new()),
            }
        }
@@ -1182,13 +1170,13 @@ mod tests {
    use crate::database::listing::{ListingDatabaseOptions, NewTableConfig};
    use crate::query::QueryBase;
    use crate::query::{ExecutableQuery, QueryExecutionOptions};
-    use crate::test_utils::connection::new_test_connection;
+    use crate::test_connection::test_utils::new_test_connection;
    use arrow::compute::concat_batches;
    use arrow_array::RecordBatchReader;
    use arrow_schema::{DataType, Field, Schema};
    use datafusion_physical_plan::stream::RecordBatchStreamAdapter;
    use futures::{stream, TryStreamExt};
-    use lance_core::error::{ArrowResult, DataFusionResult};
+    use lance::error::{ArrowResult, DataFusionResult};
    use lance_testing::datagen::{BatchGenerator, IncrementingInt32};
    use tempfile::tempdir;

@@ -1199,7 +1187,7 @@ mod tests {
    #[tokio::test]
    async fn test_connect() {
        let tc = new_test_connection().await.unwrap();
-        assert_eq!(tc.connection.uri(), tc.uri);
+        assert_eq!(tc.connection.uri, tc.uri);
    }

    #[cfg(not(windows))]
@@ -1220,7 +1208,7 @@ mod tests {
            .await
            .unwrap();

-        assert_eq!(db.uri(), relative_uri.to_str().unwrap().to_string());
+        assert_eq!(db.uri, relative_uri.to_str().unwrap().to_string());
    }

    #[tokio::test]
--- a/rust/lancedb/src/data/inspect.rs
+++ b/rust/lancedb/src/data/inspect.rs
@@ -52,13 +52,13 @@ pub fn infer_vector_columns(
    for field in reader.schema().fields() {
        match field.data_type() {
            DataType::FixedSizeList(sub_field, _) if sub_field.data_type().is_floating() => {
-                columns.push(field.name().clone());
+                columns.push(field.name().to_string());
            }
            DataType::List(sub_field) if sub_field.data_type().is_floating() && !strict => {
-                columns_to_infer.insert(field.name().clone(), None);
+                columns_to_infer.insert(field.name().to_string(), None);
            }
            DataType::LargeList(sub_field) if sub_field.data_type().is_floating() && !strict => {
-                columns_to_infer.insert(field.name().clone(), None);
+                columns_to_infer.insert(field.name().to_string(), None);
            }
            _ => {}
        }
--- a/rust/lancedb/src/data/sanitize.rs
+++ b/rust/lancedb/src/data/sanitize.rs
@@ -12,7 +12,7 @@ use arrow_array::{
 use arrow_cast::{can_cast_types, cast};
 use arrow_schema::{ArrowError, DataType, Field, Schema};
 use half::f16;
-use lance_arrow::{DataTypeExt, FixedSizeListArrayExt};
+use lance::arrow::{DataTypeExt, FixedSizeListArrayExt};
 use log::warn;
 use num_traits::cast::AsPrimitive;

@@ -189,7 +189,7 @@ mod tests {
    };
    use arrow_schema::Field;
    use half::f16;
-    use lance_arrow::FixedSizeListArrayExt;
+    use lance::arrow::FixedSizeListArrayExt;

    #[test]
    fn test_coerce_list_to_fixed_size_list() {
--- a/rust/lancedb/src/database.rs
+++ b/rust/lancedb/src/database.rs
@@ -16,7 +16,6 @@

 use std::collections::HashMap;
 use std::sync::Arc;
-use std::time::Duration;

 use arrow_array::RecordBatchReader;
 use async_trait::async_trait;
@@ -214,20 +213,6 @@ impl CloneTableRequest {
    }
 }

-/// How long until a change is reflected from one Table instance to another
-///
-/// Tables are always internally consistent.  If a write method is called on
-/// a table instance it will be immediately visible in that same table instance.
-pub enum ReadConsistency {
-    /// Changes will not be automatically propagated until the checkout_latest
-    /// method is called on the target table
-    Manual,
-    /// Changes will be propagated automatically within the given duration
-    Eventual(Duration),
-    /// Changes are immediately visible in target tables
-    Strong,
-}
-
 /// The `Database` trait defines the interface for database implementations.
 ///
 /// A database is responsible for managing tables and their metadata.
@@ -235,10 +220,6 @@ pub enum ReadConsistency {
 pub trait Database:
    Send + Sync + std::any::Any + std::fmt::Debug + std::fmt::Display + 'static
 {
-    /// Get the uri of the database
-    fn uri(&self) -> &str;
-    /// Get the read consistency of the database
-    async fn read_consistency(&self) -> Result<ReadConsistency>;
    /// List immediate child namespace names in the given namespace
    async fn list_namespaces(&self, request: ListNamespacesRequest) -> Result<Vec<String>>;
    /// Create a new namespace
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Colin P. McCabe	d6ea17073c	test	2025-09-30 11:58:20 -07:00
BubbleCal	c123bbf391	Merge branch 'main' of https://github.com/lancedb/lancedb into add-ivfrq	2025-09-30 16:30:58 +08:00
BubbleCal	fb856005a9	update docs Signed-off-by: BubbleCal <bubble-cal@outlook.com>	2025-09-29 18:24:58 +08:00
BubbleCal	5c1c2e2dd6	fmt Signed-off-by: BubbleCal <bubble-cal@outlook.com>	2025-09-29 17:47:59 +08:00
BubbleCal	1beef5f6e3	fix Signed-off-by: BubbleCal <bubble-cal@outlook.com>	2025-09-29 17:08:12 +08:00
BubbleCal	0913632584	feat: support IVF_RQ index type Signed-off-by: BubbleCal <bubble-cal@outlook.com>	2025-09-29 16:53:43 +08:00