ci: fix pypi publish on mac/windows/arm (#3449 )

The python-v0.32.0 publish run failed on every build matrix entry. Three independent issues: 1. **Mac and Windows**: `pypa/gh-action-pypi-publish` only runs on Linux, but was being called inline from each build job. 2. **Linux (all arches)**: `pypa/gh-action-pypi-publish` derives its docker image name from `github.action_repository`, which is empty when the action is invoked from inside a composite action (actions/runner#2473 — pypa's own `action.yml` references this bug). It falls back to `github.repository`, generating `docker://ghcr.io/lancedb/lancedb:<tag>`, which doesn't exist → `denied`. Only the ARM matrix entry surfaced this because it failed first and cancel-cascaded the rest. 3. **Windows**: `upload-artifact` in `build_windows_wheel` pointed at `python\target\wheels`, but maturin writes to the workspace-root `target/wheels`. The artifact was always empty. Also, `pypi-publish.yml` passed a `vcpkg_token` input that the composite doesn't declare. ## Changes - Build jobs (linux/mac/windows) now upload their wheels as `actions/upload-artifact` artifacts. - New Linux `publish` job downloads all wheel artifacts and runs the Fury or PyPA publish step directly (not via a composite), so `github.action_repository` resolves correctly. - Delete the unused `upload_wheel` composite action. - Drop the broken upload-artifact step inside `build_windows_wheel`. - Remove the bogus `vcpkg_token` input. - Fury upload now loops over all wheels instead of just the first. - Bump `actions/checkout`, `actions/upload-artifact`, `actions/download-artifact` to current major versions (Node 24) to clear deprecation warnings. - Bump Windows job timeout 60 → 90 minutes; previous run was cancel-timing-out on a 60m cap. - Use `rust-lld` as the Windows MSVC linker via `CARGO_TARGET_X86_64_PC_WINDOWS_MSVC_LINKER`. `link.exe` is single-threaded and the long pole on Windows builds. Fixes #3445 ## Test plan - [x] Open this PR — `paths` filter triggers a dry-run build on all three platforms. - [x] Verify all three builds produce wheels. - [x] Confirm the `pypa/gh-action-pypi-publish` container actually starts (the actions/runner#2473 bug) via the `publish-dry-run` job pointed at TestPyPI. - [x] **REMOVE BEFORE MERGE**: drop the `publish-dry-run` job and the now-redundant `actions/upload-artifact` runs on PRs (currently always-on so the dry-run has wheels to publish). - [ ] After merge, cherry-pick onto `python-v0.32.0` and force-push the tag to re-trigger the publish.
Bump version: 0.30.0-beta.2 → 0.30.0
2026-06-02 20:00:46 +00:00 · 2026-05-28 12:35:58 -07:00 · 2026-05-28 19:03:15 +00:00 · 2026-05-28 19:02:41 +00:00 · 2026-05-28 19:02:05 +00:00 · 2026-05-28 19:02:03 +00:00
72 changed files with 15433 additions and 1025 deletions
--- a/.bumpversion.toml
+++ b/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.29.1-beta.0"
+current_version = "0.30.0"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -11,6 +11,11 @@ updates:
    schedule:
      interval: weekly
    open-pull-requests-limit: 10
+    # Only update Cargo.lock, never widen/raise the version requirements in
+    # Cargo.toml. The goal is keeping the lockfile (and the binaries we ship)
+    # current on security fixes, not forcing our library's consumers onto
+    # newer minimum versions.
+    versioning-strategy: lockfile-only
    groups:
      rust-minor-patch:
        update-types:
--- a/.github/workflows/build_windows_wheel/action.yml
+++ b/.github/workflows/build_windows_wheel/action.yml
@@ -29,7 +29,3 @@ runs:
        args: ${{ inputs.args }}
        docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
        working-directory: python
-    - uses: actions/upload-artifact@v4
-      with:
-        name: windows-wheels
-        path: python\target\wheels
--- a/.github/workflows/nodejs.yml
+++ b/.github/workflows/nodejs.yml
@@ -157,7 +157,10 @@ jobs:
        npx jest --testEnvironment jest-environment-node-single-context --verbose
  macos:
    timeout-minutes: 30
-    runs-on: "macos-14"
+    # macos-15 ships a newer linker; the older macos-14 linker fails to insert
+    # branch islands when the debug cdylib's __text section exceeds the 128 MB
+    # AArch64 B/BL branch range.
+    runs-on: "macos-15"
    defaults:
      run:
        shell: bash
--- a/.github/workflows/pypi-publish.yml
+++ b/.github/workflows/pypi-publish.yml
@@ -8,6 +8,9 @@ on:
    # This should trigger a dry run (we skip the final publish step)
    paths:
      - .github/workflows/pypi-publish.yml
+      - .github/workflows/build_linux_wheel/action.yml
+      - .github/workflows/build_mac_wheel/action.yml
+      - .github/workflows/build_windows_wheel/action.yml
      - Cargo.toml # Change in dependency frequently breaks builds
      - Cargo.lock

@@ -21,9 +24,6 @@ jobs:
  linux:
    name: Python ${{ matrix.config.platform }} manylinux${{ matrix.config.manylinux }}
    timeout-minutes: 60
-    permissions:
-      id-token: write
-      contents: read
    strategy:
      matrix:
        config:
@@ -46,7 +46,7 @@ jobs:
            runner: ubuntu-2404-8x-arm64
    runs-on: ${{ matrix.config.runner }}
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          lfs: true
@@ -60,15 +60,14 @@ jobs:
          args: "--release --strip ${{ matrix.config.extra_args }}"
          arm-build: ${{ matrix.config.platform == 'aarch64' }}
          manylinux: ${{ matrix.config.manylinux }}
-      - uses: ./.github/workflows/upload_wheel
+      - uses: actions/upload-artifact@v7
        if: startsWith(github.ref, 'refs/tags/python-v')
        with:
-          fury_token: ${{ secrets.FURY_TOKEN }}
+          name: wheels-linux-${{ matrix.config.platform }}-${{ matrix.config.manylinux }}
+          path: target/wheels/lancedb-*.whl
+          if-no-files-found: error
  mac:
    timeout-minutes: 90
-    permissions:
-      id-token: write
-      contents: read
    runs-on: ${{ matrix.config.runner }}
    strategy:
      matrix:
@@ -78,7 +77,7 @@ jobs:
    env:
      MACOSX_DEPLOYMENT_TARGET: 10.15
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          lfs: true
@@ -90,18 +89,21 @@ jobs:
        with:
          python-minor-version: 10
          args: "--release --strip --target ${{ matrix.config.target }} --features fp16kernels"
-      - uses: ./.github/workflows/upload_wheel
+      - uses: actions/upload-artifact@v7
        if: startsWith(github.ref, 'refs/tags/python-v')
        with:
-          fury_token: ${{ secrets.FURY_TOKEN }}
+          name: wheels-mac-${{ matrix.config.target }}
+          path: target/wheels/lancedb-*.whl
+          if-no-files-found: error
  windows:
-    timeout-minutes: 60
-    permissions:
-      id-token: write
-      contents: read
+    timeout-minutes: 90
    runs-on: windows-latest
+    env:
+      # link.exe is single-threaded and the long pole on Windows builds. Use
+      # rustc's bundled lld-link instead.
+      CARGO_TARGET_X86_64_PC_WINDOWS_MSVC_LINKER: rust-lld
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          lfs: true
@@ -113,18 +115,70 @@ jobs:
        with:
          python-minor-version: 10
          args: "--release --strip"
-          vcpkg_token: ${{ secrets.VCPKG_GITHUB_PACKAGES }}
-      - uses: ./.github/workflows/upload_wheel
+      - uses: actions/upload-artifact@v7
        if: startsWith(github.ref, 'refs/tags/python-v')
        with:
-          fury_token: ${{ secrets.FURY_TOKEN }}
+          name: wheels-windows
+          path: target/wheels/lancedb-*.whl
+          if-no-files-found: error
+  publish:
+    name: Publish wheels
+    if: startsWith(github.ref, 'refs/tags/python-v')
+    needs: [linux, mac, windows]
+    runs-on: ubuntu-latest
+    permissions:
+      id-token: write
+      contents: read
+    steps:
+      - uses: actions/checkout@v6
+      - name: Download wheel artifacts
+        uses: actions/download-artifact@v8
+        with:
+          pattern: wheels-*
+          path: target/wheels
+          merge-multiple: true
+      - name: List wheels
+        run: ls -la target/wheels
+      - name: Choose repo
+        id: choose_repo
+        run: |
+          if [[ ${{ github.ref }} == *beta* ]]; then
+            echo "repo=fury" >> $GITHUB_OUTPUT
+          else
+            echo "repo=pypi" >> $GITHUB_OUTPUT
+          fi
+      - name: Publish to Fury
+        if: steps.choose_repo.outputs.repo == 'fury'
+        env:
+          FURY_TOKEN: ${{ secrets.FURY_TOKEN }}
+        run: |
+          shopt -s nullglob
+          WHEELS=(target/wheels/lancedb-*.whl)
+          if [[ ${#WHEELS[@]} -eq 0 ]]; then
+            echo "No wheels found in target/wheels/" >&2
+            exit 1
+          fi
+          for WHEEL in "${WHEELS[@]}"; do
+            echo "Uploading $WHEEL to Fury"
+            curl -f -F package=@"$WHEEL" "https://$FURY_TOKEN@push.fury.io/lancedb/"
+          done
+      # NOTE: pypa/gh-action-pypi-publish must be invoked directly from a
+      # workflow file, not from inside a composite action. When called from a
+      # composite, `github.action_repository` is empty (actions/runner#2473)
+      # and the action falls back to `github.repository`, producing a bogus
+      # `docker://ghcr.io/<repo>:<ref>` image reference that GHA tries to pull.
+      - name: Publish to PyPI
+        if: steps.choose_repo.outputs.repo == 'pypi'
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          packages-dir: target/wheels/
  gh-release:
    if: startsWith(github.ref, 'refs/tags/python-v')
    runs-on: ubuntu-latest
    permissions:
      contents: write
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          lfs: true
@@ -187,13 +241,13 @@ jobs:
  report-failure:
    name: Report Workflow Failure
    runs-on: ubuntu-latest
-    needs: [linux, mac, windows]
+    needs: [linux, mac, windows, publish]
    permissions:
      contents: read
      issues: write
    if: always() && failure() && startsWith(github.ref, 'refs/tags/python-v')
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
      - uses: ./.github/actions/create-failure-issue
        with:
          job-results: ${{ toJSON(needs) }}
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -205,7 +205,7 @@ jobs:
      - name: Delete wheels
        run: rm -rf target/wheels
  pydantic1x:
-    timeout-minutes: 30
+    timeout-minutes: 60
    runs-on: "ubuntu-24.04"
    defaults:
      run:
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -233,6 +233,26 @@ jobs:
          cargo update -p aws-sdk-sso --precise 1.62.0
          cargo update -p aws-sdk-ssooidc --precise 1.63.0
          cargo update -p aws-sdk-sts --precise 1.63.0
+          # aws-runtime/sigv4/credential-types/types and the aws-smithy-*
+          # crates bumped their MSRV to 1.91.1 in late 2026; pin to the last
+          # 1.91.0-compatible versions. The order matters — each downgrade
+          # only succeeds once everything that still pins it at a higher
+          # version has itself been downgraded.
+          cargo update -p aws-runtime --precise 1.5.12
+          cargo update -p aws-types --precise 1.3.9
+          cargo update -p aws-sigv4 --precise 1.3.5
+          cargo update -p aws-credential-types --precise 1.2.8
+          cargo update -p aws-smithy-checksums --precise 0.63.9
+          cargo update -p aws-smithy-runtime --precise 1.9.3
+          cargo update -p aws-smithy-http --precise 0.62.4
+          cargo update -p aws-smithy-eventstream --precise 0.60.12
+          cargo update -p aws-smithy-http-client --precise 1.1.3
+          cargo update -p aws-smithy-observability --precise 0.1.4
+          cargo update -p aws-smithy-query --precise 0.60.8
+          cargo update -p aws-smithy-runtime-api --precise 1.9.1
+          cargo update -p aws-smithy-async --precise 1.2.6
+          cargo update -p aws-smithy-types --precise 1.3.5
+          cargo update -p aws-smithy-xml --precise 0.60.11
          cargo update -p home --precise 0.5.9
      - name: cargo +${{ matrix.msrv }} check
        env:
--- a/.github/workflows/upload_wheel/action.yml
+++ b/.github/workflows/upload_wheel/action.yml
@@ -1,34 +0,0 @@
-name: upload-wheel
-
-description: "Upload wheels to Pypi"
-inputs:
-  fury_token:
-    required: true
-    description: "release token for the fury repo"
-
-runs:
-  using: "composite"
-  steps:
-  - name: Choose repo
-    shell: bash
-    id: choose_repo
-    run: |
-      if [[ ${{ github.ref }} == *beta* ]]; then
-        echo "repo=fury" >> $GITHUB_OUTPUT
-      else
-        echo "repo=pypi" >> $GITHUB_OUTPUT
-      fi
-  - name: Publish to Fury
-    if: steps.choose_repo.outputs.repo == 'fury'
-    shell: bash
-    env:
-      FURY_TOKEN: ${{ inputs.fury_token }}
-    run: |
-      WHEEL=$(ls target/wheels/lancedb-*.whl 2> /dev/null | head -n 1)
-      echo "Uploading $WHEEL to Fury"
-      curl -f -F package=@$WHEEL https://$FURY_TOKEN@push.fury.io/lancedb/
-  - name: Publish to PyPI
-    if: steps.choose_repo.outputs.repo == 'pypi'
-    uses: pypa/gh-action-pypi-publish@release/v1
-    with:
-      packages-dir: target/wheels/
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -17,9 +17,33 @@ Common commands:
 * Run tests: `cargo test --quiet --features remote --tests`
 * Run specific test: `cargo test --quiet --features remote -p <package_name> --test <test_name>`
 * Lint: `cargo clippy --quiet --features remote --tests --examples`
-* Format: `cargo fmt --all`
+* Format Rust: `cargo fmt --all`
+* Format Python: `ruff format .`
+* Lint Python: `ruff check .`
+* Bootstrap Python dev env: `cd python && uv run --extra tests --extra dev maturin develop --extras tests,dev`
+* Run Python tests: `cd python && uv run --extra tests pytest python/tests -vv --durations=10 -m "not slow and not s3_test"`
+* Run specific Python test: `cd python && uv run --extra tests pytest python/tests/<test_file>.py::<test_name> -q`

-Before committing changes, run formatting.
+For Python validation, prefer the uv-managed environment declared by `python/uv.lock`.
+Do not treat system `python`, global `pytest`, or missing editable-install errors as
+final blockers; bootstrap or enter the uv environment instead. If `lancedb._lancedb`
+is missing or stale, or if Rust/PyO3 binding code changed, rebuild the Python
+extension with the bootstrap command above before running tests.
+
+Before committing changes, run formatting for every language you touched. At minimum:
+
+* Rust changes: run `cargo fmt --all`.
+* Python changes: run `ruff format .` and `ruff check .` from the repository root,
+  and run targeted tests through `cd python && uv run ...`.
+* TypeScript changes: run the relevant `npm`/`pnpm` lint, format, build, and docs commands in `nodejs`.
+
+Before creating a PR, the exact value passed to `gh pr create --title` must follow
+Conventional Commits, such as `fix: support nested field paths in native index creation`
+or `feat(python): add dataset multiprocessing support`. Do not use a plain natural
+language summary like `Support nested field paths in native index creation` as the PR
+title. The semantic-release check uses the PR title and body as the merge commit message,
+so a non-conventional PR title will fail CI. After creating a PR, read the remote PR title
+back and fix it immediately if it is not conventional.

 ## Coding tips

--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,20 +13,20 @@ categories = ["database-implementations"]
 rust-version = "1.91.0"

 [workspace.dependencies]
-lance = { "version" = "=7.0.0-beta.13", default-features = false, "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
-lance-core = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
-lance-datagen = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
-lance-file = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
-lance-io = { "version" = "=7.0.0-beta.13", default-features = false, "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
-lance-index = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
-lance-linalg = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
-lance-namespace = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
-lance-namespace-impls = { "version" = "=7.0.0-beta.13", default-features = false, "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
-lance-table = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
-lance-testing = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
-lance-datafusion = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
-lance-encoding = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
-lance-arrow = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
+lance = { "version" = "=7.0.0", default-features = false }
+lance-core = "=7.0.0"
+lance-datagen = "=7.0.0"
+lance-file = "=7.0.0"
+lance-io = { "version" = "=7.0.0", default-features = false }
+lance-index = "=7.0.0"
+lance-linalg = "=7.0.0"
+lance-namespace = "=7.0.0"
+lance-namespace-impls = { "version" = "=7.0.0", default-features = false }
+lance-table = "=7.0.0"
+lance-testing = "=7.0.0"
+lance-datafusion = "=7.0.0"
+lance-encoding = "=7.0.0"
+lance-arrow = "=7.0.0"
 ahash = "0.8"
 # Note that this one does not include pyarrow
 arrow = { version = "58.0.0", optional = false }
--- a/ci/check_lance_release.py
+++ b/ci/check_lance_release.py
@@ -112,25 +112,25 @@ def fetch_remote_tags() -> List[TagInfo]:
            "api",
            "-X",
            "GET",
-            f"repos/{LANCE_REPO}/git/refs/tags",
-            "--paginate",
+            f"repos/{LANCE_REPO}/releases",
            "--jq",
-            ".[].ref",
+            ".[].tag_name",
+            "-F",
+            "per_page=20",
        ]
    )
    tags: List[TagInfo] = []
    for line in output.splitlines():
-        ref = line.strip()
-        if not ref.startswith("refs/tags/v"):
+        tag = line.strip()
+        if not tag.startswith("v"):
            continue
-        tag = ref.split("refs/tags/")[-1]
        version = tag.lstrip("v")
        try:
            tags.append(TagInfo(tag=tag, version=version, semver=parse_semver(version)))
        except ValueError:
            continue
    if not tags:
-        raise RuntimeError("No Lance tags could be parsed from GitHub API output")
+        raise RuntimeError("No Lance releases could be parsed from GitHub API output")
    return tags


--- a/docs/src/java/java.md
+++ b/docs/src/java/java.md
@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
 <dependency>
    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-core</artifactId>
-    <version>0.29.1-beta.0</version>
+    <version>0.30.0</version>
 </dependency>
 ```

--- a/docs/src/js/classes/Connection.md
+++ b/docs/src/js/classes/Connection.md
@@ -441,18 +441,28 @@ Open a table in the database.

 ```ts
 abstract renameTable(
-   oldName,
+   currentName,
   newName,
-   namespacePath?): Promise<void>
+   options?): Promise<void>
 ```

+Rename a table.
+
+Currently only supported by LanceDB Cloud. Local OSS connections and
+namespace-backed connections (via [connectNamespace](../functions/connectNamespace.md)) reject with
+a "not supported" error.
+
 #### Parameters

-* **oldName**: `string`
+* **currentName**: `string`
+    The current name of the table.

 * **newName**: `string`
+    The new name for the table.

-* **namespacePath?**: `string`[]
+* **options?**: [`RenameTableOptions`](../interfaces/RenameTableOptions.md)
+    Optional namespace paths. When
+    `newNamespacePath` is omitted the table stays in `namespacePath`.

 #### Returns

--- a/docs/src/js/globals.md
+++ b/docs/src/js/globals.md
@@ -87,6 +87,7 @@
 - [OptimizeStats](interfaces/OptimizeStats.md)
 - [QueryExecutionOptions](interfaces/QueryExecutionOptions.md)
 - [RemovalStats](interfaces/RemovalStats.md)
+- [RenameTableOptions](interfaces/RenameTableOptions.md)
 - [RestNamespaceConfig](interfaces/RestNamespaceConfig.md)
 - [RetryConfig](interfaces/RetryConfig.md)
 - [ScannableOptions](interfaces/ScannableOptions.md)
@@ -104,6 +105,7 @@
 - [UpdateResult](interfaces/UpdateResult.md)
 - [Version](interfaces/Version.md)
 - [WriteExecutionOptions](interfaces/WriteExecutionOptions.md)
+- [WriteProgress](interfaces/WriteProgress.md)

 ## Type Aliases

--- a/docs/src/js/interfaces/AddDataOptions.md
+++ b/docs/src/js/interfaces/AddDataOptions.md
@@ -19,3 +19,39 @@ mode: "append" | "overwrite";
 If "append" (the default) then the new data will be added to the table

 If "overwrite" then the new data will replace the existing data in the table.
+
+***
+
+### progress()
+
+```ts
+progress: (progress) => void;
+```
+
+Optional callback invoked periodically with write progress.
+
+The callback is fired once per batch written and once more with
+`done: true` when the write completes. Calls are dispatched
+asynchronously to the JS event loop and never block the write — a slow
+callback will queue events rather than back-pressure the writer.
+
+Errors thrown from the callback are logged with `console.warn` and
+swallowed — they do not abort the write.
+
+#### Parameters
+
+* **progress**: [`WriteProgress`](WriteProgress.md)
+
+#### Returns
+
+`void`
+
+#### Example
+
+```ts
+await table.add(data, {
+  progress: (p) => {
+    console.log(`${p.outputRows}/${p.totalRows ?? "?"} rows`);
+  },
+});
+```
--- a/docs/src/js/interfaces/ConnectionOptions.md
+++ b/docs/src/js/interfaces/ConnectionOptions.md
@@ -70,16 +70,20 @@ client used by manifest-enabled native connections.
 optional readConsistencyInterval: number;
 ```

-(For LanceDB OSS only): The interval, in seconds, at which to check for
-updates to the table from other processes. If None, then consistency is not
-checked. For performance reasons, this is the default. For strong
-consistency, set this to zero seconds. Then every read will check for
-updates from other processes. As a compromise, you can set this to a
-non-zero value for eventual consistency. If more than that interval
-has passed since the last check, then the table will be checked for updates.
-Note: this consistency only applies to read operations. Write operations are
+The interval, in seconds, at which to check for updates to the table
+from other processes. If None, then consistency is not checked. For
+performance reasons, this is the default. For strong consistency, set
+this to zero seconds. Then every read will check for updates from other
+processes. As a compromise, you can set this to a non-zero value for
+eventual consistency. If more than that interval has passed since the
+last check, then the table will be checked for updates. Note: this
+consistency only applies to read operations. Write operations are
 always consistent.

+Stronger consistency is not free. The smaller the interval, the more
+often each read pays the cost of checking for updates against object
+storage, raising per-read latency and cost.
+
 ***

 ### region?
--- a/docs/src/js/interfaces/RenameTableOptions.md
+++ b/docs/src/js/interfaces/RenameTableOptions.md
@@ -0,0 +1,29 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / RenameTableOptions
+
+# Interface: RenameTableOptions
+
+## Properties
+
+### namespacePath?
+
+```ts
+optional namespacePath: string[];
+```
+
+The namespace path of the table being renamed. Defaults to the root
+namespace (`[]`) when omitted.
+
+***
+
+### newNamespacePath?
+
+```ts
+optional newNamespacePath: string[];
+```
+
+The namespace path to move the table to as part of the rename. When
+omitted the table stays in `namespacePath`.
--- a/docs/src/js/interfaces/WriteProgress.md
+++ b/docs/src/js/interfaces/WriteProgress.md
@@ -0,0 +1,84 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / WriteProgress
+
+# Interface: WriteProgress
+
+Progress snapshot for a write operation, delivered to the `progress`
+callback passed to [Table.add](../classes/Table.md#add).
+
+## Properties
+
+### activeTasks
+
+```ts
+activeTasks: number;
+```
+
+Number of parallel write tasks currently in flight.
+
+***
+
+### done
+
+```ts
+done: boolean;
+```
+
+`true` for the final callback; `false` otherwise.
+
+***
+
+### elapsedSeconds
+
+```ts
+elapsedSeconds: number;
+```
+
+Wall-clock seconds since the write started.
+
+***
+
+### outputBytes
+
+```ts
+outputBytes: number;
+```
+
+Number of bytes written so far.
+
+***
+
+### outputRows
+
+```ts
+outputRows: number;
+```
+
+Number of rows written so far.
+
+***
+
+### totalRows?
+
+```ts
+optional totalRows: number;
+```
+
+Total rows expected, when the input source reports it.
+
+Always set on the final callback (the one with `done: true`), falling
+back to the actual number of rows written when the source could not
+report a row count up front.
+
+***
+
+### totalTasks
+
+```ts
+totalTasks: number;
+```
+
+Total number of parallel write tasks (the write parallelism).
--- a/docs/src/python/python.md
+++ b/docs/src/python/python.md
@@ -166,6 +166,12 @@ lists the indices that LanceDb supports.

 ::: lancedb.index.IvfFlat

+::: lancedb.index.IvfSq
+
+::: lancedb.index.IvfRq
+
+::: lancedb.index.HnswFlat
+
 ::: lancedb.table.IndexStatistics

 ## Querying (Asynchronous)
--- a/java/lancedb-core/pom.xml
+++ b/java/lancedb-core/pom.xml
@@ -8,7 +8,7 @@
    <parent>
      <groupId>com.lancedb</groupId>
      <artifactId>lancedb-parent</artifactId>
-      <version>0.29.1-beta.0</version>
+      <version>0.30.0-final.0</version>
      <relativePath>../pom.xml</relativePath>
    </parent>

--- a/java/pom.xml
+++ b/java/pom.xml
@@ -6,7 +6,7 @@

    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-parent</artifactId>
-    <version>0.29.1-beta.0</version>
+    <version>0.30.0-final.0</version>
    <packaging>pom</packaging>
    <name>${project.artifactId}</name>
    <description>LanceDB Java SDK Parent POM</description>
@@ -28,7 +28,7 @@
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <arrow.version>15.0.0</arrow.version>
-        <lance-core.version>7.0.0-beta.13</lance-core.version>
+        <lance-core.version>7.0.0</lance-core.version>
        <spotless.skip>false</spotless.skip>
        <spotless.version>2.30.0</spotless.version>
        <spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
--- a/nodejs/Cargo.toml
+++ b/nodejs/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "lancedb-nodejs"
 edition.workspace = true
-version = "0.29.1-beta.0"
+version = "0.30.0"
 publish = false
 license.workspace = true
 description.workspace = true
--- a/nodejs/test/connection.test.ts
+++ b/nodejs/test/connection.test.ts
@@ -47,6 +47,14 @@ describe("given a connection", () => {
    await db.close();
    expect(db.isOpen()).toBe(false);
    await expect(db.tableNames()).rejects.toThrow("Connection is closed");
+    await expect(db.renameTable("a", "b")).rejects.toThrow(
+      "Connection is closed",
+    );
+  });
+
+  it("should report renameTable as unsupported on an OSS connection", async () => {
+    await db.createTable("a", [{ id: 1 }]);
+    await expect(db.renameTable("a", "b")).rejects.toThrow(/not supported/);
  });
  it("should be able to create a table from an object arg `createTable(options)`, or args `createTable(name, data, options)`", async () => {
    let tbl = await db.createTable("test", [{ id: 1 }, { id: 2 }]);
@@ -81,16 +89,6 @@ describe("given a connection", () => {
    await db.createTable("test4", [{ id: 1 }, { id: 2 }]);
  });

-  it("should expose renameTable and reject on OSS listing DB", async () => {
-    await db.createTable("old_name", [{ id: 1 }]);
-
-    await expect(db.renameTable("old_name", "new_name")).rejects.toThrow(
-      "rename_table is not supported in LanceDB OSS",
-    );
-
-    await expect(db.tableNames()).resolves.toEqual(["old_name"]);
-  });
-
  it("should fail if creating table twice, unless overwrite is true", async () => {
    let tbl = await db.createTable("test", [{ id: 1 }, { id: 2 }]);
    await expect(tbl.countRows()).resolves.toBe(2);
@@ -173,18 +171,22 @@ describe("given a connection", () => {

    let manifestDir =
      tmpDir.name + "/test_manifest_paths_v2_empty.lance/_versions";
-    readdirSync(manifestDir).forEach((file) => {
-      expect(file).toMatch(/^\d{20}\.manifest$/);
-    });
+    readdirSync(manifestDir)
+      .filter((f) => f.endsWith(".manifest"))
+      .forEach((file) => {
+        expect(file).toMatch(/^\d{20}\.manifest$/);
+      });

    table = (await db.createTable("test_manifest_paths_v2", [{ id: 1 }], {
      enableV2ManifestPaths: true,
    })) as LocalTable;
    expect(await table.usesV2ManifestPaths()).toBe(true);
    manifestDir = tmpDir.name + "/test_manifest_paths_v2.lance/_versions";
-    readdirSync(manifestDir).forEach((file) => {
-      expect(file).toMatch(/^\d{20}\.manifest$/);
-    });
+    readdirSync(manifestDir)
+      .filter((f) => f.endsWith(".manifest"))
+      .forEach((file) => {
+        expect(file).toMatch(/^\d{20}\.manifest$/);
+      });
  });

  it("should be able to migrate tables to the V2 manifest paths", async () => {
@@ -201,16 +203,20 @@ describe("given a connection", () => {

    const manifestDir =
      tmpDir.name + "/test_manifest_path_migration.lance/_versions";
-    readdirSync(manifestDir).forEach((file) => {
-      expect(file).toMatch(/^\d\.manifest$/);
-    });
+    readdirSync(manifestDir)
+      .filter((f) => f.endsWith(".manifest"))
+      .forEach((file) => {
+        expect(file).toMatch(/^\d\.manifest$/);
+      });

    await table.migrateManifestPathsV2();
    expect(await table.usesV2ManifestPaths()).toBe(true);

-    readdirSync(manifestDir).forEach((file) => {
-      expect(file).toMatch(/^\d{20}\.manifest$/);
-    });
+    readdirSync(manifestDir)
+      .filter((f) => f.endsWith(".manifest"))
+      .forEach((file) => {
+        expect(file).toMatch(/^\d{20}\.manifest$/);
+      });
  });
 });

--- a/nodejs/test/remote.test.ts
+++ b/nodejs/test/remote.test.ts
@@ -617,4 +617,68 @@ describe("remote connection", () => {
      );
    });
  });
+
+  describe("renameTable", () => {
+    async function captureRenameRequest(
+      call: (db: Connection) => Promise<void>,
+    ): Promise<{ url: string; body: Record<string, unknown> }> {
+      let captured: { url: string; body: Record<string, unknown> } | undefined;
+      await withMockDatabase((req, res) => {
+        let raw = "";
+        req.on("data", (chunk) => {
+          raw += chunk;
+        });
+        req.on("end", () => {
+          captured = {
+            url: req.url ?? "",
+            body: raw ? JSON.parse(raw) : {},
+          };
+          res.writeHead(200, { "Content-Type": "application/json" }).end("");
+        });
+      }, call);
+      if (!captured) {
+        throw new Error("mock server never saw a request");
+      }
+      return captured;
+    }
+
+    it("sends rename request for a table in the root namespace", async () => {
+      const { url, body } = await captureRenameRequest(async (db) => {
+        await db.renameTable("table1", "table2");
+      });
+      expect(url).toBe("/v1/table/table1/rename/");
+      // biome-ignore lint/style/useNamingConvention: snake_case mandated by the server wire format
+      expect(body).toEqual({ new_table_name: "table2" });
+    });
+
+    it("omits new_namespace when only the current namespace is supplied", async () => {
+      // Safe-default check: passing namespacePath alone must not send
+      // `new_namespace`, so the server keeps the table in its current
+      // namespace instead of silently moving it to root.
+      const { url, body } = await captureRenameRequest(async (db) => {
+        await db.renameTable("table1", "table2", {
+          namespacePath: ["ns1"],
+        });
+      });
+      expect(url).toBe("/v1/table/ns1$table1/rename/");
+      // biome-ignore lint/style/useNamingConvention: snake_case mandated by the server wire format
+      expect(body).toEqual({ new_table_name: "table2" });
+    });
+
+    it("includes new_namespace in the body for a cross-namespace rename", async () => {
+      const { url, body } = await captureRenameRequest(async (db) => {
+        await db.renameTable("table1", "table2", {
+          namespacePath: ["ns1"],
+          newNamespacePath: ["ns2"],
+        });
+      });
+      expect(url).toBe("/v1/table/ns1$table1/rename/");
+      expect(body).toEqual({
+        // biome-ignore lint/style/useNamingConvention: snake_case mandated by the server wire format
+        new_table_name: "table2",
+        // biome-ignore lint/style/useNamingConvention: snake_case mandated by the server wire format
+        new_namespace: ["ns2"],
+      });
+    });
+  });
 });
--- a/nodejs/test/table.test.ts
+++ b/nodejs/test/table.test.ts
@@ -28,6 +28,7 @@ import {
  List,
  Schema,
  SchemaLike,
+  Struct,
  Type,
  Uint8,
  Utf8,
@@ -115,6 +116,48 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
      await expect(table.countRows()).resolves.toBe(1);
    });

+    it("should invoke the progress callback", async () => {
+      const events: import("../lancedb").WriteProgress[] = [];
+      await table.add([{ id: 1 }, { id: 2 }, { id: 3 }], {
+        progress: (p) => events.push(p),
+      });
+
+      expect(events.length).toBeGreaterThan(0);
+      const last = events[events.length - 1];
+      expect(last.done).toBe(true);
+      // Earlier callbacks must have done=false.
+      for (const ev of events.slice(0, -1)) {
+        expect(ev.done).toBe(false);
+      }
+      // outputRows reflects the rows added in this call, not table size.
+      expect(last.outputRows).toBe(3);
+      // The input source (an array) reports a row count, so totalRows is set.
+      expect(last.totalRows).toBe(3);
+      // outputRows is monotonic.
+      for (let i = 1; i < events.length; i++) {
+        expect(events[i].outputRows).toBeGreaterThanOrEqual(
+          events[i - 1].outputRows,
+        );
+      }
+    });
+
+    it("should swallow errors thrown from the progress callback", async () => {
+      const warn = jest
+        .spyOn(console, "warn")
+        .mockImplementation(() => undefined);
+      try {
+        const res = await table.add([{ id: 1 }, { id: 2 }], {
+          progress: () => {
+            throw new Error("callback bomb");
+          },
+        });
+        expect(res.version).toBeGreaterThan(0);
+        expect(warn).toHaveBeenCalled();
+      } finally {
+        warn.mockRestore();
+      }
+    });
+
    it("should let me close the table", async () => {
      expect(table.isOpen()).toBe(true);
      table.close();
@@ -738,6 +781,113 @@ describe("When creating an index", () => {
    expect(indices2.length).toBe(0);
  });

+  it("should create and search a nested vector index", async () => {
+    const db = await connect(tmpDir.name);
+    const nestedSchema = new Schema([
+      new Field("id", new Int32(), true),
+      new Field(
+        "image",
+        new Struct([
+          new Field(
+            "embedding",
+            new FixedSizeList(2, new Field("item", new Float32(), true)),
+            true,
+          ),
+        ]),
+        true,
+      ),
+    ]);
+    const nestedTable = await db.createTable(
+      "nested_vector",
+      makeArrowTable(
+        Array.from({ length: 300 }, (_, id) => ({
+          id,
+          image: { embedding: [id, id + 1] },
+        })),
+        { schema: nestedSchema },
+      ),
+    );
+
+    await nestedTable.createIndex("image.embedding", {
+      name: "image_embedding_idx",
+    });
+    const indices = await nestedTable.listIndices();
+    expect(indices).toContainEqual({
+      name: "image_embedding_idx",
+      indexType: "IvfPq",
+      columns: ["image.embedding"],
+    });
+
+    const explicit = await nestedTable
+      .query()
+      .nearestTo([0.0, 1.0])
+      .column("image.embedding")
+      .limit(1)
+      .toArray();
+    const inferred = await nestedTable
+      .query()
+      .nearestTo([0.0, 1.0])
+      .limit(1)
+      .toArray();
+    expect(inferred[0].id).toEqual(explicit[0].id);
+  });
+
+  it("should report multiple nested vector candidates", async () => {
+    const db = await connect(tmpDir.name);
+    const nestedSchema = new Schema([
+      new Field(
+        "image",
+        new Struct([
+          new Field(
+            "embedding",
+            new FixedSizeList(2, new Field("item", new Float32(), true)),
+            true,
+          ),
+        ]),
+        true,
+      ),
+      new Field(
+        "text",
+        new Struct([
+          new Field(
+            "embedding",
+            new FixedSizeList(2, new Field("item", new Float32(), true)),
+            true,
+          ),
+        ]),
+        true,
+      ),
+    ]);
+    const nestedTable = await db.createTable(
+      "multiple_nested_vectors",
+      makeArrowTable(
+        [
+          {
+            image: { embedding: [0.0, 1.0] },
+            text: { embedding: [2.0, 3.0] },
+          },
+        ],
+        { schema: nestedSchema },
+      ),
+    );
+
+    await expect(
+      nestedTable.query().nearestTo([0.0, 1.0]).limit(1).toArray(),
+    ).rejects.toThrow(/image\.embedding.*text\.embedding/);
+  });
+
+  it("should report when no default vector column exists", async () => {
+    const db = await connect(tmpDir.name);
+    const noVectorTable = await db.createTable(
+      "no_vector",
+      makeArrowTable([{ id: 0, label: "cat" }]),
+    );
+
+    await expect(
+      noVectorTable.query().nearestTo([0.0, 1.0]).limit(1).toArray(),
+    ).rejects.toThrow(/No vector column/);
+  });
+
  it("should wait for index readiness", async () => {
    // Create an index and then wait for it to be ready
    await tbl.createIndex("vec");
--- a/nodejs/lancedb/connection.ts
+++ b/nodejs/lancedb/connection.ts
@@ -144,6 +144,19 @@ export interface DropNamespaceOptions {
  behavior?: "restrict" | "cascade";
 }

+export interface RenameTableOptions {
+  /**
+   * The namespace path of the table being renamed. Defaults to the root
+   * namespace (`[]`) when omitted.
+   */
+  namespacePath?: string[];
+  /**
+   * The namespace path to move the table to as part of the rename. When
+   * omitted the table stays in `namespacePath`.
+   */
+  newNamespacePath?: string[];
+}
+
 /**
 * A LanceDB Connection that allows you to open tables and create new ones.
 *
@@ -296,12 +309,6 @@ export abstract class Connection {
   */
  abstract dropTable(name: string, namespacePath?: string[]): Promise<void>;

-  abstract renameTable(
-    oldName: string,
-    newName: string,
-    namespacePath?: string[],
-  ): Promise<void>;
-
  /**
   * Drop all tables in the database.
   * @param {string[]} namespacePath The namespace path to drop tables from (defaults to root namespace).
@@ -397,6 +404,24 @@ export abstract class Connection {
      isShallow?: boolean;
    },
  ): Promise<Table>;
+
+  /**
+   * Rename a table.
+   *
+   * Currently only supported by LanceDB Cloud. Local OSS connections and
+   * namespace-backed connections (via {@link connectNamespace}) reject with
+   * a "not supported" error.
+   *
+   * @param {string} currentName - The current name of the table.
+   * @param {string} newName - The new name for the table.
+   * @param {RenameTableOptions} options - Optional namespace paths. When
+   *   `newNamespacePath` is omitted the table stays in `namespacePath`.
+   */
+  abstract renameTable(
+    currentName: string,
+    newName: string,
+    options?: RenameTableOptions,
+  ): Promise<void>;
 }

 /** @hideconstructor */
@@ -615,14 +640,6 @@ export class LocalConnection extends Connection {
    return this.inner.dropTable(name, namespacePath ?? []);
  }

-  async renameTable(
-    oldName: string,
-    newName: string,
-    namespacePath?: string[],
-  ): Promise<void> {
-    return this.inner.renameTable(oldName, newName, namespacePath ?? []);
-  }
-
  async dropAllTables(namespacePath?: string[]): Promise<void> {
    return this.inner.dropAllTables(namespacePath ?? []);
  }
@@ -665,6 +682,19 @@ export class LocalConnection extends Connection {
      options?.behavior,
    );
  }
+
+  async renameTable(
+    currentName: string,
+    newName: string,
+    options?: RenameTableOptions,
+  ): Promise<void> {
+    return this.inner.renameTable(
+      currentName,
+      newName,
+      options?.namespacePath ?? [],
+      options?.newNamespacePath,
+    );
+  }
 }

 /**
--- a/nodejs/lancedb/index.ts
+++ b/nodejs/lancedb/index.ts
@@ -71,6 +71,7 @@ export {
  CreateNamespaceResponse,
  DropNamespaceResponse,
  DescribeNamespaceResponse,
+  RenameTableOptions,
 } from "./connection";

 export { Session } from "./native.js";
@@ -113,6 +114,7 @@ export {
  UpdateOptions,
  OptimizeOptions,
  Version,
+  WriteProgress,
  LsmWriteSpec,
  ColumnAlteration,
 } from "./table";
--- a/nodejs/lancedb/table.ts
+++ b/nodejs/lancedb/table.ts
@@ -46,6 +46,33 @@ import { sanitizeType } from "./sanitize";
 import { IntoSql, toSQL } from "./util";
 export { IndexConfig } from "./native";

+/**
+ * Progress snapshot for a write operation, delivered to the `progress`
+ * callback passed to {@link Table.add}.
+ */
+export interface WriteProgress {
+  /** Number of rows written so far. */
+  outputRows: number;
+  /** Number of bytes written so far. */
+  outputBytes: number;
+  /**
+   * Total rows expected, when the input source reports it.
+   *
+   * Always set on the final callback (the one with `done: true`), falling
+   * back to the actual number of rows written when the source could not
+   * report a row count up front.
+   */
+  totalRows?: number;
+  /** Wall-clock seconds since the write started. */
+  elapsedSeconds: number;
+  /** Number of parallel write tasks currently in flight. */
+  activeTasks: number;
+  /** Total number of parallel write tasks (the write parallelism). */
+  totalTasks: number;
+  /** `true` for the final callback; `false` otherwise. */
+  done: boolean;
+}
+
 /**
 * Options for adding data to a table.
 */
@@ -56,6 +83,28 @@ export interface AddDataOptions {
   * If "overwrite" then the new data will replace the existing data in the table.
   */
  mode: "append" | "overwrite";
+
+  /**
+   * Optional callback invoked periodically with write progress.
+   *
+   * The callback is fired once per batch written and once more with
+   * `done: true` when the write completes. Calls are dispatched
+   * asynchronously to the JS event loop and never block the write — a slow
+   * callback will queue events rather than back-pressure the writer.
+   *
+   * Errors thrown from the callback are logged with `console.warn` and
+   * swallowed — they do not abort the write.
+   *
+   * @example
+   * ```ts
+   * await table.add(data, {
+   *   progress: (p) => {
+   *     console.log(`${p.outputRows}/${p.totalRows ?? "?"} rows`);
+   *   },
+   * });
+   * ```
+   */
+  progress: (progress: WriteProgress) => void;
 }

 export interface UpdateOptions {
@@ -705,7 +754,20 @@ export class LocalTable extends Table {
    const schema = await this.schema();

    const buffer = await fromDataToBuffer(data, undefined, schema);
-    return await this.inner.add(buffer, mode);
+    // Wrap the user callback so a thrown error doesn't surface as an
+    // unhandled exception (the callback fires from a napi threadsafe
+    // function — exceptions there crash the process).
+    const userProgress = options?.progress;
+    const progress = userProgress
+      ? (p: WriteProgress) => {
+          try {
+            userProgress(p);
+          } catch (e) {
+            console.warn("Table.add progress callback threw:", e);
+          }
+        }
+      : undefined;
+    return await this.inner.add(buffer, mode, progress);
  }

  async update(
--- a/nodejs/npm/darwin-arm64/package.json
+++ b/nodejs/npm/darwin-arm64/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-darwin-arm64",
-	"version": "0.29.1-beta.0",
+	"version": "0.30.0",
 	"os": ["darwin"],
 	"cpu": ["arm64"],
 	"main": "lancedb.darwin-arm64.node",
--- a/nodejs/npm/linux-arm64-gnu/package.json
+++ b/nodejs/npm/linux-arm64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-gnu",
-	"version": "0.29.1-beta.0",
+	"version": "0.30.0",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-gnu.node",
--- a/nodejs/npm/linux-arm64-musl/package.json
+++ b/nodejs/npm/linux-arm64-musl/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-musl",
-	"version": "0.29.1-beta.0",
+	"version": "0.30.0",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-musl.node",
--- a/nodejs/npm/linux-x64-gnu/package.json
+++ b/nodejs/npm/linux-x64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-gnu",
-	"version": "0.29.1-beta.0",
+	"version": "0.30.0",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-gnu.node",
--- a/nodejs/npm/linux-x64-musl/package.json
+++ b/nodejs/npm/linux-x64-musl/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-musl",
-	"version": "0.29.1-beta.0",
+	"version": "0.30.0",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-musl.node",
--- a/nodejs/npm/win32-arm64-msvc/package.json
+++ b/nodejs/npm/win32-arm64-msvc/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@lancedb/lancedb-win32-arm64-msvc",
-  "version": "0.29.1-beta.0",
+  "version": "0.30.0",
  "os": [
    "win32"
  ],
--- a/nodejs/npm/win32-x64-msvc/package.json
+++ b/nodejs/npm/win32-x64-msvc/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-win32-x64-msvc",
-	"version": "0.29.1-beta.0",
+	"version": "0.30.0",
 	"os": ["win32"],
 	"cpu": ["x64"],
 	"main": "lancedb.win32-x64-msvc.node",
--- a/nodejs/package-lock.json
+++ b/nodejs/package-lock.json
--- a/nodejs/package.json
+++ b/nodejs/package.json
@@ -11,7 +11,7 @@
    "ann"
  ],
  "private": false,
-  "version": "0.29.1-beta.0",
+  "version": "0.30.0",
  "main": "dist/index.js",
  "exports": {
    ".": "./dist/index.js",
--- a/nodejs/src/connection.rs
+++ b/nodejs/src/connection.rs
@@ -328,20 +328,6 @@ impl Connection {
            .default_error()
    }

-    #[napi(catch_unwind)]
-    pub async fn rename_table(
-        &self,
-        old_name: String,
-        new_name: String,
-        namespace_path: Option<Vec<String>>,
-    ) -> napi::Result<()> {
-        let ns = namespace_path.unwrap_or_default();
-        self.get_inner()?
-            .rename_table(&old_name, &new_name, &ns, &ns)
-            .await
-            .default_error()
-    }
-
    #[napi(catch_unwind)]
    pub async fn drop_all_tables(&self, namespace_path: Option<Vec<String>>) -> napi::Result<()> {
        let ns = namespace_path.unwrap_or_default();
@@ -473,4 +459,23 @@ impl Connection {
            transaction_id: resp.transaction_id,
        })
    }
+
+    /// Rename a table. `current_namespace_path` and `new_namespace_path` default to
+    /// the root namespace when omitted; the caller is expected to either pass both
+    /// or pass neither.
+    #[napi(catch_unwind)]
+    pub async fn rename_table(
+        &self,
+        current_name: String,
+        new_name: String,
+        current_namespace_path: Option<Vec<String>>,
+        new_namespace_path: Option<Vec<String>>,
+    ) -> napi::Result<()> {
+        let cur_ns = current_namespace_path.unwrap_or_default();
+        let new_ns = new_namespace_path.unwrap_or_default();
+        self.get_inner()?
+            .rename_table(&current_name, &new_name, &cur_ns, &new_ns)
+            .await
+            .default_error()
+    }
 }
--- a/nodejs/src/lib.rs
+++ b/nodejs/src/lib.rs
@@ -24,15 +24,19 @@ mod util;
 #[napi(object)]
 #[derive(Debug)]
 pub struct ConnectionOptions {
-    /// (For LanceDB OSS only): The interval, in seconds, at which to check for
-    /// updates to the table from other processes. If None, then consistency is not
-    /// checked. For performance reasons, this is the default. For strong
-    /// consistency, set this to zero seconds. Then every read will check for
-    /// updates from other processes. As a compromise, you can set this to a
-    /// non-zero value for eventual consistency. If more than that interval
-    /// has passed since the last check, then the table will be checked for updates.
-    /// Note: this consistency only applies to read operations. Write operations are
+    /// The interval, in seconds, at which to check for updates to the table
+    /// from other processes. If None, then consistency is not checked. For
+    /// performance reasons, this is the default. For strong consistency, set
+    /// this to zero seconds. Then every read will check for updates from other
+    /// processes. As a compromise, you can set this to a non-zero value for
+    /// eventual consistency. If more than that interval has passed since the
+    /// last check, then the table will be checked for updates. Note: this
+    /// consistency only applies to read operations. Write operations are
    /// always consistent.
+    ///
+    /// Stronger consistency is not free. The smaller the interval, the more
+    /// often each read pays the cost of checking for updates against object
+    /// storage, raising per-read latency and cost.
    pub read_consistency_interval: Option<f64>,
    /// (For LanceDB OSS only): configuration for object storage.
    ///
--- a/nodejs/src/table.rs
+++ b/nodejs/src/table.rs
@@ -9,6 +9,7 @@ use lancedb::table::{
    OptimizeAction, OptimizeOptions, Table as LanceDbTable,
 };
 use napi::bindgen_prelude::*;
+use napi::threadsafe_function::{ThreadsafeFunction, ThreadsafeFunctionCallMode};
 use napi_derive::napi;

 use crate::error::NapiErrorExt;
@@ -67,8 +68,16 @@ impl Table {
        schema_to_buffer(&schema)
    }

-    #[napi(catch_unwind)]
-    pub async fn add(&self, buf: Buffer, mode: String) -> napi::Result<AddResult> {
+    #[napi(
+        catch_unwind,
+        ts_args_type = "buf: Buffer, mode: string, progressCallback?: (progress: WriteProgressInfo) => void"
+    )]
+    pub async fn add(
+        &self,
+        buf: Buffer,
+        mode: String,
+        progress_callback: Option<ProgressFn>,
+    ) -> napi::Result<AddResult> {
        let batches = ipc_file_to_batches(buf.to_vec())
            .map_err(|e| napi::Error::from_reason(format!("Failed to read IPC file: {}", e)))?;
        let batches = batches
@@ -92,6 +101,19 @@ impl Table {
            return Err(napi::Error::from_reason(format!("Invalid mode: {}", mode)));
        };

+        if let Some(tsfn) = progress_callback {
+            op = op.progress(move |p| {
+                // NonBlocking: dispatch onto the JS event loop without
+                // blocking the writer thread.  With napi-rs's default
+                // unbounded queue, events are not dropped — a slow JS
+                // callback will just queue them.
+                tsfn.call(
+                    WriteProgressInfo::from(p),
+                    ThreadsafeFunctionCallMode::NonBlocking,
+                );
+            });
+        }
+
        let res = op.execute().await.default_error()?;
        Ok(res.into())
    }
@@ -654,6 +676,44 @@ pub struct OptimizeStats {
    pub prune: RemovalStats,
 }

+/// Progress snapshot for a write operation, delivered to the JS callback
+/// passed to `Table.add`.
+#[napi(object)]
+#[derive(Clone, Debug)]
+pub struct WriteProgressInfo {
+    /// Number of rows written so far.
+    pub output_rows: i64,
+    /// Number of bytes written so far.
+    pub output_bytes: i64,
+    /// Total rows expected, if the input source reports it.
+    /// Always set on the final callback (where `done` is `true`).
+    pub total_rows: Option<i64>,
+    /// Wall-clock seconds since monitoring started.
+    pub elapsed_seconds: f64,
+    /// Number of parallel write tasks currently in flight.
+    pub active_tasks: i64,
+    /// Total number of parallel write tasks (the write parallelism).
+    pub total_tasks: i64,
+    /// `true` for the final callback; `false` otherwise.
+    pub done: bool,
+}
+
+impl From<&lancedb::table::write_progress::WriteProgress> for WriteProgressInfo {
+    fn from(p: &lancedb::table::write_progress::WriteProgress) -> Self {
+        Self {
+            output_rows: p.output_rows() as i64,
+            output_bytes: p.output_bytes() as i64,
+            total_rows: p.total_rows().map(|n| n as i64),
+            elapsed_seconds: p.elapsed().as_secs_f64(),
+            active_tasks: p.active_tasks() as i64,
+            total_tasks: p.total_tasks() as i64,
+            done: p.done(),
+        }
+    }
+}
+
+type ProgressFn = ThreadsafeFunction<WriteProgressInfo, (), WriteProgressInfo, Status, false>;
+
 ///  A definition of a column alteration. The alteration changes the column at
 /// `path` to have the new name `name`, to be nullable if `nullable` is true,
 /// and to have the data type `data_type`. At least one of `rename` or `nullable`
--- a/python/.bumpversion.toml
+++ b/python/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.32.1-beta.0"
+current_version = "0.33.0"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/python/AGENTS.md
+++ b/python/AGENTS.md
@@ -4,16 +4,26 @@ code is in the `src/` directory and the Python bindings are in the `lancedb/` di

 Common commands:

+* Bootstrap dev env: `uv run --extra tests --extra dev maturin develop --extras tests,dev`
 * Build: `make develop`
 * Format: `make format`
 * Lint: `make check`
 * Fix lints: `make fix`
-* Test: `make test`
-* Doc test: `make doctest`
+* Test: `uv run --extra tests pytest python/tests -vv --durations=10 -m "not slow and not s3_test"`
+* Run specific test: `uv run --extra tests pytest python/tests/<test_file>.py::<test_name> -q`
+* Doc test: `uv run --extra tests pytest --doctest-modules python/lancedb`
+
+Use the uv-managed environment declared by `uv.lock` for Python validation. Do
+not treat system `python`, global `pytest`, or missing editable-install errors
+as final blockers; bootstrap or enter the uv environment instead. `make test`
+and `make doctest` assume the development environment is already prepared.

 Before committing changes, run lints and then formatting.

-When you change the Rust code, you will need to recompile the Python bindings: `make develop`.
+When you change the Rust code, PyO3 binding code, or see a missing/stale
+`lancedb._lancedb`, recompile the Python bindings with
+`uv run --extra tests --extra dev maturin develop --extras tests,dev` before
+running tests.

 When you export new types from Rust to Python, you must manually update `python/lancedb/_lancedb.pyi`
 with the corresponding type hints. You can run `pyright` to check for type errors in the Python code.
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-python"
-version = "0.32.1-beta.0"
+version = "0.33.0"
 publish = false
 edition.workspace = true
 description = "Python bindings for LanceDB"
--- a/python/python/lancedb/init.py
+++ b/python/python/lancedb/init.py
@@ -94,7 +94,6 @@ def connect(
    host_override: str, optional
        The override url for LanceDB Cloud.
    read_consistency_interval: timedelta, default None
-        (For LanceDB OSS only)
        The interval at which to check for updates to the table from other
        processes. If None, then consistency is not checked. For performance
        reasons, this is the default. For strong consistency, set this to
@@ -104,6 +103,10 @@ def connect(
        the last check, then the table will be checked for updates. Note: this
        consistency only applies to read operations. Write operations are
        always consistent.
+
+        Stronger consistency is not free. The smaller the interval, the more
+        often each read pays the cost of checking for updates against object
+        storage, raising per-read latency and cost.
    client_config: ClientConfig or dict, optional
        Configuration options for the LanceDB Cloud HTTP client. If a dict, then
        the keys are the attributes of the ClientConfig class. If None, then the
@@ -147,6 +150,13 @@ def connect(
    >>> db = lancedb.connect("s3://my-bucket/lancedb",
    ...                      storage_options={"aws_access_key_id": "***"})

+    For tests and temporary data, use an in-memory database:
+
+    >>> db = lancedb.connect("memory://")
+
+    In-memory databases are not persisted. Tables are dropped when the last
+    connection or table handle referencing them is closed.
+
    Connect to LanceDB cloud:

    >>> db = lancedb.connect("db://my_database", api_key="ldb_...",
@@ -210,6 +220,7 @@ def connect(
            request_thread_pool=request_thread_pool,
            client_config=client_config,
            storage_options=storage_options,
+            read_consistency_interval=read_consistency_interval,
            **kwargs,
        )
    _check_s3_bucket_with_dots(str(uri), storage_options)
@@ -336,7 +347,6 @@ async def connect_async(
    host_override: str, optional
        The override url for LanceDB Cloud.
    read_consistency_interval: timedelta, default None
-        (For LanceDB OSS only)
        The interval at which to check for updates to the table from other
        processes. If None, then consistency is not checked. For performance
        reasons, this is the default. For strong consistency, set this to
@@ -346,6 +356,10 @@ async def connect_async(
        the last check, then the table will be checked for updates. Note: this
        consistency only applies to read operations. Write operations are
        always consistent.
+
+        Stronger consistency is not free. The smaller the interval, the more
+        often each read pays the cost of checking for updates against object
+        storage, raising per-read latency and cost.
    client_config: ClientConfig or dict, optional
        Configuration options for the LanceDB Cloud HTTP client. If a dict, then
        the keys are the attributes of the ClientConfig class. If None, then the
@@ -378,6 +392,8 @@ async def connect_async(
    ...     db = await lancedb.connect_async("s3://my-bucket/lancedb",
    ...                                      storage_options={
    ...                                          "aws_access_key_id": "***"})
+    ...     # For tests and temporary data, use an in-memory database
+    ...     db = await lancedb.connect_async("memory://")
    ...     # Connect to LanceDB cloud
    ...     db = await lancedb.connect_async("db://my_database", api_key="ldb_...",
    ...                                      client_config={
--- a/python/python/lancedb/db.py
+++ b/python/python/lancedb/db.py
@@ -8,7 +8,17 @@ from abc import abstractmethod
 from datetime import timedelta
 from pathlib import Path
 import sys
-from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Literal, Optional, Union
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Dict,
+    Generator,
+    Iterable,
+    List,
+    Literal,
+    Optional,
+    Union,
+)

 if sys.version_info >= (3, 12):
    from typing import override
@@ -313,7 +323,7 @@ class DBConnection(EnforceOverrides):
        >>> data = [{"vector": [1.1, 1.2], "lat": 45.5, "long": -122.7},
        ...         {"vector": [0.2, 1.8], "lat": 40.1, "long":  -74.1}]
        >>> db.create_table("my_table", data)
-        LanceTable(name='my_table', version=1, ...)
+        LanceTable(name='my_table', ...)
        >>> db["my_table"].head()
        pyarrow.Table
        vector: fixed_size_list<item: float>[2]
@@ -334,7 +344,7 @@ class DBConnection(EnforceOverrides):
        ...    "long": [-122.7, -74.1]
        ... })
        >>> db.create_table("table2", data)
-        LanceTable(name='table2', version=1, ...)
+        LanceTable(name='table2', ...)
        >>> db["table2"].head()
        pyarrow.Table
        vector: fixed_size_list<item: float>[2]
@@ -357,7 +367,7 @@ class DBConnection(EnforceOverrides):
        ...   pa.field("long", pa.float32())
        ... ])
        >>> db.create_table("table3", data, schema = custom_schema)
-        LanceTable(name='table3', version=1, ...)
+        LanceTable(name='table3', ...)
        >>> db["table3"].head()
        pyarrow.Table
        vector: fixed_size_list<item: float>[2]
@@ -391,7 +401,7 @@ class DBConnection(EnforceOverrides):
        ...     pa.field("price", pa.float32()),
        ... ])
        >>> db.create_table("table4", make_batches(), schema=schema)
-        LanceTable(name='table4', version=1, ...)
+        LanceTable(name='table4', ...)

        """
        raise NotImplementedError
@@ -568,15 +578,15 @@ class LanceDBConnection(DBConnection):
    >>> db = lancedb.connect("./.lancedb")
    >>> db.create_table("my_table", data=[{"vector": [1.1, 1.2], "b": 2},
    ...                                   {"vector": [0.5, 1.3], "b": 4}])
-    LanceTable(name='my_table', version=1, ...)
+    LanceTable(name='my_table', ...)
    >>> db.create_table("another_table", data=[{"vector": [0.4, 0.4], "b": 6}])
-    LanceTable(name='another_table', version=1, ...)
+    LanceTable(name='another_table', ...)
    >>> sorted(db.table_names())
    ['another_table', 'my_table']
    >>> len(db)
    2
    >>> db["my_table"]
-    LanceTable(name='my_table', version=1, ...)
+    LanceTable(name='my_table', ...)
    >>> "my_table" in db
    True
    >>> db.drop_table("my_table")
@@ -847,11 +857,20 @@ class LanceDBConnection(DBConnection):
            )
        )

+    def _all_table_names(self) -> Generator[str, None, None]:
+        page_token = None
+        while True:
+            response = self.list_tables(page_token=page_token)
+            yield from response.tables
+            page_token = response.page_token
+            if not page_token:
+                return
+
    def __len__(self) -> int:
-        return len(self.table_names())
+        return sum(1 for _ in self._all_table_names())

    def __contains__(self, name: str) -> bool:
-        return name in self.table_names()
+        return name in self._all_table_names()

    @override
    def create_table(
--- a/python/python/lancedb/query.py
+++ b/python/python/lancedb/query.py
@@ -3,12 +3,14 @@

 from __future__ import annotations

+import asyncio
 from abc import ABC, abstractmethod
 from concurrent.futures import ThreadPoolExecutor
-from enum import Enum
 from datetime import timedelta
+from enum import Enum
 from typing import (
    TYPE_CHECKING,
+    Any,
    Dict,
    List,
    Literal,
@@ -17,41 +19,40 @@ from typing import (
    Type,
    TypeVar,
    Union,
-    Any,
 )

-import asyncio
 import deprecation
 import numpy as np
 import pyarrow as pa
 import pyarrow.compute as pc
 import pydantic
+from typing_extensions import Annotated

-from lancedb.pydantic import PYDANTIC_VERSION
+from lancedb._lancedb import fts_query_to_json
 from lancedb.background_loop import LOOP
+from lancedb.pydantic import PYDANTIC_VERSION

 from . import __version__
 from .arrow import AsyncRecordBatchReader
 from .dependencies import pandas as pd
+from .expr import Expr
 from .rerankers.base import Reranker
 from .rerankers.rrf import RRFReranker
 from .rerankers.util import check_reranker_result
 from .util import flatten_columns
-from .expr import Expr
-from lancedb._lancedb import fts_query_to_json
-from typing_extensions import Annotated

 if TYPE_CHECKING:
    import sys
+
    import PIL
    import polars as pl

-    from ._lancedb import Query as LanceQuery
    from ._lancedb import FTSQuery as LanceFTSQuery
    from ._lancedb import HybridQuery as LanceHybridQuery
-    from ._lancedb import VectorQuery as LanceVectorQuery
-    from ._lancedb import TakeQuery as LanceTakeQuery
    from ._lancedb import PyQueryRequest
+    from ._lancedb import Query as LanceQuery
+    from ._lancedb import TakeQuery as LanceTakeQuery
+    from ._lancedb import VectorQuery as LanceVectorQuery
    from .common import VEC
    from .pydantic import LanceModel
    from .table import Table
@@ -718,6 +719,7 @@ class LanceQueryBuilder(ABC):
        flatten: Optional[Union[int, bool]] = None,
        *,
        timeout: Optional[timedelta] = None,
+        **kwargs,
    ) -> "pd.DataFrame":
        """
        Execute the query and return the results as a pandas DataFrame.
@@ -735,9 +737,12 @@ class LanceQueryBuilder(ABC):
        timeout: Optional[timedelta]
            The maximum time to wait for the query to complete.
            If None, wait indefinitely.
+        **kwargs
+            Forwarded to pyarrow.Table.to_pandas after query execution and
+            optional flattening.
        """
        tbl = flatten_columns(self.to_arrow(timeout=timeout), flatten)
-        return tbl.to_pandas()
+        return tbl.to_pandas(**kwargs)

    @abstractmethod
    def to_arrow(self, *, timeout: Optional[timedelta] = None) -> pa.Table:
@@ -2352,6 +2357,7 @@ class AsyncQueryBase(object):
        self,
        flatten: Optional[Union[int, bool]] = None,
        timeout: Optional[timedelta] = None,
+        **kwargs,
    ) -> "pd.DataFrame":
        """
        Execute the query and collect the results into a pandas DataFrame.
@@ -2384,10 +2390,13 @@ class AsyncQueryBase(object):
            The maximum time to wait for the query to complete.
            If not specified, no timeout is applied. If the query does not
            complete within the specified time, an error will be raised.
+        **kwargs
+            Forwarded to pyarrow.Table.to_pandas after query execution and
+            optional flattening.
        """
        return (
            flatten_columns(await self.to_arrow(timeout=timeout), flatten)
-        ).to_pandas()
+        ).to_pandas(**kwargs)

    async def to_polars(
        self,
@@ -3340,16 +3349,18 @@ class BaseQueryBuilder(object):
            If not specified, no timeout is applied. If the query does not
            complete within the specified time, an error will be raised.
        """
-        async_iter = LOOP.run(self._inner.execute(max_batch_length, timeout))
+        async_reader = LOOP.run(
+            self._inner.to_batches(max_batch_length=max_batch_length, timeout=timeout)
+        )

        def iter_sync():
            try:
                while True:
-                    yield LOOP.run(async_iter.__anext__())
+                    yield LOOP.run(async_reader.__anext__())
            except StopAsyncIteration:
                return

-        return pa.RecordBatchReader.from_batches(async_iter.schema, iter_sync())
+        return pa.RecordBatchReader.from_batches(async_reader.schema, iter_sync())

    def to_arrow(self, timeout: Optional[timedelta] = None) -> pa.Table:
        """
@@ -3389,6 +3400,7 @@ class BaseQueryBuilder(object):
        self,
        flatten: Optional[Union[int, bool]] = None,
        timeout: Optional[timedelta] = None,
+        **kwargs,
    ) -> "pd.DataFrame":
        """
        Execute the query and collect the results into a pandas DataFrame.
@@ -3421,8 +3433,11 @@ class BaseQueryBuilder(object):
            The maximum time to wait for the query to complete.
            If not specified, no timeout is applied. If the query does not
            complete within the specified time, an error will be raised.
+        **kwargs
+            Forwarded to pyarrow.Table.to_pandas after query execution and
+            optional flattening.
        """
-        return LOOP.run(self._inner.to_pandas(flatten, timeout))
+        return LOOP.run(self._inner.to_pandas(flatten, timeout, **kwargs))

    def to_polars(
        self,
--- a/python/python/lancedb/remote/db.py
+++ b/python/python/lancedb/remote/db.py
@@ -50,6 +50,7 @@ class RemoteDBConnection(DBConnection):
        connection_timeout: Optional[float] = None,
        read_timeout: Optional[float] = None,
        storage_options: Optional[Dict[str, str]] = None,
+        read_consistency_interval: Optional[timedelta] = None,
    ):
        """Connect to a remote LanceDB database."""
        if isinstance(client_config, dict):
@@ -103,6 +104,7 @@ class RemoteDBConnection(DBConnection):
                host_override=host_override,
                client_config=client_config,
                storage_options=storage_options,
+                read_consistency_interval=read_consistency_interval,
            )
        )

--- a/python/python/lancedb/remote/table.py
+++ b/python/python/lancedb/remote/table.py
@@ -40,7 +40,7 @@ from lancedb.embeddings import EmbeddingFunctionRegistry
 from lancedb.table import _normalize_progress

 from ..query import LanceVectorQueryBuilder, LanceQueryBuilder, LanceTakeQueryBuilder
-from ..table import AsyncTable, IndexStatistics, Query, Table, Tags
+from ..table import AsyncTable, BlobMode, IndexStatistics, Query, Table, Tags
 from ..types import BaseTokenizerType


@@ -101,7 +101,7 @@ class RemoteTable(Table):
        """to_arrow() is not yet supported on LanceDB cloud."""
        raise NotImplementedError("to_arrow() is not yet supported on LanceDB cloud.")

-    def to_pandas(self):
+    def to_pandas(self, blob_mode: BlobMode = "lazy", **kwargs):
        """to_pandas() is not yet supported on LanceDB cloud."""
        raise NotImplementedError("to_pandas() is not yet supported on LanceDB cloud.")

--- a/python/python/lancedb/rerankers/linear_combination.py
+++ b/python/python/lancedb/rerankers/linear_combination.py
@@ -102,8 +102,15 @@ class LinearCombinationReranker(Reranker):

        combined_list = []
        for row_id, result in results.items():
+            # Convert vector distance to a relevance score in [0, 1] where
+            # higher is better.  Missing vector entries are penalised with
+            # `_invert_score(fill)` = 1 - fill (= 0.0 for the default fill=1).
            vector_score = self._invert_score(result.get("_distance", fill))
-            fts_score = result.get("_score", fill)
+            # FTS scores (BM25) are already in a "higher = more relevant" space.
+            # Missing FTS entries are penalised symmetrically: we use
+            # `1 - fill` so that the same `fill` value drives both missing-vector
+            # and missing-FTS penalties in the same direction.
+            fts_score = result.get("_score", 1 - fill)
            result["_relevance_score"] = self._combine_score(vector_score, fts_score)
            combined_list.append(result)

@@ -123,8 +130,12 @@ class LinearCombinationReranker(Reranker):
        return tbl

    def _combine_score(self, vector_score, fts_score):
-        # these scores represent distance
-        return 1 - (self.weight * vector_score + (1 - self.weight) * fts_score)
+        # Both vector_score (inverted distance) and fts_score are in a
+        # "higher = more relevant" space.  A straight weighted average gives
+        # higher _relevance_score to better matches, as expected.
+        # Previously this returned `1 - (...)` which inverted the final
+        # ranking so that the *least* relevant document ranked first.
+        return self.weight * vector_score + (1 - self.weight) * fts_score

    def _invert_score(self, dist: float):
        # Invert the score between relevance and distance
--- a/python/python/lancedb/table.py
+++ b/python/python/lancedb/table.py
@@ -87,6 +87,8 @@ from .util import (
 )
 from .index import lang_mapping

+BlobMode = Literal["lazy", "bytes", "descriptions"]
+
 _MODEL_BACKED_TOKENIZER_PREFIXES = ("jieba", "lindera")
 _MODEL_BACKED_TOKENIZER_ERRORS = (
    "unknown base tokenizer",
@@ -760,14 +762,22 @@ class Table(ABC):
        """
        raise NotImplementedError

-    def to_pandas(self) -> "pandas.DataFrame":
+    def to_pandas(self, blob_mode: BlobMode = "lazy", **kwargs) -> "pandas.DataFrame":
        """Return the table as a pandas DataFrame.

+        Parameters
+        ----------
+        blob_mode: str, default "lazy"
+            Controls how blob columns are returned for backends that support
+            Lance blob-aware pandas conversion.
+        **kwargs
+            Forwarded to PyArrow / Lance pandas conversion.
+
        Returns
        -------
        pd.DataFrame
        """
-        return self.to_arrow().to_pandas()
+        return self.to_arrow().to_pandas(**kwargs)

    @abstractmethod
    def to_arrow(self) -> pa.Table:
@@ -2168,7 +2178,7 @@ class LanceTable(Table):
        return LOOP.run(self._table.count_rows(filter))

    def __repr__(self) -> str:
-        val = f"{self.__class__.__name__}(name={self.name!r}, version={self.version}"
+        val = f"{self.__class__.__name__}(name={self.name!r}"
        if self._conn.read_consistency_interval is not None:
            val += ", read_consistency_interval={!r}".format(
                self._conn.read_consistency_interval
@@ -2183,14 +2193,27 @@ class LanceTable(Table):
        """Return the first n rows of the table."""
        return LOOP.run(self._table.head(n))

-    def to_pandas(self) -> "pd.DataFrame":
+    def to_pandas(self, blob_mode: BlobMode = "lazy", **kwargs) -> "pd.DataFrame":
        """Return the table as a pandas DataFrame.

+        Parameters
+        ----------
+        blob_mode: str, default "lazy"
+            Controls how Lance blob columns are returned.
+        **kwargs
+            Forwarded to Lance pandas conversion.
+
        Returns
        -------
        pd.DataFrame
        """
-        return self.to_arrow().to_pandas()
+        if blob_mode == "lazy" and (
+            self._namespace_client is not None
+            or get_uri_scheme(self._dataset_path) == "memory"
+        ):
+            return self.to_arrow().to_pandas(**kwargs)
+
+        return self.to_lance().to_pandas(blob_mode=blob_mode, **kwargs)

    def to_arrow(self) -> pa.Table:
        """Return the table as a pyarrow Table.
@@ -2519,11 +2542,6 @@ class LanceTable(Table):
                "at a time. To search over multiple text fields, create a "
                "separate FTS index for each field."
            )
-        if "." in field_names:
-            raise ValueError(
-                "Native FTS indexes can only be created on top-level fields. "
-                f"Received nested field path: {field_names!r}."
-            )

        if tokenizer_name is None:
            tokenizer_configs = {
@@ -3945,14 +3963,39 @@ class AsyncTable:
        """
        return AsyncQuery(self._inner.query())

-    async def to_pandas(self) -> "pd.DataFrame":
+    async def _to_lance(self, **kwargs) -> lance.LanceDataset:
+        try:
+            import lance
+        except ImportError:
+            raise ImportError(
+                "The lance library is required to use this function. "
+                "Please install with `pip install pylance`."
+            )
+
+        return lance.dataset(
+            await self.uri(),
+            version=await self.version(),
+            storage_options=await self.latest_storage_options(),
+            **kwargs,
+        )
+
+    async def to_pandas(self, blob_mode: BlobMode = "lazy", **kwargs) -> "pd.DataFrame":
        """Return the table as a pandas DataFrame.

+        Parameters
+        ----------
+        blob_mode: str, default "lazy"
+            Controls how Lance blob columns are returned.
+        **kwargs
+            Forwarded to PyArrow / Lance pandas conversion.
+
        Returns
        -------
        pd.DataFrame
        """
-        return (await self.to_arrow()).to_pandas()
+        if blob_mode == "lazy":
+            return (await self.to_arrow()).to_pandas(**kwargs)
+        return (await self._to_lance()).to_pandas(blob_mode=blob_mode, **kwargs)

    async def to_arrow(self) -> pa.Table:
        """Return the table as a pyarrow Table.
--- a/python/python/lancedb/util.py
+++ b/python/python/lancedb/util.py
@@ -10,7 +10,7 @@ import pathlib
 import warnings
 from datetime import date, datetime
 from functools import singledispatch
-from typing import Tuple, Union, Optional, Any
+from typing import Tuple, Union, Optional, Any, List
 from urllib.parse import urlparse

 import numpy as np
@@ -189,7 +189,33 @@ def flatten_columns(tbl: pa.Table, flatten: Optional[Union[int, bool]] = None):
    return tbl


-def inf_vector_column_query(schema: pa.Schema) -> str:
+def _format_field_path(path: List[str]) -> str:
+    def format_segment(segment: str) -> str:
+        if all(char.isalnum() or char == "_" for char in segment):
+            return segment
+        return f"`{segment.replace('`', '``')}`"
+
+    return ".".join(format_segment(segment) for segment in path)
+
+
+def _iter_vector_columns(
+    field: pa.Field, path: List[str], dim: Optional[int] = None
+) -> List[str]:
+    field_path = [*path, field.name]
+    if is_vector_column(field.type):
+        vector_dim = infer_vector_column_dim(field.type)
+        if dim is None or vector_dim == dim:
+            return [_format_field_path(field_path)]
+        return []
+    if pa.types.is_struct(field.type):
+        columns = []
+        for idx in range(field.type.num_fields):
+            columns.extend(_iter_vector_columns(field.type.field(idx), field_path, dim))
+        return columns
+    return []
+
+
+def inf_vector_column_query(schema: pa.Schema, dim: Optional[int] = None) -> str:
    """
    Get the vector column name

@@ -202,26 +228,21 @@ def inf_vector_column_query(schema: pa.Schema) -> str:
    -------
    str: the vector column name.
    """
-    vector_col_name = ""
-    vector_col_count = 0
-    for field_name in schema.names:
-        field = schema.field(field_name)
-        if is_vector_column(field.type):
-            vector_col_count += 1
-            if vector_col_count > 1:
-                raise ValueError(
-                    "Schema has more than one vector column. "
-                    "Please specify the vector column name "
-                    "for vector search"
-                )
-            elif vector_col_count == 1:
-                vector_col_name = field_name
-    if vector_col_count == 0:
+    vector_col_names = []
+    for field in schema:
+        vector_col_names.extend(_iter_vector_columns(field, [], dim))
+    if len(vector_col_names) > 1:
+        raise ValueError(
+            "Schema has more than one vector column. "
+            "Please specify the vector column name "
+            f"for vector search. Candidates: {vector_col_names}"
+        )
+    if len(vector_col_names) == 0:
        raise ValueError(
            "There is no vector column in the data. "
            "Please specify the vector column name for vector search"
        )
-    return vector_col_name
+    return vector_col_names[0]


 def is_vector_column(data_type: pa.DataType) -> bool:
@@ -247,6 +268,29 @@ def is_vector_column(data_type: pa.DataType) -> bool:
    return False


+def infer_vector_column_dim(data_type: pa.DataType) -> Optional[int]:
+    if pa.types.is_fixed_size_list(data_type):
+        return data_type.list_size
+    if pa.types.is_list(data_type):
+        return infer_vector_column_dim(data_type.value_type)
+    return None
+
+
+def _query_vector_dim(query: Optional[Any]) -> Optional[int]:
+    if query is None:
+        return None
+    if isinstance(query, np.ndarray):
+        if query.ndim == 0:
+            return None
+        return query.shape[-1]
+    if isinstance(query, list) and query:
+        first = query[0]
+        if isinstance(first, (list, tuple, np.ndarray)):
+            return len(first)
+        return len(query)
+    return None
+
+
 def infer_vector_column_name(
    schema: pa.Schema,
    query_type: str,
@@ -262,7 +306,9 @@ def infer_vector_column_name(

    if query is not None or query_type == "hybrid":
        try:
-            vector_column_name = inf_vector_column_query(schema)
+            vector_column_name = inf_vector_column_query(
+                schema, dim=_query_vector_dim(query)
+            )
        except Exception as e:
            raise e

--- a/python/python/tests/test_db.py
+++ b/python/python/tests/test_db.py
@@ -6,6 +6,7 @@ import re
 import sys
 from datetime import timedelta
 import os
+from types import SimpleNamespace

 import lancedb
 import numpy as np
@@ -188,6 +189,43 @@ def test_table_names(tmp_db: lancedb.DBConnection):
    assert len(result) == 3


+def test_db_contains_and_len_include_all_table_name_pages(tmp_db: lancedb.DBConnection):
+    for idx in range(20):
+        tmp_db.create_table(f"table_{idx}", data=[{"id": idx}])
+
+    assert len(tmp_db) == 20
+    for idx in range(20):
+        assert f"table_{idx}" in tmp_db
+    assert "does_not_exist" not in tmp_db
+
+
+def test_db_contains_stops_after_matching_table_page(
+    tmp_db: lancedb.DBConnection, monkeypatch
+):
+    calls = []
+    pages = {
+        None: SimpleNamespace(tables=["table_0", "table_1"], page_token="next"),
+        "next": SimpleNamespace(tables=["table_2"], page_token=None),
+    }
+
+    def list_tables(*, page_token=None, **_kwargs):
+        calls.append(page_token)
+        return pages[page_token]
+
+    monkeypatch.setattr(tmp_db, "list_tables", list_tables)
+
+    assert "table_1" in tmp_db
+    assert calls == [None]
+
+    calls.clear()
+    assert "table_2" in tmp_db
+    assert calls == [None, "next"]
+
+    calls.clear()
+    assert len(tmp_db) == 3
+    assert calls == [None, "next"]
+
+
@pytest.mark.asyncio
 async def test_table_names_async(tmp_path):
    db = lancedb.connect(tmp_path)
@@ -428,7 +466,8 @@ async def test_create_table_v2_manifest_paths_async(tmp_path):
    assert await tbl.uses_v2_manifest_paths()
    manifests_dir = tmp_path / "test_v2_manifest_paths.lance" / "_versions"
    for manifest in os.listdir(manifests_dir):
-        assert re.match(r"\d{20}\.manifest", manifest)
+        if manifest.endswith(".manifest"):
+            assert re.match(r"\d{20}\.manifest", manifest)

    # Start a table in V1 mode then migrate
    tbl = await db_no_v2_paths.create_table(
@@ -438,13 +477,15 @@ async def test_create_table_v2_manifest_paths_async(tmp_path):
    assert not await tbl.uses_v2_manifest_paths()
    manifests_dir = tmp_path / "test_v2_migration.lance" / "_versions"
    for manifest in os.listdir(manifests_dir):
-        assert re.match(r"\d\.manifest", manifest)
+        if manifest.endswith(".manifest"):
+            assert re.match(r"\d\.manifest", manifest)

    await tbl.migrate_manifest_paths_v2()
    assert await tbl.uses_v2_manifest_paths()

    for manifest in os.listdir(manifests_dir):
-        assert re.match(r"\d{20}\.manifest", manifest)
+        if manifest.endswith(".manifest"):
+            assert re.match(r"\d{20}\.manifest", manifest)


@pytest.mark.asyncio
--- a/python/python/tests/test_fts.py
+++ b/python/python/tests/test_fts.py
@@ -563,8 +563,111 @@ def test_create_index_multiple_columns(tmp_path, table):


 def test_nested_schema(tmp_path, table):
-    with pytest.raises(ValueError, match="top-level fields"):
-        table.create_fts_index("nested.text")
+    table.create_fts_index("nested.text", with_position=True)
+    indices = table.list_indices()
+    assert len(indices) == 1
+    assert indices[0].index_type == "FTS"
+    assert indices[0].columns == ["nested.text"]
+
+    results = (
+        table.search("puppy", query_type="fts", fts_columns="nested.text")
+        .limit(5)
+        .to_list()
+    )
+    assert len(results) > 0
+    assert all("puppy" in row["nested"]["text"] for row in results)
+
+    results = table.search(MatchQuery("puppy", "nested.text")).limit(5).to_list()
+    assert len(results) > 0
+    assert all("puppy" in row["nested"]["text"] for row in results)
+
+    phrase_results = (
+        table.search(PhraseQuery("puppy runs", "nested.text")).limit(5).to_list()
+    )
+    assert len(phrase_results) > 0
+    assert all("puppy runs" in row["nested"]["text"] for row in phrase_results)
+
+    hybrid_results = (
+        table.search(query_type="hybrid", fts_columns="nested.text")
+        .vector([0 for _ in range(128)])
+        .text("puppy")
+        .limit(5)
+        .to_list()
+    )
+    assert len(hybrid_results) > 0
+
+
+@pytest.mark.asyncio
+async def test_nested_schema_async(async_table):
+    await async_table.create_index("nested.text", config=FTS(with_position=True))
+    indices = await async_table.list_indices()
+    assert len(indices) == 1
+    assert indices[0].index_type == "FTS"
+    assert indices[0].columns == ["nested.text"]
+
+    results = await (
+        async_table.query()
+        .nearest_to_text("puppy", columns="nested.text")
+        .limit(5)
+        .to_list()
+    )
+    assert len(results) > 0
+    assert all("puppy" in row["nested"]["text"] for row in results)
+
+    results = await (
+        async_table.query()
+        .nearest_to_text(MatchQuery("puppy", "nested.text"))
+        .limit(5)
+        .to_list()
+    )
+    assert len(results) > 0
+    assert all("puppy" in row["nested"]["text"] for row in results)
+
+    phrase_results = await (
+        async_table.query()
+        .nearest_to_text(PhraseQuery("puppy runs", "nested.text"))
+        .limit(5)
+        .to_list()
+    )
+    assert len(phrase_results) > 0
+    assert all("puppy runs" in row["nested"]["text"] for row in phrase_results)
+
+    hybrid_results = await (
+        async_table.query()
+        .nearest_to([0 for _ in range(128)])
+        .nearest_to_text("puppy", columns="nested.text")
+        .limit(5)
+        .to_list()
+    )
+    assert len(hybrid_results) > 0
+
+
+def test_nested_schema_rejects_invalid_fts_fields(tmp_path):
+    db = ldb.connect(tmp_path)
+    data = pa.table(
+        {
+            "payload": pa.array(
+                [
+                    {"text": "puppy runs", "count": 1},
+                    {"text": "car drives", "count": 2},
+                ]
+            ),
+            "vector": pa.array(
+                [[0.1, 0.1], [0.2, 0.2]],
+                type=pa.list_(pa.float32(), list_size=2),
+            ),
+        }
+    )
+    table = db.create_table("test", data=data)
+
+    with pytest.raises(ValueError, match="FTS index cannot be created.*payload"):
+        table.create_fts_index("payload")
+
+    with pytest.raises(ValueError, match="FTS index cannot be created.*count"):
+        table.create_fts_index("payload.count")
+
+    with pytest.raises(ValueError, match="Field path `payload.missing` not found"):
+        table.create_fts_index("payload.missing")


 def test_search_index_with_filter(table):
--- a/python/python/tests/test_index.py
+++ b/python/python/tests/test_index.py
@@ -105,6 +105,46 @@ async def test_create_scalar_index(some_table: AsyncTable):
    assert len(indices) == 0


+@pytest.mark.asyncio
+async def test_create_nested_scalar_index_lists_canonical_paths(db_async):
+    metadata_type = pa.struct(
+        [
+            pa.field("user_id", pa.int32()),
+            pa.field("user.id", pa.int32()),
+        ]
+    )
+    data = pa.Table.from_arrays(
+        [
+            pa.array([1, 2, 3], type=pa.int32()),
+            pa.array(
+                [
+                    {"user_id": 10, "user.id": 100},
+                    {"user_id": 20, "user.id": 200},
+                    {"user_id": 30, "user.id": 300},
+                ],
+                type=metadata_type,
+            ),
+        ],
+        names=["user_id", "metadata"],
+    )
+    table = await db_async.create_table("nested_scalar_index", data)
+
+    await table.create_index("user_id", config=BTree(), name="top_user_id_idx")
+    await table.create_index(
+        "metadata.user_id", config=BTree(), name="nested_user_id_idx"
+    )
+    await table.create_index(
+        "metadata.`user.id`", config=BTree(), name="escaped_user_id_idx"
+    )
+
+    columns_by_name = {
+        index.name: index.columns for index in await table.list_indices()
+    }
+    assert columns_by_name["top_user_id_idx"] == ["user_id"]
+    assert columns_by_name["nested_user_id_idx"] == ["metadata.user_id"]
+    assert columns_by_name["escaped_user_id_idx"] == ["metadata.`user.id`"]
+
+
@pytest.mark.asyncio
 async def test_create_fixed_size_binary_index(some_table: AsyncTable):
    await some_table.create_index("fsb", config=BTree())
--- a/python/python/tests/test_lsm_write_spec.py
+++ b/python/python/tests/test_lsm_write_spec.py
@@ -40,16 +40,6 @@ def _make_table(tmp_path):
 def test_set_lsm_write_spec_validates(tmp_path):
    _db, table = _make_table(tmp_path)

-    # No PK set yet.
-    with pytest.raises(Exception, match="primary key"):
-        table.set_lsm_write_spec(LsmWriteSpec.bucket("id", 4))
-
-    table.set_unenforced_primary_key("id")
-
-    # Column mismatch.
-    with pytest.raises(Exception, match="match"):
-        table.set_lsm_write_spec(LsmWriteSpec.bucket("v", 4))
-
    # Out-of-range num_buckets.
    with pytest.raises(Exception, match="num_buckets"):
        table.set_lsm_write_spec(LsmWriteSpec.bucket("id", 0))
@@ -70,7 +60,6 @@ def test_unset_lsm_write_spec(tmp_path):
        table.unset_lsm_write_spec()

    # Install a spec, then remove it; afterwards a fresh spec can be set.
-    table.set_unenforced_primary_key("id")
    table.set_lsm_write_spec(LsmWriteSpec.bucket("id", 4))
    table.unset_lsm_write_spec()
    # A second unset errors — there is no spec left to remove.
--- a/python/python/tests/test_query.py
+++ b/python/python/tests/test_query.py
@@ -165,6 +165,22 @@ def test_offset(table):
    assert len(results_with_offset.to_pandas()) == 1


+@pytest.mark.asyncio
+async def test_query_to_pandas_kwargs(table, table_async):
+    sync_df = (
+        LanceVectorQueryBuilder(table, [0, 0], "vector")
+        .select(["id"])
+        .limit(1)
+        .to_pandas(split_blocks=True)
+    )
+    assert sync_df["id"].tolist() == [1]
+
+    async_df = await (
+        table_async.query().select(["id"]).limit(2).to_pandas(split_blocks=True)
+    )
+    assert async_df["id"].tolist() == [1, 2]
+
+
 def test_order_by_plain_query(mem_db):
    table = mem_db.create_table(
        "test_order_by",
@@ -1496,6 +1512,37 @@ def test_take_queries(tmp_path):
    ]


+def test_take_queries_to_batches(tmp_path):
+    # Regression test for the sync take-query path: `to_batches` previously
+    # raised ``AttributeError: 'AsyncTakeQuery' object has no attribute
+    # 'execute'`` because the inherited ``BaseQueryBuilder.to_batches`` called
+    # ``execute`` on the async wrapper instead of the native query.
+    db = lancedb.connect(tmp_path)
+    data = pa.table({"idx": list(range(100)), "label": [str(i) for i in range(100)]})
+    table = db.create_table("test", data)
+
+    # Take by offset → to_batches
+    rs = list(table.take_offsets([5, 2, 17]).to_batches())
+    assert all(isinstance(b, pa.RecordBatch) for b in rs)
+    assert sum(b.num_rows for b in rs) == 3
+    assert sorted(v for b in rs for v in b.column("idx").to_pylist()) == [2, 5, 17]
+
+    # Take by row id → to_batches
+    rs = list(table.take_row_ids([5, 2, 17]).to_batches())
+    assert all(isinstance(b, pa.RecordBatch) for b in rs)
+    assert sum(b.num_rows for b in rs) == 3
+    assert sorted(v for b in rs for v in b.column("idx").to_pylist()) == [2, 5, 17]
+
+    # Take with select projection → to_batches preserves the projection
+    rs = list(table.take_row_ids([5, 2, 17]).select(["label"]).to_batches())
+    assert all(b.schema.names == ["label"] for b in rs)
+    assert sorted(v for b in rs for v in b.column("label").to_pylist()) == [
+        "17",
+        "2",
+        "5",
+    ]
+
+
 def test_getitems(tmp_path):
    db = lancedb.connect(tmp_path)
    data = pa.table(
--- a/python/python/tests/test_remote_db.py
+++ b/python/python/tests/test_remote_db.py
@@ -269,6 +269,25 @@ def test_table_unimplemented_functions():
            table.to_pandas()


+def test_table_to_pandas_not_supported():
+    def handler(request):
+        if request.path == "/v1/table/test/create/?mode=create":
+            request.send_response(200)
+            request.send_header("Content-Type", "application/json")
+            request.end_headers()
+            request.wfile.write(b"{}")
+        else:
+            request.send_response(404)
+            request.end_headers()
+
+    with mock_lancedb_connection(handler) as db:
+        table = db.create_table("test", [{"id": 1}])
+        with pytest.raises(NotImplementedError):
+            table.to_pandas()
+        with pytest.raises(NotImplementedError):
+            table.to_pandas(blob_mode="bytes", split_blocks=True)
+
+
 def test_table_add_in_threadpool():
    def handler(request):
        if request.path == "/v1/table/test/insert/":
@@ -343,6 +362,22 @@ def test_table_create_indices():
                    schema=dict(
                        fields=[
                            dict(name="id", type={"type": "int64"}, nullable=False),
+                            dict(name="text", type={"type": "string"}, nullable=False),
+                            dict(
+                                name="vector",
+                                type={
+                                    "type": "fixed_size_list",
+                                    "fields": [
+                                        dict(
+                                            name="item",
+                                            type={"type": "float"},
+                                            nullable=True,
+                                        )
+                                    ],
+                                    "length": 2,
+                                },
+                                nullable=False,
+                            ),
                        ]
                    ),
                )
--- a/python/python/tests/test_rerankers.py
+++ b/python/python/tests/test_rerankers.py
@@ -603,3 +603,89 @@ def test_cross_encoder_reranker_return_all(tmp_path):
    assert "_relevance_score" in result.column_names
    assert "_score" in result.column_names
    assert "_distance" in result.column_names
+
+
+# ---------------------------------------------------------------------------
+# Regression tests for LinearCombinationReranker scoring bugs (issue #3154)
+# ---------------------------------------------------------------------------
+
+
+def test_linear_combination_best_match_ranks_first():
+    """
+    The document that is BOTH the closest vector match AND the only FTS match
+    must rank first.  Previously _combine_score subtracted from 1, inverting
+    the ranking so the worst document ranked highest.
+    """
+    reranker = LinearCombinationReranker(weight=0.7, return_score="all")
+
+    # rowid 0: perfect vector match, sole FTS match  → should rank 1st
+    # rowid 1: mediocre vector, no FTS match
+    # rowid 2: bad vector, no FTS match
+    vector_results = pa.Table.from_pydict(
+        {
+            "_rowid": [0, 1, 2],
+            "_distance": [0.0, 0.5, 0.9],
+        }
+    )
+    fts_results = pa.Table.from_pydict(
+        {
+            "_rowid": [0],
+            "_score": [1.0],
+        }
+    )
+
+    combined = reranker.merge_results(vector_results, fts_results, fill=1.0)
+    scores = dict(
+        zip(
+            combined["_rowid"].to_pylist(),
+            combined["_relevance_score"].to_pylist(),
+        )
+    )
+
+    # rowid 0 must have the highest relevance score
+    assert scores[0] > scores[1], (
+        f"Best match (rowid 0, score={scores[0]:.4f}) should beat "
+        f"mid match (rowid 1, score={scores[1]:.4f})"
+    )
+    assert scores[1] > scores[2], (
+        f"Mid match (rowid 1, score={scores[1]:.4f}) should beat "
+        f"bad match (rowid 2, score={scores[2]:.4f})"
+    )
+
+
+def test_linear_combination_missing_fts_is_penalised():
+    """
+    A document with no FTS match must score *lower* than a document that
+    has a mediocre FTS match, everything else being equal.  Previously
+    missing-FTS entries used fill=1.0 directly, which gave them a reward
+    (via the 1-(...) inversion) instead of a penalty.
+    """
+    reranker = LinearCombinationReranker(weight=0.5, return_score="all")
+
+    vector_results = pa.Table.from_pydict(
+        {
+            "_rowid": [0, 1],
+            "_distance": [0.2, 0.2],  # identical vector scores
+        }
+    )
+    fts_results = pa.Table.from_pydict(
+        {
+            "_rowid": [0],  # rowid 1 has no FTS match
+            "_score": [0.3],  # small FTS score
+        }
+    )
+
+    combined = reranker.merge_results(vector_results, fts_results, fill=1.0)
+    scores = dict(
+        zip(
+            combined["_rowid"].to_pylist(),
+            combined["_relevance_score"].to_pylist(),
+        )
+    )
+
+    # rowid 0 has a small FTS score; rowid 1 has none.
+    # Even a small FTS contribution should beat having none at all.
+    assert scores[0] > scores[1], (
+        f"Document with FTS score (rowid 0, {scores[0]:.4f}) should beat "
+        f"document with no FTS match (rowid 1, {scores[1]:.4f})"
+    )
--- a/python/python/tests/test_table.py
+++ b/python/python/tests/test_table.py
@@ -33,7 +33,7 @@ def test_basic(mem_db: DBConnection):
    table = mem_db.create_table("test", data=data)

    assert table.name == "test"
-    assert "LanceTable(name='test', version=1, _conn=LanceDBConnection(" in repr(table)
+    assert "LanceTable(name='test', _conn=LanceDBConnection(" in repr(table)
    expected_schema = pa.schema(
        {
            "vector": pa.list_(pa.float32(), 2),
@@ -47,6 +47,85 @@ def test_basic(mem_db: DBConnection):
    assert table.to_arrow() == expected_data


+def test_table_to_pandas_default_matches_arrow(tmp_db: DBConnection):
+    pd = pytest.importorskip("pandas")
+    data = pa.table({"id": [1, 2], "text": ["one", "two"]})
+    table = tmp_db.create_table("test_to_pandas_old_call", data=data)
+
+    expected = data.to_pandas()
+    pd.testing.assert_frame_equal(table.to_pandas(), expected)
+
+
+def test_table_to_pandas_blob_bytes(tmp_db: DBConnection):
+    pytest.importorskip("lance")
+    data = pa.table(
+        {
+            "id": pa.array([1, 2], pa.int64()),
+            "blob": pa.array([b"hello", b"world"], pa.large_binary()),
+        },
+        schema=pa.schema(
+            [
+                pa.field("id", pa.int64()),
+                pa.field(
+                    "blob", pa.large_binary(), metadata={"lance-encoding:blob": "true"}
+                ),
+            ]
+        ),
+    )
+    table = tmp_db.create_table("test_to_pandas_blob_bytes", data=data)
+
+    df = table.to_pandas(blob_mode="bytes")
+
+    assert df["blob"].tolist() == [b"hello", b"world"]
+
+
+def test_table_to_pandas_kwargs(tmp_db: DBConnection):
+    pd = pytest.importorskip("pandas")
+    data = pa.table({"id": pa.array([1, 2], pa.int64())})
+    table = tmp_db.create_table("test_to_pandas_kwargs", data=data)
+
+    df = table.to_pandas(types_mapper=pd.ArrowDtype)
+
+    assert str(df["id"].dtype) == "int64[pyarrow]"
+
+
+@pytest.mark.asyncio
+async def test_async_table_to_pandas_blob_bytes(tmp_db_async: AsyncConnection):
+    pytest.importorskip("lance")
+    data = pa.table(
+        {
+            "id": pa.array([1, 2], pa.int64()),
+            "blob": pa.array([b"hello", b"world"], pa.large_binary()),
+        },
+        schema=pa.schema(
+            [
+                pa.field("id", pa.int64()),
+                pa.field(
+                    "blob", pa.large_binary(), metadata={"lance-encoding:blob": "true"}
+                ),
+            ]
+        ),
+    )
+    table = await tmp_db_async.create_table(
+        "test_async_to_pandas_blob_bytes", data=data
+    )
+
+    df = await table.to_pandas(blob_mode="bytes")
+
+    assert df["blob"].tolist() == [b"hello", b"world"]
+
+
+@pytest.mark.asyncio
+async def test_async_table_to_pandas_kwargs(tmp_db_async: AsyncConnection):
+    pd = pytest.importorskip("pandas")
+    data = pa.table({"id": pa.array([1, 2], pa.int64())})
+    table = await tmp_db_async.create_table("test_async_to_pandas_kwargs", data=data)
+
+    df = await table.to_pandas(types_mapper=pd.ArrowDtype)
+
+    assert str(df["id"].dtype) == "int64[pyarrow]"
+
+
 def test_create_table_infers_large_int_vectors(mem_db: DBConnection):
    data = [{"vector": [0, 300]}]

@@ -1811,6 +1890,59 @@ def test_create_scalar_index(mem_db: DBConnection):
    assert scalar_index.name == "custom_y_index"


+def test_create_index_nested_field_paths(mem_db: DBConnection):
+    schema = pa.schema(
+        [
+            pa.field("metadata", pa.struct([pa.field("user_id", pa.int32())])),
+            pa.field(
+                "image",
+                pa.struct([pa.field("embedding", pa.list_(pa.float32(), 2))]),
+            ),
+        ]
+    )
+    data = pa.Table.from_pylist(
+        [
+            {
+                "metadata": {"user_id": i},
+                "image": {"embedding": [float(i), float(i + 1)]},
+            }
+            for i in range(256)
+        ],
+        schema=schema,
+    )
+    table = mem_db.create_table("nested_index_paths", data=data)
+
+    table.create_scalar_index("metadata.user_id", name="metadata_user_id_idx")
+    table.create_index(
+        vector_column_name="image.embedding",
+        num_partitions=1,
+        num_sub_vectors=1,
+        name="image_embedding_idx",
+    )
+
+    indices = sorted(table.list_indices(), key=lambda idx: idx.name)
+    assert [(idx.name, idx.index_type, idx.columns) for idx in indices] == [
+        ("image_embedding_idx", "IvfPq", ["image.embedding"]),
+        ("metadata_user_id_idx", "BTree", ["metadata.user_id"]),
+    ]
+
+    vector_results = (
+        table.search([0.0, 1.0], vector_column_name="image.embedding")
+        .limit(1)
+        .to_list()
+    )
+    assert len(vector_results) == 1
+    assert vector_results[0]["metadata"]["user_id"] == 0
+
+    default_vector_results = table.search([0.0, 1.0]).limit(1).to_list()
+    assert len(default_vector_results) == 1
+    assert default_vector_results[0]["metadata"]["user_id"] == 0
+
+    filtered_results = table.search().where("metadata.user_id = 42").limit(1).to_list()
+    assert len(filtered_results) == 1
+    assert filtered_results[0]["metadata"]["user_id"] == 42
+
+
 def test_empty_query(mem_db: DBConnection):
    table = mem_db.create_table(
        "my_table",
@@ -1885,6 +2017,74 @@ def test_search_with_schema_inf_multiple_vector(mem_db: DBConnection):
        table.search(q).limit(1).to_arrow()


+def test_search_infers_single_nested_vector(mem_db: DBConnection):
+    schema = pa.schema(
+        [
+            pa.field("id", pa.int32()),
+            pa.field(
+                "image",
+                pa.struct([pa.field("embedding", pa.list_(pa.float32(), 2))]),
+            ),
+        ]
+    )
+    data = pa.Table.from_pylist(
+        [
+            {"id": 0, "image": {"embedding": [0.0, 1.0]}},
+            {"id": 1, "image": {"embedding": [10.0, 11.0]}},
+        ],
+        schema=schema,
+    )
+    table = mem_db.create_table("nested_vector_default_search", data=data)
+
+    result = table.search([0.0, 1.0]).limit(1).to_list()
+    assert result[0]["id"] == 0
+
+
+def test_search_nested_vector_multiple_candidates(mem_db: DBConnection):
+    schema = pa.schema(
+        [
+            pa.field(
+                "image",
+                pa.struct([pa.field("embedding", pa.list_(pa.float32(), 2))]),
+            ),
+            pa.field(
+                "text",
+                pa.struct([pa.field("embedding", pa.list_(pa.float32(), 2))]),
+            ),
+        ]
+    )
+    data = pa.Table.from_pylist(
+        [
+            {
+                "image": {"embedding": [0.0, 1.0]},
+                "text": {"embedding": [2.0, 3.0]},
+            }
+        ],
+        schema=schema,
+    )
+    table = mem_db.create_table("nested_vector_multiple_candidates", data=data)
+
+    with pytest.raises(ValueError, match="image.embedding.*text.embedding"):
+        table.search([0.0, 1.0]).limit(1).to_arrow()
+
+
+def test_search_nested_vector_no_candidates(mem_db: DBConnection):
+    schema = pa.schema(
+        [
+            pa.field("id", pa.int32()),
+            pa.field("metadata", pa.struct([pa.field("label", pa.string())])),
+        ]
+    )
+    data = pa.Table.from_pylist(
+        [{"id": 0, "metadata": {"label": "cat"}}],
+        schema=schema,
+    )
+    table = mem_db.create_table("nested_vector_no_candidates", data=data)
+
+    with pytest.raises(ValueError, match="no vector column"):
+        table.search([0.0, 1.0]).limit(1).to_arrow()
+
+
 def test_compact_cleanup(tmp_db: DBConnection):
    pytest.importorskip("lance")
    table = tmp_db.create_table(
--- a/rust/lancedb/Cargo.toml
+++ b/rust/lancedb/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb"
-version = "0.29.1-beta.0"
+version = "0.30.0"
 edition.workspace = true
 description = "LanceDB: A serverless, low-latency vector database for AI applications"
 license.workspace = true
@@ -104,6 +104,7 @@ datafusion.workspace = true
 http-body = "1"                                        # Matching reqwest
 rstest = "0.23.0"
 test-log = "0.2"
+serial_test = "3"


 [features]
--- a/rust/lancedb/src/connection.rs
+++ b/rust/lancedb/src/connection.rs
@@ -812,8 +812,7 @@ impl ConnectBuilder {
        self
    }

-    /// The interval at which to check for updates from other processes. This
-    /// only affects LanceDB OSS.
+    /// The interval at which to check for updates from other processes.
    ///
    /// If left unset, consistency is not checked. For maximum read
    /// performance, this is the default. For strong consistency, set this to
@@ -825,8 +824,11 @@ impl ConnectBuilder {
    /// This only affects read operations. Write operations are always
    /// consistent.
    ///
-    /// LanceDB Cloud uses eventual consistency under the hood, and is not
-    /// currently configurable.
+    /// # Cost
+    ///
+    /// Stronger consistency is not free. The smaller the interval, the more
+    /// often each read pays the cost of checking for updates against object
+    /// storage, raising per-read latency and cost.
    pub fn read_consistency_interval(
        mut self,
        read_consistency_interval: std::time::Duration,
@@ -886,6 +888,7 @@ impl ConnectBuilder {
            options.host_override,
            self.request.client_config,
            storage_options.into(),
+            self.request.read_consistency_interval,
        )?);
        Ok(Connection {
            internal,
--- a/rust/lancedb/src/data/scannable.rs
+++ b/rust/lancedb/src/data/scannable.rs
@@ -271,15 +271,26 @@ impl Scannable for WithEmbeddingsScannable {
                .map_err(|e| Error::Runtime {
                    message: format!("Task panicked during embedding computation: {}", e),
                })??;
-                // Cast columns to match the declared output schema. The data is
-                // identical but field metadata (e.g. nested nullability) may
-                // differ between the embedding function output and the table.
-                let columns: Vec<ArrayRef> = result
-                    .columns()
+                // Look up columns by name (not position) so the result matches
+                // the output schema even when columns appear in a different
+                // order — e.g. `add_columns` placed a new column after the
+                // embedding column, but the computed batch appends embeddings
+                // at the end. Cast per-column because field metadata (e.g.
+                // nested nullability) may also differ between the embedding
+                // function output and the table.
+                let columns: Vec<ArrayRef> = output_schema
+                    .fields()
                    .iter()
-                    .enumerate()
-                    .map(|(i, col)| {
-                        let target_type = output_schema.field(i).data_type();
+                    .map(|field| {
+                        let col = result.column_by_name(field.name()).ok_or_else(|| {
+                            Error::InvalidInput {
+                                message: format!(
+                                    "Column '{}' required by the table schema was not present in the input batch",
+                                    field.name()
+                                ),
+                            }
+                        })?;
+                        let target_type = field.data_type();
                        if col.data_type() == target_type {
                            Ok(col.clone())
                        } else {
@@ -964,5 +975,118 @@ mod tests {
                "Expected EmbeddingFunctionNotFound"
            );
        }
+
+        /// Regression test for https://github.com/lancedb/lancedb/issues/3136.
+        ///
+        /// When a column is added to the table after the embedding column via
+        /// schema evolution, the table schema becomes
+        /// `[..., embedding, extra]`. The input batch (without the embedding)
+        /// is `[..., extra]`, and `compute_embeddings_for_batch` appends the
+        /// embedding at the end giving `[..., extra, embedding]`. A positional
+        /// cast to the output schema would map `extra` onto `embedding` and
+        /// fail with a CastError. Columns must be matched by name.
+        #[tokio::test]
+        async fn test_with_embeddings_scannable_column_added_after_embedding() {
+            let input_schema = Arc::new(Schema::new(vec![
+                Field::new("text", DataType::Utf8, false),
+                Field::new("score", DataType::Float64, true),
+            ]));
+            let batch = RecordBatch::try_new(
+                input_schema.clone(),
+                vec![
+                    Arc::new(StringArray::from(vec!["hello", "world"])) as ArrayRef,
+                    Arc::new(arrow_array::Float64Array::from(vec![1.0, 2.0])) as ArrayRef,
+                ],
+            )
+            .unwrap();
+
+            let mock_embedding: Arc<dyn EmbeddingFunction> = Arc::new(MockEmbed::new("mock", 4));
+            let embedding_def = EmbeddingDefinition::new("text", "mock", Some("text_vec"));
+
+            // Table schema: embedding column is BEFORE `score`, as would
+            // happen if `score` was added via `add_columns` after creating
+            // the table with an embedding on `text`.
+            let output_schema = Arc::new(Schema::new(vec![
+                Field::new("text", DataType::Utf8, false),
+                Field::new(
+                    "text_vec",
+                    DataType::FixedSizeList(
+                        Arc::new(Field::new("item", DataType::Float32, true)),
+                        4,
+                    ),
+                    false,
+                ),
+                Field::new("score", DataType::Float64, true),
+            ]));
+
+            let mut scannable = WithEmbeddingsScannable::with_schema(
+                Box::new(batch),
+                vec![(embedding_def, mock_embedding)],
+                output_schema.clone(),
+            )
+            .unwrap();
+
+            let stream = scannable.scan_as_stream();
+            let results: Vec<RecordBatch> = stream.try_collect().await.unwrap();
+            assert_eq!(results.len(), 1);
+
+            let result_batch = &results[0];
+            assert_eq!(result_batch.schema(), output_schema);
+            assert_eq!(result_batch.num_rows(), 2);
+            // Position 1 must actually hold the FixedSizeList embedding —
+            // not the score column reinterpreted by a permissive cast.
+            let embedding = result_batch
+                .column(1)
+                .as_any()
+                .downcast_ref::<arrow_array::FixedSizeListArray>()
+                .expect("position 1 should be a FixedSizeList embedding");
+            assert_eq!(embedding.value_length(), 4);
+            assert_eq!(embedding.null_count(), 0);
+        }
+
+        /// If the input batch is missing a non-embedding column required by
+        /// the table schema, we should return a clear error rather than
+        /// silently producing a malformed batch.
+        #[tokio::test]
+        async fn test_with_embeddings_scannable_missing_required_column() {
+            let input_schema =
+                Arc::new(Schema::new(vec![Field::new("text", DataType::Utf8, false)]));
+            let batch = RecordBatch::try_new(
+                input_schema,
+                vec![Arc::new(StringArray::from(vec!["hello", "world"])) as ArrayRef],
+            )
+            .unwrap();
+
+            let mock_embedding: Arc<dyn EmbeddingFunction> = Arc::new(MockEmbed::new("mock", 4));
+            let embedding_def = EmbeddingDefinition::new("text", "mock", Some("text_vec"));
+
+            let output_schema = Arc::new(Schema::new(vec![
+                Field::new("text", DataType::Utf8, false),
+                Field::new(
+                    "text_vec",
+                    DataType::FixedSizeList(
+                        Arc::new(Field::new("item", DataType::Float32, true)),
+                        4,
+                    ),
+                    false,
+                ),
+                Field::new("score", DataType::Float64, true),
+            ]));
+
+            let mut scannable = WithEmbeddingsScannable::with_schema(
+                Box::new(batch),
+                vec![(embedding_def, mock_embedding)],
+                output_schema,
+            )
+            .unwrap();
+
+            let stream = scannable.scan_as_stream();
+            let results: Result<Vec<RecordBatch>> = stream.try_collect().await;
+            let err = results.expect_err("expected an error");
+            assert!(
+                matches!(&err, Error::InvalidInput { message } if message.contains("score")),
+                "expected InvalidInput about missing 'score' column, got: {err:?}"
+            );
+        }
    }
 }
--- a/rust/lancedb/src/index/vector.rs
+++ b/rust/lancedb/src/index/vector.rs
@@ -23,17 +23,12 @@ impl VectorIndex {
            .fields
            .iter()
            .map(|field_id| {
-                manifest
-                    .schema
-                    .field_by_id(*field_id)
-                    .unwrap_or_else(|| {
-                        panic!(
-                            "field {field_id} of index {} must exist in schema",
-                            index.name
-                        )
-                    })
-                    .name
-                    .clone()
+                manifest.schema.field_path(*field_id).unwrap_or_else(|_| {
+                    panic!(
+                        "field {field_id} of index {} must exist in schema",
+                        index.name
+                    )
+                })
            })
            .collect();
        Self {
--- a/rust/lancedb/src/remote/client.rs
+++ b/rust/lancedb/src/remote/client.rs
@@ -245,6 +245,9 @@ pub struct RestfulLanceDbClient<S: HttpSend = Sender> {
    pub(crate) sender: S,
    pub(crate) id_delimiter: String,
    pub(crate) header_provider: Option<Arc<dyn HeaderProvider>>,
+    /// Connection-level read consistency interval. Drives the
+    /// `x-lancedb-min-timestamp` freshness header sent on read requests.
+    pub(crate) read_consistency_interval: Option<Duration>,
 }

 impl<S: HttpSend> std::fmt::Debug for RestfulLanceDbClient<S> {
@@ -338,6 +341,7 @@ impl RestfulLanceDbClient<Sender> {
        host_override: Option<String>,
        default_headers: HeaderMap,
        client_config: ClientConfig,
+        read_consistency_interval: Option<Duration>,
    ) -> Result<Self> {
        // Get the timeouts
        let timeout =
@@ -435,6 +439,7 @@ impl RestfulLanceDbClient<Sender> {
                .clone()
                .unwrap_or("$".to_string()),
            header_provider: client_config.header_provider,
+            read_consistency_interval,
        })
    }
 }
@@ -840,6 +845,16 @@ pub mod test_utils {
    pub fn client_with_handler<T>(
        handler: impl Fn(reqwest::Request) -> http::response::Response<T> + Send + Sync + 'static,
    ) -> RestfulLanceDbClient<MockSender>
+    where
+        T: Into<reqwest::Body>,
+    {
+        client_with_handler_and_interval(handler, None)
+    }
+
+    pub fn client_with_handler_and_interval<T>(
+        handler: impl Fn(reqwest::Request) -> http::response::Response<T> + Send + Sync + 'static,
+        read_consistency_interval: Option<Duration>,
+    ) -> RestfulLanceDbClient<MockSender>
    where
        T: Into<reqwest::Body>,
    {
@@ -857,6 +872,7 @@ pub mod test_utils {
            },
            id_delimiter: "$".to_string(),
            header_provider: None,
+            read_consistency_interval,
        }
    }

@@ -881,6 +897,7 @@ pub mod test_utils {
            },
            id_delimiter: config.id_delimiter.unwrap_or_else(|| "$".to_string()),
            header_provider: config.header_provider,
+            read_consistency_interval: None,
        }
    }
 }
@@ -888,6 +905,7 @@ pub mod test_utils {
 #[cfg(test)]
 mod tests {
    use super::*;
+    use serial_test::serial;
    use std::time::Duration;

    #[test]
@@ -1046,6 +1064,7 @@ mod tests {
            sender: Sender,
            id_delimiter: "+".to_string(),
            header_provider: Some(Arc::new(provider) as Arc<dyn HeaderProvider>),
+            read_consistency_interval: None,
        };

        // Apply dynamic headers
@@ -1081,6 +1100,7 @@ mod tests {
            sender: Sender,
            id_delimiter: "+".to_string(),
            header_provider: Some(Arc::new(provider) as Arc<dyn HeaderProvider>),
+            read_consistency_interval: None,
        };

        // Apply dynamic headers
@@ -1118,6 +1138,7 @@ mod tests {
            sender: Sender,
            id_delimiter: "+".to_string(),
            header_provider: Some(Arc::new(provider) as Arc<dyn HeaderProvider>),
+            read_consistency_interval: None,
        };

        // Header provider errors should fail the request
@@ -1143,6 +1164,7 @@ mod tests {
    }

    #[test]
+    #[serial(user_id_env)]
    fn test_resolve_user_id_none() {
        let config = ClientConfig::default();
        // Clear env vars that might be set from other tests
@@ -1155,6 +1177,7 @@ mod tests {
    }

    #[test]
+    #[serial(user_id_env)]
    fn test_resolve_user_id_from_env() {
        // SAFETY: This is only called in tests
        unsafe {
@@ -1169,6 +1192,7 @@ mod tests {
    }

    #[test]
+    #[serial(user_id_env)]
    fn test_resolve_user_id_from_env_key() {
        // SAFETY: This is only called in tests
        unsafe {
@@ -1189,6 +1213,7 @@ mod tests {
    }

    #[test]
+    #[serial(user_id_env)]
    fn test_resolve_user_id_direct_takes_precedence() {
        // SAFETY: This is only called in tests
        unsafe {
@@ -1206,6 +1231,7 @@ mod tests {
    }

    #[test]
+    #[serial(user_id_env)]
    fn test_resolve_user_id_empty_env_ignored() {
        // SAFETY: This is only called in tests
        unsafe {
--- a/rust/lancedb/src/remote/db.rs
+++ b/rust/lancedb/src/remote/db.rs
@@ -206,6 +206,7 @@ impl RemoteDatabase {
        host_override: Option<String>,
        client_config: ClientConfig,
        options: RemoteOptions,
+        read_consistency_interval: Option<std::time::Duration>,
    ) -> Result<Self> {
        let parsed = super::client::parse_db_url(uri)?;
        let header_map = RestfulLanceDbClient::<Sender>::default_headers(
@@ -233,6 +234,7 @@ impl RemoteDatabase {
            host_override,
            header_map,
            client_config.clone(),
+            read_consistency_interval,
        )?;

        let table_cache = Cache::builder()
--- a/rust/lancedb/src/remote/table.rs
+++ b/rust/lancedb/src/remote/table.rs
--- a/rust/lancedb/src/table.rs
+++ b/rust/lancedb/src/table.rs
@@ -253,6 +253,36 @@ pub enum Filter {
    Datafusion(Expr),
 }

+/// A predicate for filtering rows in delete operations.
+///
+/// Accepts either a SQL string or a DataFusion [`Expr`]. Use the [`From`]
+/// implementations to convert from `&str` or `&Expr` automatically.
+/// See [`Table::delete`] for usage examples.
+pub enum Predicate<'a> {
+    /// A SQL predicate string
+    String(&'a str),
+    /// A DataFusion logical expression
+    Expr(&'a Expr),
+}
+
+impl<'a> From<&'a str> for Predicate<'a> {
+    fn from(s: &'a str) -> Self {
+        Predicate::String(s)
+    }
+}
+
+impl<'a> From<&'a String> for Predicate<'a> {
+    fn from(s: &'a String) -> Self {
+        Predicate::String(s.as_str())
+    }
+}
+
+impl<'a> From<&'a Expr> for Predicate<'a> {
+    fn from(e: &'a Expr) -> Self {
+        Predicate::Expr(e)
+    }
+}
+
 #[async_trait]
 pub trait Tags: Send + Sync {
    /// List the tags of the table.
@@ -282,17 +312,15 @@ pub use self::merge::MergeResult;
 /// date) and [`LsmWriteSpec::with_writer_config_defaults`] (default
 /// `ShardWriter` configuration recorded in the MemWAL index).
 ///
-/// All variants require the table to have an unenforced primary key.
-///
 /// Install a spec with [`Table::set_lsm_write_spec`] and remove it with
 /// [`Table::unset_lsm_write_spec`]. The actual `merge_insert` dispatch
 /// onto the MemWAL writer is a follow-up.
 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
 pub enum LsmWriteSpec {
-    /// Hash-bucket sharding by the unenforced primary key column.
+    /// Hash-bucket sharding by a scalar column.
    ///
-    /// `column` must equal the table's currently-set single-column
-    /// unenforced primary key. `num_buckets` must be in `[1, 1024]`.
+    /// `column` must be a non-nested column with a supported scalar type.
+    /// `num_buckets` must be in `[1, 1024]`.
    /// Iceberg-compatible Murmur3-x86-32 (seed 0) is used so each row's
    /// `bucket(column, num_buckets)` value is stable across processes.
    Bucket {
@@ -491,8 +519,8 @@ pub trait BaseTable: std::fmt::Display + std::fmt::Debug + Send + Sync {

    /// Add new records to the table.
    async fn add(&self, add: AddDataBuilder) -> Result<AddResult>;
-    /// Delete rows from the table.
-    async fn delete(&self, predicate: &str) -> Result<DeleteResult>;
+    /// Delete rows from the table matching the given [`Predicate`].
+    async fn delete(&self, predicate: Predicate<'_>) -> Result<DeleteResult>;
    /// Update rows in the table.
    async fn update(&self, update: UpdateBuilder) -> Result<UpdateResult>;
    /// Create an index on the provided column(s).
@@ -656,6 +684,30 @@ mod test_utils {
            }
        }

+        pub fn new_with_handler_and_interval<T>(
+            name: impl Into<String>,
+            handler: impl Fn(reqwest::Request) -> http::Response<T> + Clone + Send + Sync + 'static,
+            read_consistency_interval: Option<std::time::Duration>,
+        ) -> Self
+        where
+            T: Into<reqwest::Body>,
+        {
+            let inner = Arc::new(
+                crate::remote::table::RemoteTable::new_mock_with_consistency_interval(
+                    name.into(),
+                    handler.clone(),
+                    read_consistency_interval,
+                ),
+            );
+            let database = Arc::new(crate::remote::db::RemoteDatabase::new_mock(handler));
+            Self {
+                inner,
+                database: Some(database),
+                // Registry is unused.
+                embedding_registry: Arc::new(MemoryRegistry::new()),
+            }
+        }
+
        pub fn new_with_handler_version<T>(
            name: impl Into<String>,
            version: semver::Version,
@@ -860,7 +912,8 @@ impl Table {
    /// Delete the rows from table that match the predicate.
    ///
    /// # Arguments
-    /// - `predicate` - The SQL predicate string to filter the rows to be deleted.
+    /// - `predicate` - A SQL string (`&str`) or DataFusion expression (`&Expr`)
+    ///   that selects the rows to delete.
    ///
    /// # Example
    ///
@@ -869,6 +922,7 @@ impl Table {
    /// # use arrow_array::{FixedSizeListArray, types::Float32Type, RecordBatch,
    /// #   RecordBatchIterator, Int32Array};
    /// # use arrow_schema::{Schema, Field, DataType};
+    /// use datafusion_expr::{col, lit};
    /// # tokio::runtime::Runtime::new().unwrap().block_on(async {
    /// let tmpdir = tempfile::tempdir().unwrap();
    /// let db = lancedb::connect(tmpdir.path().to_str().unwrap())
@@ -898,11 +952,17 @@ impl Table {
    ///     .execute()
    ///     .await
    ///     .unwrap();
+    ///
+    /// // Using a SQL string:
    /// tbl.delete("id > 5").await.unwrap();
+    ///
+    /// // Using a DataFusion expression:
+    /// let expr = col("id").lt(lit(4));
+    /// tbl.delete(&expr).await.unwrap();
    /// # });
    /// ```
-    pub async fn delete(&self, predicate: &str) -> Result<DeleteResult> {
-        self.inner.delete(predicate).await
+    pub async fn delete(&self, predicate: impl Into<Predicate<'_>>) -> Result<DeleteResult> {
+        self.inner.delete(predicate.into()).await
    }

    /// Create an index on the provided column(s).
@@ -1298,21 +1358,15 @@ impl Table {
    ///
    /// [`LsmWriteSpec`] chooses one of three sharding strategies:
    ///
-    /// - [`LsmWriteSpec::bucket`] — hash-bucket writes by the single-column
-    ///   unenforced primary key.
+    /// - [`LsmWriteSpec::bucket`] — hash-bucket writes by a scalar column.
    /// - [`LsmWriteSpec::identity`] — shard by the raw value of a scalar column.
    /// - [`LsmWriteSpec::unsharded`] — route every write to a single shard.
    ///
-    /// All variants require the table to have an unenforced primary key
-    /// ([`Table::set_unenforced_primary_key`]); bucket sharding additionally
-    /// requires it to be the single column being bucketed.
-    ///
    /// # Example
    ///
    /// ```
    /// # use lancedb::table::{LsmWriteSpec, Table};
    /// # async fn example(table: &Table) -> Result<(), Box<dyn std::error::Error>> {
-    /// table.set_unenforced_primary_key(["id"]).await?;
    /// table
    ///     .set_lsm_write_spec(
    ///         LsmWriteSpec::bucket("id", 16).with_maintained_indexes(["id_idx"]),
@@ -2171,6 +2225,33 @@ impl NativeTable {
        }
    }

+    fn resolve_index_field(
+        schema: &lance_core::datatypes::Schema,
+        column: &str,
+    ) -> Result<(String, Field)> {
+        lance_core::datatypes::parse_field_path(column).map_err(|e| Error::InvalidInput {
+            message: format!("Invalid field path `{}`: {}", column, e),
+        })?;
+
+        let field_path = schema
+            .resolve_case_insensitive(column)
+            .ok_or_else(|| Error::Schema {
+                message: format!(
+                    "Field path `{}` not found in schema. Available field paths: {}",
+                    column,
+                    schema.field_paths().join(", ")
+                ),
+            })?;
+        let field = field_path.last().expect("field path should be non-empty");
+        let path_segments = field_path
+            .iter()
+            .map(|field| field.name.as_str())
+            .collect::<Vec<_>>();
+        let canonical_path = lance_core::datatypes::format_field_path(&path_segments);
+
+        Ok((canonical_path, Field::from(*field)))
+    }
+
    // Convert LanceDB Index to Lance IndexParams
    async fn make_index_params(
        &self,
@@ -2661,15 +2742,13 @@ impl BaseTable for NativeTable {
                message: "Multi-column (composite) indices are not yet supported".to_string(),
            });
        }
-        let schema = self.schema().await?;
-
-        let field = schema.field_with_name(&opts.columns[0])?;
-
-        let lance_idx_params = self.make_index_params(field, opts.index.clone()).await?;
-        let index_type = self.get_index_type_for_field(field, &opts.index);
-        let columns = [field.name().as_str()];
        self.dataset.ensure_mutable()?;
        let mut dataset = (*self.dataset.get().await?).clone();
+        let (column, field) = Self::resolve_index_field(dataset.schema(), &opts.columns[0])?;
+
+        let lance_idx_params = self.make_index_params(&field, opts.index.clone()).await?;
+        let index_type = self.get_index_type_for_field(&field, &opts.index);
+        let columns = [column.as_str()];
        let mut builder = dataset
            .create_index_builder(&columns, index_type, lance_idx_params.as_ref())
            .train(opts.train)
@@ -2752,8 +2831,7 @@ impl BaseTable for NativeTable {
    }

    /// Delete rows from the table
-    async fn delete(&self, predicate: &str) -> Result<DeleteResult> {
-        // Delegate to the submodule implementation
+    async fn delete(&self, predicate: Predicate<'_>) -> Result<DeleteResult> {
        delete::execute_delete(self, predicate).await
    }

@@ -2787,54 +2865,88 @@ impl BaseTable for NativeTable {
    async fn list_indices(&self) -> Result<Vec<IndexConfig>> {
        let dataset = self.dataset.get().await?;
        let indices = dataset.load_indices().await?;
-        let results = futures::stream::iter(indices.as_slice()).then(|idx| async {
-
-            // skip Lance internal indexes
-            if idx.name == FRAG_REUSE_INDEX_NAME {
-                return None;
-            }
-
-            let stats = match dataset.index_statistics(idx.name.as_str()).await {
-                Ok(stats) => stats,
-                Err(e) => {
-                    log::warn!("Failed to get statistics for index {} ({}): {}", idx.name, idx.uuid, e);
+        let results = futures::stream::iter(indices.as_slice())
+            .then(|idx| async {
+                // skip Lance internal indexes
+                if idx.name == FRAG_REUSE_INDEX_NAME {
                    return None;
                }
-            };

-            let stats: serde_json::Value = match serde_json::from_str(&stats) {
-                Ok(stats) => stats,
-                Err(e) => {
-                    log::warn!("Failed to deserialize index statistics for index {} ({}): {}", idx.name, idx.uuid, e);
-                    return None;
-                }
-            };
+                let stats = match dataset.index_statistics(idx.name.as_str()).await {
+                    Ok(stats) => stats,
+                    Err(e) => {
+                        log::warn!(
+                            "Failed to get statistics for index {} ({}): {}",
+                            idx.name,
+                            idx.uuid,
+                            e
+                        );
+                        return None;
+                    }
+                };

-            let Some(index_type) = stats.get("index_type").and_then(|v| v.as_str()) else {
-                log::warn!("Index statistics was missing 'index_type' field for index {} ({})", idx.name, idx.uuid);
-                return None;
-            };
+                let stats: serde_json::Value = match serde_json::from_str(&stats) {
+                    Ok(stats) => stats,
+                    Err(e) => {
+                        log::warn!(
+                            "Failed to deserialize index statistics for index {} ({}): {}",
+                            idx.name,
+                            idx.uuid,
+                            e
+                        );
+                        return None;
+                    }
+                };

-            let index_type: crate::index::IndexType = match index_type.parse() {
-                Ok(index_type) => index_type,
-                Err(e) => {
-                    log::warn!("Failed to parse index type for index {} ({}): {}", idx.name, idx.uuid, e);
-                    return None;
-                }
-            };
-
-            let mut columns = Vec::with_capacity(idx.fields.len());
-            for field_id in &idx.fields {
-                let Some(field) = dataset.schema().field_by_id(*field_id) else {
-                    log::warn!("The index {} ({}) referenced a field with id {} which does not exist in the schema", idx.name, idx.uuid, field_id);
+                let Some(index_type) = stats.get("index_type").and_then(|v| v.as_str()) else {
+                    log::warn!(
+                        "Index statistics was missing 'index_type' field for index {} ({})",
+                        idx.name,
+                        idx.uuid
+                    );
                    return None;
                };
-                columns.push(field.name.clone());
-            }

-            let name = idx.name.clone();
-            Some(IndexConfig { index_type, columns, name })
-        }).collect::<Vec<_>>().await;
+                let index_type: crate::index::IndexType = match index_type.parse() {
+                    Ok(index_type) => index_type,
+                    Err(e) => {
+                        log::warn!(
+                            "Failed to parse index type for index {} ({}): {}",
+                            idx.name,
+                            idx.uuid,
+                            e
+                        );
+                        return None;
+                    }
+                };
+
+                let mut columns = Vec::with_capacity(idx.fields.len());
+                for field_id in &idx.fields {
+                    let field_path = match dataset.schema().field_path(*field_id) {
+                        Ok(field_path) => field_path,
+                        Err(e) => {
+                            log::warn!(
+                                "Failed to resolve field path for index {} ({}) field id {}: {}",
+                                idx.name,
+                                idx.uuid,
+                                field_id,
+                                e
+                            );
+                            return None;
+                        }
+                    };
+                    columns.push(field_path);
+                }
+
+                let name = idx.name.clone();
+                Some(IndexConfig {
+                    index_type,
+                    columns,
+                    name,
+                })
+            })
+            .collect::<Vec<_>>()
+            .await;

        Ok(results.into_iter().flatten().collect())
    }
@@ -3037,13 +3149,14 @@ pub struct FragmentSummaryStats {
 #[cfg(test)]
 #[allow(deprecated)]
 mod tests {
+    use std::collections::HashMap;
    use std::sync::Arc;
    use std::sync::atomic::{AtomicBool, Ordering};
    use std::time::Duration;

    use arrow_array::{
-        Array, BooleanArray, FixedSizeListArray, Int32Array, LargeStringArray, RecordBatch,
-        RecordBatchIterator, RecordBatchReader, StringArray,
+        Array, ArrayRef, BooleanArray, FixedSizeListArray, Int32Array, LargeStringArray,
+        RecordBatch, RecordBatchIterator, RecordBatchReader, StringArray, StructArray,
        builder::{ListBuilder, StringBuilder},
    };
    use arrow_array::{BinaryArray, LargeBinaryArray};
@@ -3063,6 +3176,7 @@ mod tests {
    use crate::query::Select;
    use crate::query::{ExecutableQuery, QueryBase};
    use crate::test_utils::connection::new_test_connection;
+    use lance_index::scalar::FullTextSearchQuery;
    #[tokio::test]
    async fn test_open() {
        let tmp_dir = tempdir().unwrap();
@@ -3650,6 +3764,222 @@ mod tests {
        assert_eq!(stats.num_unindexed_rows, 0);
    }

+    #[tokio::test]
+    async fn test_create_index_nested_field_paths() {
+        let tmp_dir = tempdir().unwrap();
+        let uri = tmp_dir.path().to_str().unwrap();
+        let conn = ConnectBuilder::new(uri).execute().await.unwrap();
+
+        let num_rows = 512;
+        let dimension = 8;
+
+        let metadata = Arc::new(StructArray::from(vec![(
+            Arc::new(Field::new("user_id", DataType::Int32, false)),
+            Arc::new(Int32Array::from_iter_values(0..num_rows)) as ArrayRef,
+        )]));
+
+        let vector_values = arrow_array::Float32Array::from_iter_values(
+            (0..num_rows * dimension).map(|v| v as f32),
+        );
+        let embeddings =
+            Arc::new(create_fixed_size_list(vector_values, dimension).unwrap()) as ArrayRef;
+        let image = Arc::new(StructArray::from(vec![(
+            Arc::new(Field::new(
+                "embedding",
+                embeddings.data_type().clone(),
+                false,
+            )),
+            embeddings,
+        )]));
+
+        let payload = Arc::new(StructArray::from(vec![(
+            Arc::new(Field::new("text", DataType::Utf8, false)),
+            Arc::new(StringArray::from_iter_values(
+                (0..num_rows).map(|i| format!("document {}", i)),
+            )) as ArrayRef,
+        )]));
+
+        let meta_data = Arc::new(StructArray::from(vec![(
+            Arc::new(Field::new("user-id", DataType::Int32, false)),
+            Arc::new(Int32Array::from_iter_values(0..num_rows)) as ArrayRef,
+        )]));
+
+        let literal = Arc::new(StructArray::from(vec![(
+            Arc::new(Field::new("a.b", DataType::Int32, false)),
+            Arc::new(Int32Array::from_iter_values(0..num_rows)) as ArrayRef,
+        )]));
+
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("metadata", metadata.data_type().clone(), false),
+            Field::new("image", image.data_type().clone(), false),
+            Field::new("payload", payload.data_type().clone(), false),
+            Field::new("meta-data", meta_data.data_type().clone(), false),
+            Field::new("literal", literal.data_type().clone(), false),
+        ]));
+        let batch =
+            RecordBatch::try_new(schema, vec![metadata, image, payload, meta_data, literal])
+                .unwrap();
+
+        let table = conn
+            .create_table("nested_index_paths", batch)
+            .execute()
+            .await
+            .unwrap();
+
+        table
+            .create_index(
+                &["metadata.user_id"],
+                Index::BTree(BTreeIndexBuilder::default()),
+            )
+            .name("metadata_user_id_idx".to_string())
+            .execute()
+            .await
+            .unwrap();
+        table
+            .create_index(&["image.embedding"], Index::Auto)
+            .name("image_embedding_idx".to_string())
+            .execute()
+            .await
+            .unwrap();
+        table
+            .create_index(&["payload.text"], Index::FTS(Default::default()))
+            .name("payload_text_idx".to_string())
+            .execute()
+            .await
+            .unwrap();
+        table
+            .create_index(
+                &["`meta-data`.`user-id`"],
+                Index::BTree(BTreeIndexBuilder::default()),
+            )
+            .name("escaped_names_idx".to_string())
+            .execute()
+            .await
+            .unwrap();
+        table
+            .create_index(
+                &["literal.`a.b`"],
+                Index::BTree(BTreeIndexBuilder::default()),
+            )
+            .name("literal_dot_idx".to_string())
+            .execute()
+            .await
+            .unwrap();
+
+        let mut index_configs = table.list_indices().await.unwrap();
+        index_configs.sort_by(|left, right| left.name.cmp(&right.name));
+
+        let indexed_columns = index_configs
+            .iter()
+            .map(|index| {
+                (
+                    index.name.as_str(),
+                    index.columns.as_slice(),
+                    index.index_type.clone(),
+                )
+            })
+            .collect::<Vec<_>>();
+        assert_eq!(
+            indexed_columns,
+            vec![
+                (
+                    "escaped_names_idx",
+                    &["`meta-data`.`user-id`".to_string()][..],
+                    crate::index::IndexType::BTree,
+                ),
+                (
+                    "image_embedding_idx",
+                    &["image.embedding".to_string()][..],
+                    crate::index::IndexType::IvfPq,
+                ),
+                (
+                    "literal_dot_idx",
+                    &["literal.`a.b`".to_string()][..],
+                    crate::index::IndexType::BTree,
+                ),
+                (
+                    "metadata_user_id_idx",
+                    &["metadata.user_id".to_string()][..],
+                    crate::index::IndexType::BTree,
+                ),
+                (
+                    "payload_text_idx",
+                    &["payload.text".to_string()][..],
+                    crate::index::IndexType::FTS,
+                ),
+            ]
+        );
+
+        let vector_results = table
+            .query()
+            .nearest_to(&[0.0; 8])
+            .unwrap()
+            .column("image.embedding")
+            .limit(1)
+            .execute()
+            .await
+            .unwrap()
+            .try_collect::<Vec<_>>()
+            .await
+            .unwrap();
+        assert_eq!(
+            vector_results
+                .iter()
+                .map(|batch| batch.num_rows())
+                .sum::<usize>(),
+            1
+        );
+
+        let default_vector_results = table
+            .query()
+            .nearest_to(&[0.0; 8])
+            .unwrap()
+            .limit(1)
+            .execute()
+            .await
+            .unwrap()
+            .try_collect::<Vec<_>>()
+            .await
+            .unwrap();
+        assert_eq!(
+            default_vector_results
+                .iter()
+                .map(|batch| batch.num_rows())
+                .sum::<usize>(),
+            1
+        );
+
+        let fts_results = table
+            .query()
+            .full_text_search(FullTextSearchQuery::new("document".to_string()))
+            .limit(5)
+            .execute()
+            .await
+            .unwrap()
+            .try_collect::<Vec<_>>()
+            .await
+            .unwrap();
+        assert!(!fts_results.is_empty());
+
+        let filtered_results = table
+            .query()
+            .only_if("metadata.user_id = 42")
+            .limit(1)
+            .execute()
+            .await
+            .unwrap()
+            .try_collect::<Vec<_>>()
+            .await
+            .unwrap();
+        assert_eq!(
+            filtered_results
+                .iter()
+                .map(|batch| batch.num_rows())
+                .sum::<usize>(),
+            1
+        );
+    }
+
    #[tokio::test]
    async fn test_create_bitmap_index() {
        let tmp_dir = tempdir().unwrap();
@@ -4323,21 +4653,6 @@ mod tests {
            .unwrap();
        let table = conn.create_table("t", reader).execute().await.unwrap();

-        // Reject when no PK is set.
-        let err = table
-            .set_lsm_write_spec(LsmWriteSpec::bucket("id", 4))
-            .await
-            .expect_err("should reject without PK");
-        assert!(matches!(err, Error::Lance { .. }), "got {:?}", err);
-
-        // Set PK, then a mismatched column on the spec must be rejected.
-        table.set_unenforced_primary_key(["id"]).await.unwrap();
-        let err = table
-            .set_lsm_write_spec(LsmWriteSpec::bucket("name", 4))
-            .await
-            .expect_err("should reject column != PK");
-        assert!(matches!(err, Error::Lance { .. }), "got {:?}", err);
-
        // Reject num_buckets out of range.
        for bad in [0u32, 1025] {
            let err = table
@@ -4403,9 +4718,6 @@ mod tests {
            .unwrap();
        let table = conn.create_table("t", reader).execute().await.unwrap();

-        // Lance's MemWAL still requires *some* unenforced primary key on
-        // the dataset; Unsharded just skips the per-row hashing step.
-        table.set_unenforced_primary_key(["id"]).await.unwrap();
        table
            .set_lsm_write_spec(LsmWriteSpec::unsharded())
            .await
@@ -4452,7 +4764,6 @@ mod tests {
            .unwrap();
        let table = conn.create_table("t", reader).execute().await.unwrap();

-        table.set_unenforced_primary_key(["id"]).await.unwrap();
        table
            .set_lsm_write_spec(
                LsmWriteSpec::identity("region")
@@ -4508,7 +4819,6 @@ mod tests {
        table.unset_lsm_write_spec().await.unwrap_err();

        // Install a spec, then unset it.
-        table.set_unenforced_primary_key(["id"]).await.unwrap();
        table
            .set_lsm_write_spec(LsmWriteSpec::bucket("id", 4))
            .await
--- a/rust/lancedb/src/table/add_data.rs
+++ b/rust/lancedb/src/table/add_data.rs
@@ -268,7 +268,9 @@ mod tests {
    };
    use crate::query::{ExecutableQuery, QueryBase, Select};
    use crate::table::add_data::NaNVectorBehavior;
-    use crate::table::{ColumnDefinition, ColumnKind, Table, TableDefinition, WriteOptions};
+    use crate::table::{
+        ColumnDefinition, ColumnKind, NewColumnTransform, Table, TableDefinition, WriteOptions,
+    };
    use crate::test_utils::TestCustomError;
    use crate::test_utils::embeddings::MockEmbed;

@@ -518,6 +520,225 @@ mod tests {
        }
    }

+    /// Regression test for https://github.com/lancedb/lancedb/issues/3136.
+    ///
+    /// When a column is added via `add_columns` AFTER an embedding column,
+    /// the table schema becomes `[..., embedding, extra]`. Subsequent
+    /// `table.add()` calls used to fail with a CastError because columns
+    /// were matched positionally rather than by name.
+    #[tokio::test]
+    async fn test_add_with_embeddings_after_add_columns() {
+        let registry = Arc::new(MemoryRegistry::new());
+        let mock_embedding: Arc<dyn EmbeddingFunction> = Arc::new(MockEmbed::new("mock", 4));
+        registry.register("mock", mock_embedding).unwrap();
+
+        let conn = connect("memory://")
+            .embedding_registry(registry)
+            .execute()
+            .await
+            .unwrap();
+
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("text", DataType::Utf8, false),
+            Field::new(
+                "text_vec",
+                DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Float32, true)), 4),
+                false,
+            ),
+        ]));
+
+        let embedding_def = EmbeddingDefinition::new("text", "mock", Some("text_vec"));
+        let table_def = TableDefinition::new(
+            schema.clone(),
+            vec![
+                ColumnDefinition {
+                    kind: ColumnKind::Physical,
+                },
+                ColumnDefinition {
+                    kind: ColumnKind::Embedding(embedding_def),
+                },
+            ],
+        );
+        let rich_schema = table_def.into_rich_schema();
+
+        let table = conn
+            .create_empty_table("embed_evol_test", rich_schema)
+            .execute()
+            .await
+            .unwrap();
+
+        // Seed a row so add_columns has data to compute against.
+        let seed_batch = record_batch!(("text", Utf8, ["hello"])).unwrap();
+        table.add(seed_batch).execute().await.unwrap();
+
+        // Add a new physical column AFTER the embedding column.
+        table
+            .add_columns(
+                NewColumnTransform::SqlExpressions(vec![("score".into(), "42.0".into())]),
+                None,
+            )
+            .await
+            .unwrap();
+
+        // Now add data including the new column but WITHOUT the embedding.
+        // The input batch column order is [text, score]; after computing the
+        // embedding it becomes [text, score, text_vec], but the table schema
+        // is [text, text_vec, score]. Columns must be matched by name.
+        let new_schema = Arc::new(Schema::new(vec![
+            Field::new("text", DataType::Utf8, false),
+            Field::new("score", DataType::Float64, true),
+        ]));
+        let new_batch = RecordBatch::try_new(
+            new_schema,
+            vec![
+                Arc::new(arrow_array::StringArray::from(vec!["foo", "bar"])),
+                Arc::new(arrow_array::Float64Array::from(vec![1.0, 2.0])),
+            ],
+        )
+        .unwrap();
+        table.add(new_batch).execute().await.unwrap();
+
+        assert_eq!(table.count_rows(None).await.unwrap(), 3);
+
+        let results: Vec<RecordBatch> = table
+            .query()
+            .select(Select::columns(&["text", "text_vec", "score"]))
+            .execute()
+            .await
+            .unwrap()
+            .try_collect()
+            .await
+            .unwrap();
+
+        let total_rows: usize = results.iter().map(|b| b.num_rows()).sum();
+        assert_eq!(total_rows, 3);
+        for batch in &results {
+            // text_vec must be populated for the newly added rows too.
+            assert_eq!(batch.column(1).null_count(), 0);
+        }
+    }
+
+    /// Like `test_add_with_embeddings_after_add_columns`, but the column
+    /// added after the embedding is a nested struct rather than a scalar.
+    /// Verifies that name-based column matching also works when the
+    /// post-embedding column has a complex Arrow type.
+    #[tokio::test]
+    async fn test_add_with_embeddings_after_add_nested_columns() {
+        let registry = Arc::new(MemoryRegistry::new());
+        let mock_embedding: Arc<dyn EmbeddingFunction> = Arc::new(MockEmbed::new("mock", 4));
+        registry.register("mock", mock_embedding).unwrap();
+
+        let conn = connect("memory://")
+            .embedding_registry(registry)
+            .execute()
+            .await
+            .unwrap();
+
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("text", DataType::Utf8, false),
+            Field::new(
+                "text_vec",
+                DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Float32, true)), 4),
+                false,
+            ),
+        ]));
+
+        let embedding_def = EmbeddingDefinition::new("text", "mock", Some("text_vec"));
+        let table_def = TableDefinition::new(
+            schema,
+            vec![
+                ColumnDefinition {
+                    kind: ColumnKind::Physical,
+                },
+                ColumnDefinition {
+                    kind: ColumnKind::Embedding(embedding_def),
+                },
+            ],
+        );
+        let rich_schema = table_def.into_rich_schema();
+
+        let table = conn
+            .create_empty_table("embed_nested_test", rich_schema)
+            .execute()
+            .await
+            .unwrap();
+
+        let seed_batch = record_batch!(("text", Utf8, ["hello"])).unwrap();
+        table.add(seed_batch).execute().await.unwrap();
+
+        // Add a STRUCT column after the embedding column.
+        let meta_struct = DataType::Struct(
+            vec![
+                Field::new("source", DataType::Utf8, true),
+                Field::new("score", DataType::Float64, true),
+            ]
+            .into(),
+        );
+        let nested_schema = Arc::new(Schema::new(vec![Field::new(
+            "meta",
+            meta_struct.clone(),
+            true,
+        )]));
+        table
+            .add_columns(NewColumnTransform::AllNulls(nested_schema), None)
+            .await
+            .unwrap();
+
+        // Insert with the nested struct present but the embedding column
+        // absent. The computed batch is [text, meta, text_vec], but the
+        // table schema is [text, text_vec, meta] — only name-based matching
+        // can put `meta` (a struct) in the right slot.
+        let source = Arc::new(arrow_array::StringArray::from(vec!["foo", "bar"]));
+        let score = Arc::new(arrow_array::Float64Array::from(vec![1.0, 2.0]));
+        let meta = Arc::new(arrow_array::StructArray::from(vec![
+            (
+                Arc::new(Field::new("source", DataType::Utf8, true)),
+                source as Arc<dyn arrow_array::Array>,
+            ),
+            (
+                Arc::new(Field::new("score", DataType::Float64, true)),
+                score as Arc<dyn arrow_array::Array>,
+            ),
+        ]));
+        let new_schema = Arc::new(Schema::new(vec![
+            Field::new("text", DataType::Utf8, false),
+            Field::new("meta", meta_struct, true),
+        ]));
+        let new_batch = RecordBatch::try_new(
+            new_schema,
+            vec![
+                Arc::new(arrow_array::StringArray::from(vec!["foo", "bar"])),
+                meta,
+            ],
+        )
+        .unwrap();
+        table.add(new_batch).execute().await.unwrap();
+
+        assert_eq!(table.count_rows(None).await.unwrap(), 3);
+
+        let results: Vec<RecordBatch> = table
+            .query()
+            .select(Select::columns(&["text", "text_vec", "meta"]))
+            .execute()
+            .await
+            .unwrap()
+            .try_collect()
+            .await
+            .unwrap();
+
+        let total_rows: usize = results.iter().map(|b| b.num_rows()).sum();
+        assert_eq!(total_rows, 3);
+        for batch in &results {
+            assert_eq!(batch.schema().field(2).name(), "meta");
+            assert!(matches!(
+                batch.schema().field(2).data_type(),
+                DataType::Struct(_)
+            ));
+            // text_vec must be populated for the newly added rows too.
+            assert_eq!(batch.column(1).null_count(), 0);
+        }
+    }
+
    #[tokio::test]
    async fn test_add_casts_to_table_schema() {
        let table_schema = Arc::new(Schema::new(vec![
@@ -761,4 +982,105 @@ mod tests {
        table2.add(struct_batch).execute().await.unwrap();
        assert_eq!(table2.count_rows(None).await.unwrap(), 2);
    }
+
+    /// Regression test: appending `arrow.json` (PyArrow `pa.json_()`) data into a table
+    /// whose schema was created with `pa.json_()` (internally stored as `lance.json`, backed
+    /// by `LargeBinary`) must succeed without a schema-mismatch error.
+    ///
+    /// Previously `build_field_exprs` would attempt a `Utf8 → LargeBinary` DataFusion cast,
+    /// which produced a field whose Arrow extension metadata still read `arrow.json` instead
+    /// of `lance.json`.  Lance-core then rejected the append with
+    /// `"json vs large_binary" schema mismatch`.
+    ///
+    /// PyArrow's `pa.json_()` may be backed by either `Utf8` or `LargeUtf8` depending on the
+    /// constructor used, so the test is parameterized over the input backing type.
+    #[rstest::rstest]
+    #[case::utf8(DataType::Utf8)]
+    #[case::large_utf8(DataType::LargeUtf8)]
+    #[tokio::test]
+    async fn test_add_arrow_json_into_lance_json_table(#[case] input_type: DataType) {
+        use arrow_array::{Array, cast::AsArray};
+        use lance_arrow::ARROW_EXT_NAME_KEY;
+        use lance_arrow::json::{ARROW_JSON_EXT_NAME, JSON_EXT_NAME};
+
+        // Build a table whose "data" column is lance.json (LargeBinary +
+        // ARROW:extension:name = "lance.json").
+        let lance_json_field = lance_arrow::json::json_field("data", true);
+        let table_schema = Arc::new(Schema::new(vec![lance_json_field]));
+
+        let db = connect("memory://").execute().await.unwrap();
+        let table = db
+            .create_empty_table("json_test", table_schema)
+            .execute()
+            .await
+            .unwrap();
+
+        // Sanity-check the stored schema.
+        let stored_field = table.schema().await.unwrap();
+        let data_field = stored_field.field_with_name("data").unwrap();
+        assert_eq!(data_field.data_type(), &DataType::LargeBinary);
+        assert_eq!(
+            data_field
+                .metadata()
+                .get(ARROW_EXT_NAME_KEY)
+                .map(|s| s.as_str()),
+            Some(JSON_EXT_NAME),
+        );
+
+        // Build an arrow.json input field (Utf8/LargeUtf8 + arrow.json extension).
+        // This is what PyArrow produces for pa.json_() arrays.
+        let arrow_json_metadata = std::collections::HashMap::from([(
+            ARROW_EXT_NAME_KEY.to_string(),
+            ARROW_JSON_EXT_NAME.to_string(),
+        )]);
+        let arrow_json_field =
+            Field::new("data", input_type.clone(), true).with_metadata(arrow_json_metadata);
+        let arrow_json_schema = Arc::new(Schema::new(vec![arrow_json_field]));
+
+        let rows: Vec<Option<&str>> = vec![None, Some(r#"{"a": 1}"#), Some(r#"{"b": 2}"#)];
+        let string_array: Arc<dyn arrow_array::Array> = match input_type {
+            DataType::Utf8 => Arc::new(arrow_array::StringArray::from(rows.clone())),
+            DataType::LargeUtf8 => Arc::new(arrow_array::LargeStringArray::from(rows.clone())),
+            other => panic!("unsupported arrow.json backing type for this test: {other:?}"),
+        };
+        let batch = RecordBatch::try_new(arrow_json_schema, vec![string_array]).unwrap();
+
+        // This must not fail with a schema-mismatch error.
+        table.add(batch).execute().await.unwrap();
+
+        assert_eq!(table.count_rows(None).await.unwrap(), rows.len());
+
+        // A lance.json column is read back as Utf8 carrying arrow.json extension metadata.
+        let results: Vec<RecordBatch> = table
+            .query()
+            .select(Select::columns(&["data"]))
+            .execute()
+            .await
+            .unwrap()
+            .try_collect()
+            .await
+            .unwrap();
+
+        assert_eq!(results.len(), 1);
+        let batch = &results[0];
+        assert_eq!(batch.num_rows(), rows.len());
+
+        let json_col = batch.column(0);
+        assert_eq!(json_col.data_type(), &DataType::Utf8);
+        let json_strs = json_col.as_string::<i32>();
+
+        for (i, expected) in rows.iter().enumerate() {
+            match expected {
+                None => assert!(json_strs.is_null(i), "row {i} expected null"),
+                Some(raw) => {
+                    assert!(!json_strs.is_null(i), "row {i} expected non-null");
+                    let actual: serde_json::Value = serde_json::from_str(json_strs.value(i))
+                        .expect("read-back JSON should be valid");
+                    let expected: serde_json::Value =
+                        serde_json::from_str(raw).expect("expected JSON should be valid");
+                    assert_eq!(actual, expected, "row {i} JSON mismatch");
+                }
+            }
+        }
+    }
 }
--- a/rust/lancedb/src/table/datafusion/cast.rs
+++ b/rust/lancedb/src/table/datafusion/cast.rs
@@ -13,6 +13,7 @@ use datafusion_physical_expr::expressions::{CastExpr, Literal};
 use datafusion_physical_plan::expressions::Column;
 use datafusion_physical_plan::projection::ProjectionExec;
 use datafusion_physical_plan::{ExecutionPlan, PhysicalExpr};
+use lance_arrow::json::{is_arrow_json_field, is_json_field};

 use crate::{Error, Result};

@@ -64,6 +65,18 @@ fn build_field_exprs(
        let input_field = &input_fields[input_idx];
        let input_expr = get_input_expr(input_idx);

+        // Special case: input is arrow.json (PyArrow pa.json_() extension type backed by
+        // Utf8/LargeUtf8) and the table field is lance.json (backed by LargeBinary).
+        // Lance-core's write path already handles the arrow.json → lance.json conversion
+        // (including JSONB encoding), so we pass the expression through unchanged and let
+        // lance-core deal with it. Attempting to cast Utf8 → LargeBinary here would
+        // produce a field whose metadata still identifies it as arrow.json, which then
+        // causes a schema-mismatch error inside lance-core.
+        if is_arrow_json_field(input_field) && is_json_field(table_field) {
+            result.push((input_expr, Arc::clone(input_field) as FieldRef));
+            continue;
+        }
+
        let expr = match (input_field.data_type(), table_field.data_type()) {
            // Both are structs: recurse into sub-fields to handle subschemas and casts.
            (DataType::Struct(in_children), DataType::Struct(tbl_children))
@@ -618,4 +631,75 @@ mod tests {
            .unwrap();
        assert_eq!(a.values(), &[1, 3]);
    }
+
+    /// `arrow.json` input (PyArrow `pa.json_()`, Utf8/LargeUtf8 + extension metadata) against a
+    /// `lance.json` table field (LargeBinary + extension metadata) must be passed through
+    /// without a cast so that lance-core can perform its own arrow.json → JSONB conversion.
+    ///
+    /// Before the fix, `cast_to_table_schema` attempted a `Utf8 → LargeBinary` DataFusion
+    /// cast that preserved the wrong extension metadata, causing lance-core to reject the
+    /// batch with a "json vs large_binary" schema-mismatch error.
+    #[rstest::rstest]
+    #[case::utf8(DataType::Utf8)]
+    #[case::large_utf8(DataType::LargeUtf8)]
+    #[tokio::test]
+    async fn test_arrow_json_passthrough_to_lance_json(#[case] input_type: DataType) {
+        use lance_arrow::ARROW_EXT_NAME_KEY;
+        use lance_arrow::json::{ARROW_JSON_EXT_NAME, json_field};
+
+        // Build a table schema with a lance.json field (LargeBinary + lance.json metadata).
+        let lance_field = json_field("data", true);
+        let table_schema = Schema::new(vec![lance_field]);
+
+        // Build an input batch with an arrow.json field (Utf8/LargeUtf8 + arrow.json metadata).
+        let arrow_meta = std::collections::HashMap::from([(
+            ARROW_EXT_NAME_KEY.to_string(),
+            ARROW_JSON_EXT_NAME.to_string(),
+        )]);
+        let arrow_field = Field::new("data", input_type.clone(), true).with_metadata(arrow_meta);
+        let input_schema = Arc::new(Schema::new(vec![arrow_field]));
+
+        let values = vec![Some(r#"{"x": 1}"#), None, Some(r#"{"y": 2}"#)];
+        let input_array: Arc<dyn arrow_array::Array> = match input_type {
+            DataType::Utf8 => Arc::new(StringArray::from(values)),
+            DataType::LargeUtf8 => Arc::new(arrow_array::LargeStringArray::from(values)),
+            other => panic!("unsupported arrow.json backing type for this test: {other:?}"),
+        };
+        let input_batch = RecordBatch::try_new(input_schema, vec![input_array]).unwrap();
+
+        let plan = plan_from_batch(input_batch).await;
+        let projected = cast_to_table_schema(plan, &table_schema).unwrap();
+
+        // The projected schema's "data" field must carry arrow.json metadata
+        // (the input field), not be silently dropped or miscast.
+        let out_field = projected.schema().field_with_name("data").unwrap().clone();
+        assert_eq!(out_field.data_type(), &input_type);
+        assert_eq!(
+            out_field
+                .metadata()
+                .get(ARROW_EXT_NAME_KEY)
+                .map(|s| s.as_str()),
+            Some(ARROW_JSON_EXT_NAME),
+            "output field must still carry arrow.json metadata so lance-core can handle it"
+        );
+
+        // The data must flow through correctly (3 rows, no panic).
+        let result = collect(projected).await;
+        assert_eq!(result.num_rows(), 3);
+        let (v0, v2) = match input_type {
+            DataType::Utf8 => {
+                let col: &StringArray = result.column(0).as_any().downcast_ref().unwrap();
+                (col.value(0).to_string(), col.value(2).to_string())
+            }
+            DataType::LargeUtf8 => {
+                let col: &arrow_array::LargeStringArray =
+                    result.column(0).as_any().downcast_ref().unwrap();
+                (col.value(0).to_string(), col.value(2).to_string())
+            }
+            _ => unreachable!(),
+        };
+        assert_eq!(v0, r#"{"x": 1}"#);
+        assert!(result.column(0).is_null(1));
+        assert_eq!(v2, r#"{"y": 2}"#);
+    }
 }
--- a/rust/lancedb/src/table/delete.rs
+++ b/rust/lancedb/src/table/delete.rs
@@ -1,9 +1,12 @@
+use std::sync::Arc;
+
 use futures::FutureExt;
+use lance::dataset::DeleteBuilder;
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors
 use serde::{Deserialize, Serialize};

-use super::NativeTable;
+use super::{NativeTable, Predicate};
 use crate::Result;

 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
@@ -21,17 +24,39 @@ pub struct DeleteResult {
 /// Internal implementation of the delete logic
 ///
 /// This logic was moved from NativeTable::delete to keep table.rs clean.
-pub(crate) async fn execute_delete(table: &NativeTable, predicate: &str) -> Result<DeleteResult> {
+pub(crate) async fn execute_delete(
+    table: &NativeTable,
+    predicate: Predicate<'_>,
+) -> Result<DeleteResult> {
    table.dataset.ensure_mutable()?;
-    let mut dataset = (*table.dataset.get().await?).clone();
-    let delete_result = dataset.delete(predicate).boxed().await?;
-    let num_deleted_rows = delete_result.num_deleted_rows;
-    let version = dataset.version().version;
-    table.dataset.update(dataset);
-    Ok(DeleteResult {
-        num_deleted_rows,
-        version,
-    })
+    match predicate {
+        Predicate::String(s) => {
+            let mut dataset = (*table.dataset.get().await?).clone();
+            let delete_result = dataset.delete(s).boxed().await?;
+            let num_deleted_rows = delete_result.num_deleted_rows;
+            let version = dataset.version().version;
+            table.dataset.update(dataset);
+            Ok(DeleteResult {
+                num_deleted_rows,
+                version,
+            })
+        }
+        Predicate::Expr(expr) => {
+            let dataset = table.dataset.get().await?;
+            let delete_result = DeleteBuilder::from_expr(Arc::clone(&dataset), expr.clone())
+                .execute()
+                .await?;
+            let num_deleted_rows = delete_result.num_deleted_rows;
+            let version = delete_result.new_dataset.version().version;
+            table.dataset.update(
+                Arc::try_unwrap(delete_result.new_dataset).unwrap_or_else(|arc| (*arc).clone()),
+            );
+            Ok(DeleteResult {
+                num_deleted_rows,
+                version,
+            })
+        }
+    }
 }

 #[cfg(test)]
@@ -176,4 +201,100 @@ mod tests {
            "Table version must increment after delete operation"
        );
    }
+
+    #[tokio::test]
+    async fn test_delete_expr() {
+        use datafusion_expr::{col, lit};
+
+        let conn = connect("memory://").execute().await.unwrap();
+
+        // 1. Create a table with values 0 to 9
+        let schema = Arc::new(Schema::new(vec![Field::new("i", DataType::Int32, false)]));
+        let batch = RecordBatch::try_new(
+            schema.clone(),
+            vec![Arc::new(Int32Array::from_iter_values(0..10))],
+        )
+        .unwrap();
+
+        let table = conn
+            .create_table("test_delete_expr", batch)
+            .execute()
+            .await
+            .unwrap();
+
+        // 2. Verify initial state
+        assert_eq!(table.count_rows(None).await.unwrap(), 10);
+        let initial_version = table.version().await.unwrap();
+
+        // 3. Execute Delete with Expr (removes values > 5)
+        let expr = col("i").gt(lit(5));
+        table.delete(&expr).await.unwrap();
+
+        // 4. Verify results
+        assert_eq!(table.count_rows(None).await.unwrap(), 6); // 0, 1, 2, 3, 4, 5 remain
+        let current_version = table.version().await.unwrap();
+        assert!(
+            current_version > initial_version,
+            "Table version must increment after delete_expr operation"
+        );
+
+        // 5. Verify specific data consistency
+        let batches = table
+            .query()
+            .execute()
+            .await
+            .unwrap()
+            .try_collect::<Vec<_>>()
+            .await
+            .unwrap();
+        let batch = &batches[0];
+        let array = batch
+            .column(0)
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .unwrap();
+
+        // Ensure no value > 5 exists
+        for val in array.iter() {
+            assert!(val.unwrap() <= 5);
+        }
+    }
+
+    #[tokio::test]
+    async fn test_delete_expr_increments_version() {
+        use datafusion_expr::lit;
+
+        let conn = connect("memory://").execute().await.unwrap();
+
+        // Create a table with 5 rows
+        let batch = record_batch!(("id", Int32, [1, 2, 3, 4, 5])).unwrap();
+
+        let table = conn
+            .create_table("test_delete_expr_noop", batch)
+            .execute()
+            .await
+            .unwrap();
+
+        // Capture the initial state (Rows = 5, Version = 1)
+        let initial_rows = table.count_rows(None).await.unwrap();
+        let initial_version = table.version().await.unwrap();
+
+        assert_eq!(initial_rows, 5);
+        let expr = lit(false);
+        table.delete(&expr).await.unwrap();
+
+        // Rows should still be 5
+        let current_rows = table.count_rows(None).await.unwrap();
+        assert_eq!(
+            current_rows, initial_rows,
+            "Data should not change when predicate is false"
+        );
+
+        // version check
+        let current_version = table.version().await.unwrap();
+        assert!(
+            current_version > initial_version,
+            "Table version must increment after delete_expr operation"
+        );
+    }
 }
--- a/rust/lancedb/src/utils/mod.rs
+++ b/rust/lancedb/src/utils/mod.rs
@@ -6,7 +6,7 @@ pub(crate) mod background_cache;
 use std::sync::Arc;

 use arrow_array::RecordBatch;
-use arrow_schema::{DataType, Schema, SchemaRef};
+use arrow_schema::{DataType, Field, Schema, SchemaRef};
 use datafusion_common::{DataFusionError, Result as DataFusionResult};
 use datafusion_execution::RecordBatchStream;
 use futures::{FutureExt, Stream};
@@ -152,14 +152,10 @@ pub fn validate_namespace(namespace: &[String]) -> Result<()> {
 /// Find one default column to create index or perform vector query.
 pub(crate) fn default_vector_column(schema: &Schema, dim: Option<i32>) -> Result<String> {
    // Try to find a vector column.
-    let candidates = schema
-        .fields()
-        .iter()
-        .filter_map(|field| match infer_vector_dim(field.data_type()) {
-            Ok(d) if dim.is_none() || dim == Some(d as i32) => Some(field.name()),
-            _ => None,
-        })
-        .collect::<Vec<_>>();
+    let mut candidates = Vec::new();
+    for field in schema.fields() {
+        collect_vector_columns(field, &mut Vec::new(), dim, &mut candidates);
+    }
    if candidates.is_empty() {
        Err(Error::InvalidInput {
            message: format!(
@@ -180,6 +176,57 @@ pub(crate) fn default_vector_column(schema: &Schema, dim: Option<i32>) -> Result
    }
 }

+fn collect_vector_columns(
+    field: &Field,
+    path: &mut Vec<String>,
+    dim: Option<i32>,
+    candidates: &mut Vec<String>,
+) {
+    path.push(field.name().clone());
+    match infer_vector_dim(field.data_type()) {
+        Ok(d) if dim.is_none() || dim == Some(d as i32) => {
+            let path_segments = path.iter().map(String::as_str).collect::<Vec<_>>();
+            candidates.push(lance_core::datatypes::format_field_path(&path_segments));
+        }
+        _ => {
+            if let DataType::Struct(fields) = field.data_type() {
+                for child in fields {
+                    collect_vector_columns(child, path, dim, candidates);
+                }
+            }
+        }
+    }
+    path.pop();
+}
+
+pub(crate) fn resolve_arrow_field_path(schema: &Schema, column: &str) -> Result<(String, Field)> {
+    lance_core::datatypes::parse_field_path(column).map_err(|e| Error::InvalidInput {
+        message: format!("Invalid field path `{}`: {}", column, e),
+    })?;
+
+    let lance_schema =
+        lance_core::datatypes::Schema::try_from(schema).map_err(|e| Error::Schema {
+            message: format!("Invalid schema: {}", e),
+        })?;
+    let field_path = lance_schema
+        .resolve_case_insensitive(column)
+        .ok_or_else(|| Error::Schema {
+            message: format!(
+                "Field path `{}` not found in schema. Available field paths: {}",
+                column,
+                lance_schema.field_paths().join(", ")
+            ),
+        })?;
+    let field = field_path.last().expect("field path should be non-empty");
+    let path_segments = field_path
+        .iter()
+        .map(|field| field.name.as_str())
+        .collect::<Vec<_>>();
+    let canonical_path = lance_core::datatypes::format_field_path(&path_segments);
+
+    Ok((canonical_path, Field::from(*field)))
+}
+
 pub fn supported_btree_data_type(dtype: &DataType) -> bool {
    dtype.is_integer()
        || dtype.is_floating()
@@ -450,6 +497,49 @@ mod tests {
            "vec"
        );

+        let schema_with_nested_vec_col = Schema::new(vec![
+            Field::new("id", DataType::Int16, true),
+            Field::new(
+                "image",
+                DataType::Struct(
+                    vec![Field::new(
+                        "embedding",
+                        DataType::FixedSizeList(
+                            Arc::new(Field::new("item", DataType::Float32, false)),
+                            10,
+                        ),
+                        false,
+                    )]
+                    .into(),
+                ),
+                false,
+            ),
+        ]);
+        assert_eq!(
+            default_vector_column(&schema_with_nested_vec_col, None).unwrap(),
+            "image.embedding"
+        );
+
+        let schema_with_escaped_nested_vec_col = Schema::new(vec![Field::new(
+            "image-meta",
+            DataType::Struct(
+                vec![Field::new(
+                    "embedding.v1",
+                    DataType::FixedSizeList(
+                        Arc::new(Field::new("item", DataType::Float32, false)),
+                        10,
+                    ),
+                    false,
+                )]
+                .into(),
+            ),
+            false,
+        )]);
+        assert_eq!(
+            default_vector_column(&schema_with_escaped_nested_vec_col, None).unwrap(),
+            "`image-meta`.`embedding.v1`"
+        );
+
        let multi_vec_col = Schema::new(vec![
            Field::new("id", DataType::Int16, true),
            Field::new(
@@ -469,6 +559,48 @@ mod tests {
                .to_string()
                .contains("More than one")
        );
+
+        let multi_nested_vec_col = Schema::new(vec![
+            Field::new(
+                "image",
+                DataType::Struct(
+                    vec![Field::new(
+                        "embedding",
+                        DataType::FixedSizeList(
+                            Arc::new(Field::new("item", DataType::Float32, false)),
+                            10,
+                        ),
+                        false,
+                    )]
+                    .into(),
+                ),
+                false,
+            ),
+            Field::new(
+                "text",
+                DataType::Struct(
+                    vec![Field::new(
+                        "embedding",
+                        DataType::FixedSizeList(
+                            Arc::new(Field::new("item", DataType::Float32, false)),
+                            50,
+                        ),
+                        false,
+                    )]
+                    .into(),
+                ),
+                false,
+            ),
+        ]);
+        assert_eq!(
+            default_vector_column(&multi_nested_vec_col, Some(50)).unwrap(),
+            "text.embedding"
+        );
+        let err = default_vector_column(&multi_nested_vec_col, None)
+            .unwrap_err()
+            .to_string();
+        assert!(err.contains("image.embedding"));
+        assert!(err.contains("text.embedding"));
    }

    #[test]