Bump version: 0.31.0-beta.5 → 0.31.0-beta.6

Bump version: 0.34.0-beta.5 → 0.34.0-beta.6
fix(python): average MRR reciprocal ranks over all rankings (#3599 )
2026-07-02 18:40:40 +00:00 · 2026-07-02 11:33:53 +00:00 · 2026-07-02 11:32:45 +00:00 · 2026-07-01 15:36:56 -07:00 · 2026-07-01 10:28:58 -07:00 · 2026-07-01 10:11:58 -07:00
50 changed files with 594 additions and 196 deletions
--- a/.bumpversion.toml
+++ b/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.31.0-beta.4"
+current_version = "0.31.0-beta.6"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/.github/workflows/cargo-publish.yml
+++ b/.github/workflows/cargo-publish.yml
@@ -25,7 +25,7 @@ jobs:
    # Only runs on tags that matches the make-release action
    if: startsWith(github.ref, 'refs/tags/v')
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
      - uses: Swatinem/rust-cache@v2
        with:
          workspaces: rust
@@ -47,7 +47,7 @@ jobs:
      contents: read
      issues: write
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
      - uses: ./.github/actions/create-failure-issue
        with:
          job-results: ${{ toJSON(needs) }}
--- a/.github/workflows/codex-fix-ci.yml
+++ b/.github/workflows/codex-fix-ci.yml
@@ -36,14 +36,14 @@ jobs:
          echo "guidelines = ${{ inputs.guidelines }}"
      - name: Checkout Repo
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
        with:
          ref: ${{ inputs.branch }}
          fetch-depth: 0
          persist-credentials: true
      - name: Set up Node.js
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          # pnpm 11 (used by the nodejs install step below) requires
          # Node >= 22.13; use 24 since 22 hits EOL in October.
@@ -82,7 +82,7 @@ jobs:
          cache: maven
      - name: Setup pnpm
-        uses: pnpm/action-setup@v4
+        uses: pnpm/action-setup@v6
        with:
          version: 11.1.1
      - name: Install Node.js dependencies for TypeScript bindings
--- a/.github/workflows/codex-update-lance-dependency.yml
+++ b/.github/workflows/codex-update-lance-dependency.yml
@@ -30,13 +30,13 @@ jobs:
          echo "tag = ${{ inputs.tag || 'latest' }}"
      - name: Checkout Repo LanceDB
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
        with:
          fetch-depth: 0
          persist-credentials: true
      - name: Set up Node.js
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          node-version: 20
--- a/.github/workflows/dev.yml
+++ b/.github/workflows/dev.yml
@@ -27,7 +27,7 @@ jobs:
    name: Verify PR title / description conforms to semantic-release
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/setup-node@v4
+      - uses: actions/setup-node@v6
        with:
          node-version: "18"
      # These rules are disabled because Github will always ensure there
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -35,7 +35,7 @@ jobs:
    runs-on: ubuntu-24.04
    steps:
      - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
      - name: Install dependencies needed for ubuntu
        run: |
          sudo apt install -y protobuf-compiler libssl-dev
@@ -53,7 +53,7 @@ jobs:
          python -m pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .
          python -m pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -r ../docs/requirements.txt
      - name: Set up node
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          node-version: 20
          cache: 'npm'
--- a/.github/workflows/java-publish.yml
+++ b/.github/workflows/java-publish.yml
@@ -32,7 +32,7 @@ jobs:
        working-directory: ./java
    steps:
      - name: Checkout repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
      - name: Set up Java 8
        uses: actions/setup-java@v4
        with:
@@ -73,7 +73,7 @@ jobs:
      contents: read
      issues: write
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
      - uses: ./.github/actions/create-failure-issue
        with:
          job-results: ${{ toJSON(needs) }}
--- a/.github/workflows/java.yml
+++ b/.github/workflows/java.yml
@@ -36,7 +36,7 @@ jobs:
        working-directory: ./java
    steps:
      - name: Checkout repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
      - name: Set up Java 17
        uses: actions/setup-java@v4
        with:
--- a/.github/workflows/license-header-check.yml
+++ b/.github/workflows/license-header-check.yml
@@ -19,7 +19,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Check out code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
      - name: Install license-header-checker
        working-directory: /tmp
        run: |
--- a/.github/workflows/make-release-commit.yml
+++ b/.github/workflows/make-release-commit.yml
@@ -49,7 +49,7 @@ jobs:
    steps:
      - name: Output Inputs
        run: echo "${{ toJSON(github.event.inputs) }}"
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          lfs: true
--- a/.github/workflows/nodejs.yml
+++ b/.github/workflows/nodejs.yml
@@ -38,14 +38,14 @@ jobs:
      CC: gcc-12
      CXX: g++-12
    steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@v6
      with:
        fetch-depth: 0
        lfs: true
-    - uses: pnpm/action-setup@v4
+    - uses: pnpm/action-setup@v6
      with:
        version: 11.1.1
-    - uses: actions/setup-node@v4
+    - uses: actions/setup-node@v6
      with:
        # pnpm 11 requires Node >= 22.13; use 24 since 22 hits EOL
        # in October. The library itself still supports Node >= 18
@@ -86,14 +86,14 @@ jobs:
        shell: bash
        working-directory: nodejs
    steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@v6
      with:
        fetch-depth: 0
        lfs: true
-    - uses: pnpm/action-setup@v4
+    - uses: pnpm/action-setup@v6
      with:
        version: 11.1.1
-    - uses: actions/setup-node@v4
+    - uses: actions/setup-node@v6
      name: Setup Node.js 24 for build
      with:
        # pnpm 11 requires Node >= 22.13; use 24 since 22 hits EOL
@@ -130,7 +130,7 @@ jobs:
          echo "Run 'pnpm run docs', fix any warnings, and commit the changes."
          exit 1
        fi
-    - uses: actions/setup-node@v4
+    - uses: actions/setup-node@v6
      name: Setup Node.js ${{ matrix.node-version }} for test
      with:
        node-version: ${{ matrix.node-version }}
@@ -166,14 +166,14 @@ jobs:
        shell: bash
        working-directory: nodejs
    steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@v6
      with:
        fetch-depth: 0
        lfs: true
-    - uses: pnpm/action-setup@v4
+    - uses: pnpm/action-setup@v6
      with:
        version: 11.1.1
-    - uses: actions/setup-node@v4
+    - uses: actions/setup-node@v6
      with:
        # pnpm 11 requires Node >= 22.13; use 24 since 22 hits EOL
        # in October.
--- a/.github/workflows/npm-publish.yml
+++ b/.github/workflows/npm-publish.yml
@@ -32,7 +32,7 @@ jobs:
    permissions:
      contents: write
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          lfs: true
@@ -170,13 +170,13 @@ jobs:
      run:
        working-directory: nodejs
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
      - name: Setup pnpm
-        uses: pnpm/action-setup@v4
+        uses: pnpm/action-setup@v6
        with:
          version: 11.1.1
      - name: Setup node
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          # pnpm 11 requires Node >= 22.13; use 24 since 22 hits EOL
          # in October.
@@ -190,7 +190,7 @@ jobs:
          toolchain: stable
          targets: ${{ matrix.settings.target }}
      - name: Cache cargo
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: |
            ~/.cargo/registry/index/
@@ -244,7 +244,7 @@ jobs:
        if: ${{ !matrix.settings.docker }}
        shell: bash
      - name: Upload artifact
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v7
        with:
          name: lancedb-${{ matrix.settings.target }}
          path: nodejs/dist/*.node
@@ -256,7 +256,7 @@ jobs:
        run: pnpm tsc
      - name: Upload Generic Artifacts
        if: ${{ matrix.settings.target == 'aarch64-apple-darwin' }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v7
        with:
          name: nodejs-dist
          path: |
@@ -287,13 +287,13 @@ jobs:
        shell: bash
        working-directory: nodejs
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
      - name: Setup pnpm
-        uses: pnpm/action-setup@v4
+        uses: pnpm/action-setup@v6
        with:
          version: 11.1.1
      - name: Setup Node.js 24 for install
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          # pnpm 11 requires Node >= 22.13; use 24 since 22 hits EOL
          # in October.
@@ -303,18 +303,18 @@ jobs:
      - name: Install dependencies
        run: pnpm install --frozen-lockfile
      - name: Setup Node.js ${{ matrix.node }} for test
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          node-version: ${{ matrix.node }}
      - name: Download artifacts
-        uses: actions/download-artifact@v4
+        uses: actions/download-artifact@v8
        with:
          name: lancedb-${{ matrix.settings.target }}
          path: nodejs/dist/
          # For testing purposes:
          # run-id: 13982782871
          # github-token: ${{ secrets.GITHUB_TOKEN }} # token with actions:read permissions on target repo
-      - uses: actions/download-artifact@v4
+      - uses: actions/download-artifact@v8
        with:
          name: nodejs-dist
          path: nodejs/dist
@@ -339,13 +339,13 @@ jobs:
    needs:
      - test-lancedb
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
      - name: Setup pnpm
-        uses: pnpm/action-setup@v4
+        uses: pnpm/action-setup@v6
        with:
          version: 11.1.1
      - name: Setup node
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          node-version: 24
          cache: pnpm
@@ -353,14 +353,14 @@ jobs:
          registry-url: "https://registry.npmjs.org"
      - name: Install dependencies
        run: pnpm install --frozen-lockfile
-      - uses: actions/download-artifact@v4
+      - uses: actions/download-artifact@v8
        with:
          name: nodejs-dist
          path: nodejs/dist
          # For testing purposes:
          # run-id: 13982782871
          # github-token: ${{ secrets.GITHUB_TOKEN }} # token with actions:read permissions on target repo
-      - uses: actions/download-artifact@v4
+      - uses: actions/download-artifact@v8
        name: Download arch-specific binaries
        with:
          pattern: lancedb-*
@@ -398,7 +398,7 @@ jobs:
      contents: read
      issues: write
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
      - uses: ./.github/actions/create-failure-issue
        with:
          job-results: ${{ toJSON(needs) }}
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -41,7 +41,7 @@ jobs:
        shell: bash
        working-directory: python
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          lfs: true
@@ -66,7 +66,7 @@ jobs:
        shell: bash
        working-directory: python
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          lfs: true
@@ -95,7 +95,7 @@ jobs:
        shell: bash
        working-directory: python
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          lfs: true
@@ -126,7 +126,7 @@ jobs:
        shell: bash
        working-directory: python
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          lfs: true
@@ -160,7 +160,7 @@ jobs:
        shell: bash
        working-directory: python
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          lfs: true
@@ -189,7 +189,7 @@ jobs:
        shell: bash
        working-directory: python
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          lfs: true
@@ -212,7 +212,7 @@ jobs:
        shell: bash
        working-directory: python
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          lfs: true
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -40,7 +40,7 @@ jobs:
      CC: clang-18
      CXX: clang++-18
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          lfs: true
@@ -65,7 +65,7 @@ jobs:
    timeout-minutes: 10
    runs-on: ubuntu-24.04
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
      - uses: EmbarkStudios/cargo-deny-action@v2
        with:
          command: check advisories bans licenses sources
@@ -78,7 +78,7 @@ jobs:
      CC: clang
      CXX: clang++
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
      # Building without a lock file often requires the latest Rust version since downstream
      # dependencies may have updated their minimum Rust version.
      - uses: actions-rust-lang/setup-rust-toolchain@v1
@@ -113,7 +113,7 @@ jobs:
      CXX: clang++-18
      GH_TOKEN: ${{ secrets.SOPHON_READ_TOKEN }}
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          lfs: true
@@ -152,7 +152,7 @@ jobs:
        shell: bash
        working-directory: rust
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          lfs: true
@@ -181,7 +181,7 @@ jobs:
      run:
        working-directory: rust/lancedb
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
      - name: Set target
        run: rustup target add ${{ matrix.target }}
      - uses: Swatinem/rust-cache@v2
@@ -210,7 +210,7 @@ jobs:
      CC: clang-18
      CXX: clang++-18
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
        with:
          submodules: true
      - name: Install dependencies
--- a/.github/workflows/update_package_lock_run.yml
+++ b/.github/workflows/update_package_lock_run.yml
@@ -11,7 +11,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
        with:
          ref: main
          persist-credentials: false
--- a/.github/workflows/update_package_lock_run_nodejs.yml
+++ b/.github/workflows/update_package_lock_run_nodejs.yml
@@ -11,7 +11,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
        with:
          ref: main
          persist-credentials: false
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -5299,12 +5299,13 @@ dependencies = [
 [[package]]
 name = "lancedb"
-version = "0.31.0-beta.4"
+version = "0.31.0-beta.6"
 dependencies = [
 "ahash",
 "anyhow",
 "arrow",
 "arrow-array",
 "arrow-buffer",
 "arrow-cast",
 "arrow-data",
 "arrow-ipc",
@@ -5383,7 +5384,7 @@ dependencies = [
 [[package]]
 name = "lancedb-nodejs"
-version = "0.31.0-beta.4"
+version = "0.31.0-beta.6"
 dependencies = [
 "arrow-array",
 "arrow-buffer",
@@ -5408,7 +5409,7 @@ dependencies = [
 [[package]]
 name = "lancedb-python"
-version = "0.34.0-beta.4"
+version = "0.34.0-beta.6"
 dependencies = [
 "arrow",
 "async-trait",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -31,6 +31,7 @@ ahash = "0.8"
 # Note that this one does not include pyarrow
 arrow = { version = "58.0.0", optional = false }
 arrow-array = "58.0.0"
 arrow-buffer = "58.0.0"
 arrow-data = "58.0.0"
 arrow-ipc = "58.0.0"
 arrow-ord = "58.0.0"
--- a/docs/src/java/java.md
+++ b/docs/src/java/java.md
@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
 <dependency>
    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-core</artifactId>
-    <version>0.31.0-beta.4</version>
+    <version>0.31.0-beta.6</version>
 </dependency>
 ```
--- a/docs/src/js/classes/Query.md
+++ b/docs/src/js/classes/Query.md
@@ -518,6 +518,9 @@ x > 5 OR y = 'test'
 Filtering performance can often be improved by creating a scalar index
 on the filter column(s).
 Calling this multiple times combines the filters with a logical AND rather
 than replacing the previous filter.
 ```
 #### Inherited from
--- a/docs/src/js/classes/VectorQuery.md
+++ b/docs/src/js/classes/VectorQuery.md
@@ -767,6 +767,9 @@ x > 5 OR y = 'test'
 Filtering performance can often be improved by creating a scalar index
 on the filter column(s).
 Calling this multiple times combines the filters with a logical AND rather
 than replacing the previous filter.
 ```
 #### Inherited from
--- a/java/lancedb-core/pom.xml
+++ b/java/lancedb-core/pom.xml
@@ -8,7 +8,7 @@
    <parent>
      <groupId>com.lancedb</groupId>
      <artifactId>lancedb-parent</artifactId>
-      <version>0.31.0-beta.4</version>
+      <version>0.31.0-beta.6</version>
      <relativePath>../pom.xml</relativePath>
    </parent>
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -6,7 +6,7 @@
    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-parent</artifactId>
-    <version>0.31.0-beta.4</version>
+    <version>0.31.0-beta.6</version>
    <packaging>pom</packaging>
    <name>${project.artifactId}</name>
    <description>LanceDB Java SDK Parent POM</description>
--- a/nodejs/Cargo.toml
+++ b/nodejs/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "lancedb-nodejs"
 edition.workspace = true
-version = "0.31.0-beta.4"
+version = "0.31.0-beta.6"
 publish = false
 license.workspace = true
 description.workspace = true
--- a/nodejs/test/query.test.ts
+++ b/nodejs/test/query.test.ts
@@ -215,6 +215,20 @@ describe("Query orderBy", () => {
    expect(results[2].score).toBeCloseTo(4.1, 0.001);
  });
  it("should combine repeated where clauses with AND", async () => {
    const results = await table
      .query()
      .where("score > 1.0")
      .where("score < 3.0")
      .orderBy({ columnName: "score" })
      .toArray();
    // Only rows matching both predicates should be returned, rather than the
    // second where() silently replacing the first.
    expect(results.length).toBe(2);
    expect(results[0].score).toBeCloseTo(1.2, 0.001);
    expect(results[1].score).toBeCloseTo(2.8, 0.001);
  });
  it("should support method chaining with limit", async () => {
    const results = await table
      .query()
--- a/nodejs/lancedb/query.ts
+++ b/nodejs/lancedb/query.ts
@@ -362,6 +362,9 @@ export class StandardQueryBase<
   *
   * Filtering performance can often be improved by creating a scalar index
   * on the filter column(s).
   *
   * Calling this multiple times combines the filters with a logical AND rather
   * than replacing the previous filter.
   */
  where(predicate: string): this {
    this.doCall((inner: NativeQueryType) => inner.onlyIf(predicate));
--- a/nodejs/npm/darwin-arm64/package.json
+++ b/nodejs/npm/darwin-arm64/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-darwin-arm64",
-	"version": "0.31.0-beta.4",
+	"version": "0.31.0-beta.6",
 	"os": ["darwin"],
 	"cpu": ["arm64"],
 	"main": "lancedb.darwin-arm64.node",
--- a/nodejs/npm/linux-arm64-gnu/package.json
+++ b/nodejs/npm/linux-arm64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-gnu",
-	"version": "0.31.0-beta.4",
+	"version": "0.31.0-beta.6",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-gnu.node",
--- a/nodejs/npm/linux-arm64-musl/package.json
+++ b/nodejs/npm/linux-arm64-musl/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-musl",
-	"version": "0.31.0-beta.4",
+	"version": "0.31.0-beta.6",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-musl.node",
--- a/nodejs/npm/linux-x64-gnu/package.json
+++ b/nodejs/npm/linux-x64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-gnu",
-	"version": "0.31.0-beta.4",
+	"version": "0.31.0-beta.6",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-gnu.node",
--- a/nodejs/npm/linux-x64-musl/package.json
+++ b/nodejs/npm/linux-x64-musl/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-musl",
-	"version": "0.31.0-beta.4",
+	"version": "0.31.0-beta.6",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-musl.node",
--- a/nodejs/npm/win32-arm64-msvc/package.json
+++ b/nodejs/npm/win32-arm64-msvc/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@lancedb/lancedb-win32-arm64-msvc",
-  "version": "0.31.0-beta.4",
+  "version": "0.31.0-beta.6",
  "os": [
    "win32"
  ],
--- a/nodejs/npm/win32-x64-msvc/package.json
+++ b/nodejs/npm/win32-x64-msvc/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-win32-x64-msvc",
-	"version": "0.31.0-beta.4",
+	"version": "0.31.0-beta.6",
 	"os": ["win32"],
 	"cpu": ["x64"],
 	"main": "lancedb.win32-x64-msvc.node",
--- a/nodejs/package-lock.json
+++ b/nodejs/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "@lancedb/lancedb",
-  "version": "0.31.0-beta.4",
+  "version": "0.31.0-beta.6",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "@lancedb/lancedb",
-      "version": "0.31.0-beta.4",
+      "version": "0.31.0-beta.6",
      "cpu": [
        "x64",
        "arm64"
--- a/nodejs/package.json
+++ b/nodejs/package.json
@@ -11,7 +11,7 @@
    "ann"
  ],
  "private": false,
-  "version": "0.31.0-beta.4",
+  "version": "0.31.0-beta.6",
  "main": "dist/index.js",
  "exports": {
    ".": "./dist/index.js",
--- a/python/.bumpversion.toml
+++ b/python/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.34.0-beta.5"
+current_version = "0.34.0-beta.6"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-python"
-version = "0.34.0-beta.5"
+version = "0.34.0-beta.6"
 publish = false
 edition.workspace = true
 description = "Python bindings for LanceDB"
--- a/python/python/lancedb/merge.py
+++ b/python/python/lancedb/merge.py
@@ -51,6 +51,15 @@ class LanceMergeInsertBuilder(object):
        If there are multiple matches then the behavior is undefined.
        Currently this causes multiple copies of the row to be created
        but that behavior is subject to change.
        Parameters
        ----------
        where: Optional[str], default None
            An optional filter to limit which rows are updated. Column
            references in this expression must be prefixed with "target."
            to refer to the existing table data. For example, to only
            update rows where the existing color is red, use:
            ``where="target.color = 'red'"``
        """
        self._when_matched_update_all = True
        self._when_matched_update_all_condition = where
--- a/python/python/lancedb/namespace.py
+++ b/python/python/lancedb/namespace.py
@@ -45,11 +45,6 @@ from lancedb._lancedb import (
 )
 from lancedb.background_loop import LOOP
 from lancedb.db import AsyncConnection, DBConnection
 from lancedb.namespace_utils import (
    _normalize_create_namespace_mode,
    _normalize_drop_namespace_mode,
    _normalize_drop_namespace_behavior,
 )
 from lance_namespace import (
    LanceNamespace,
    connect as namespace_connect,
@@ -58,13 +53,6 @@ from lance_namespace import (
    DropNamespaceResponse,
    ListNamespacesResponse,
    ListTablesResponse,
    ListTablesRequest,
    DescribeNamespaceRequest,
    DropTableRequest,
    RenameTableRequest,
    ListNamespacesRequest,
    CreateNamespaceRequest,
    DropNamespaceRequest,
 )
 from lancedb.table import AsyncTable, LanceTable, Table
 from lancedb.util import validate_table_name
@@ -389,7 +377,7 @@ def _builds_namespace_natively(
    return namespace_client_impl == "rest" and bool(namespace_client_properties)
-def _supports_native_sync_namespace(namespace_client_impl: str) -> bool:
+def _supports_native_namespace(namespace_client_impl: str) -> bool:
    return namespace_client_impl in {"dir", "rest"}
@@ -412,7 +400,6 @@ class LanceNamespaceDBConnection(DBConnection):
        namespace_client_impl: Optional[str] = None,
        namespace_client_properties: Optional[Dict[str, str]] = None,
        _inner: Optional[AsyncConnection] = None,
        _route_pushdown_to_rust: Optional[bool] = None,
    ):
        """
        Initialize a namespace-based LanceDB connection.
@@ -454,16 +441,12 @@ class LanceNamespaceDBConnection(DBConnection):
        )
        self._namespace_client_impl = namespace_client_impl
        self._namespace_client_properties = namespace_client_properties
-        # When the namespace client is built natively (see Rust
+        # When the namespace connection or client is built natively in Rust, the
-        # ``build_namespace_natively``), the underlying Rust table performs
+        # underlying Rust table performs QueryTable pushdown through the
-        # QueryTable pushdown through the read-freshness context provider, which
+        # read-freshness context provider, which the pure-Python ``query_table``
-        # the pure-Python ``query_table`` path bypasses.
+        # path bypasses.
-        self._route_pushdown_to_rust = (
+        self._route_pushdown_to_rust = _inner is not None or _builds_namespace_natively(
-            _route_pushdown_to_rust
+            namespace_client_impl, namespace_client_properties
            if _route_pushdown_to_rust is not None
            else _builds_namespace_natively(
                namespace_client_impl, namespace_client_properties
            )
        )
        if _inner is not None:
            self._inner = _inner
@@ -909,7 +892,7 @@ class AsyncLanceNamespaceDBConnection:
    def __init__(
        self,
-        namespace_client: LanceNamespace,
+        namespace_client: Optional[LanceNamespace] = None,
        *,
        read_consistency_interval: Optional[timedelta] = None,
        storage_options: Optional[Dict[str, str]] = None,
@@ -917,6 +900,7 @@ class AsyncLanceNamespaceDBConnection:
        namespace_client_pushdown_operations: Optional[List[str]] = None,
        namespace_client_impl: Optional[str] = None,
        namespace_client_properties: Optional[Dict[str, str]] = None,
        _inner: Optional[AsyncConnection] = None,
    ):
        """
        Initialize an async namespace-based LanceDB connection.
@@ -958,29 +942,35 @@ class AsyncLanceNamespaceDBConnection:
        )
        self._namespace_client_impl = namespace_client_impl
        self._namespace_client_properties = namespace_client_properties
-        # See LanceNamespaceDBConnection: when built natively the Rust table runs
+        # See LanceNamespaceDBConnection: when Rust owns the namespace
-        # QueryTable pushdown through the read-freshness provider, so defer to it
+        # connection/client, its table performs QueryTable pushdown through the
-        # rather than the urllib3 client (which omits x-lancedb-min-timestamp).
+        # read-freshness provider, so defer to it rather than the urllib3 client
-        self._route_pushdown_to_rust = _builds_namespace_natively(
+        # path (which omits x-lancedb-min-timestamp).
        self._route_pushdown_to_rust = _inner is not None or _builds_namespace_natively(
            namespace_client_impl, namespace_client_properties
        )
-        self._inner = AsyncConnection(
+        if _inner is not None:
-            _connect_namespace_client(
+            self._inner = _inner
-                namespace_client,
+        else:
-                read_consistency_interval=(
+            if namespace_client is None:
-                    read_consistency_interval.total_seconds()
+                raise ValueError("namespace_client is required without a native _inner")
-                    if read_consistency_interval is not None
+            self._inner = AsyncConnection(
-                    else None
+                _connect_namespace_client(
-                ),
+                    namespace_client,
-                storage_options=self.storage_options or None,
+                    read_consistency_interval=(
-                session=session,
+                        read_consistency_interval.total_seconds()
-                namespace_client_pushdown_operations=(
+                        if read_consistency_interval is not None
-                    list(self._namespace_client_pushdown_operations)
+                        else None
-                ),
+                    ),
-                namespace_client_impl=namespace_client_impl,
+                    storage_options=self.storage_options or None,
-                namespace_client_properties=namespace_client_properties,
+                    session=session,
                    namespace_client_pushdown_operations=(
                        list(self._namespace_client_pushdown_operations)
                    ),
                    namespace_client_impl=namespace_client_impl,
                    namespace_client_properties=namespace_client_properties,
                )
            )
        )
    async def table_names(
        self,
@@ -1004,11 +994,9 @@ class AsyncLanceNamespaceDBConnection:
        )
        if namespace_path is None:
            namespace_path = []
-        request = ListTablesRequest(
+        return await self._inner.table_names(
-            id=namespace_path, page_token=page_token, limit=limit
+            namespace_path=namespace_path, start_after=page_token, limit=limit
        )
        response = self._namespace_client.list_tables(request)
        return response.tables if response.tables else []
    async def create_table(
        self,
@@ -1071,8 +1059,8 @@ class AsyncLanceNamespaceDBConnection:
                storage_options=storage_options,
                index_cache_size=index_cache_size,
            )
-        except RuntimeError as e:
+        except (RuntimeError, ValueError) as e:
-            if "Table not found" in str(e):
+            if "Table not found" in str(e) or "was not found" in str(e):
                table_id = namespace_path + [name]
                raise TableNotFoundError(f"Table not found: {'$'.join(table_id)}")
            raise
@@ -1093,9 +1081,7 @@ class AsyncLanceNamespaceDBConnection:
        """Drop a table from the namespace."""
        if namespace_path is None:
            namespace_path = []
-        table_id = namespace_path + [name]
+        await self._inner.drop_table(name, namespace_path=namespace_path)
        request = DropTableRequest(id=table_id)
        self._namespace_client.drop_table(request)
    async def rename_table(
        self,
@@ -1109,14 +1095,17 @@ class AsyncLanceNamespaceDBConnection:
            cur_namespace_path = []
        if new_namespace_path is None:
            new_namespace_path = []
-        cur_table_id = cur_namespace_path + [cur_name]
+        try:
-        new_namespace_id = new_namespace_path if new_namespace_path else None
+            await self._inner.rename_table(
-        request = RenameTableRequest(
+                cur_name,
-            id=cur_table_id,
+                new_name,
-            new_table_name=new_name,
+                cur_namespace_path=cur_namespace_path,
-            new_namespace_id=new_namespace_id,
+                new_namespace_path=new_namespace_path,
-        )
+            )
-        self._namespace_client.rename_table(request)
+        except RuntimeError as e:
            if "rename_table not implemented" in str(e):
                raise NotImplementedError("rename_table not implemented") from e
            raise
    async def drop_database(self):
        """Deprecated method."""
@@ -1128,9 +1117,7 @@ class AsyncLanceNamespaceDBConnection:
        """Drop all tables in the namespace."""
        if namespace_path is None:
            namespace_path = []
-        table_names = await self.table_names(namespace_path=namespace_path)
+        await self._inner.drop_all_tables(namespace_path=namespace_path)
        for table_name in table_names:
            await self.drop_table(table_name, namespace_path=namespace_path)
    async def list_namespaces(
        self,
@@ -1159,13 +1146,8 @@ class AsyncLanceNamespaceDBConnection:
        """
        if namespace_path is None:
            namespace_path = []
-        request = ListNamespacesRequest(
+        return await self._inner.list_namespaces(
-            id=namespace_path, page_token=page_token, limit=limit
+            namespace_path=namespace_path, page_token=page_token, limit=limit
        )
        response = self._namespace_client.list_namespaces(request)
        return ListNamespacesResponse(
            namespaces=response.namespaces if response.namespaces else [],
            page_token=response.page_token,
        )
    async def create_namespace(
@@ -1192,15 +1174,11 @@ class AsyncLanceNamespaceDBConnection:
        CreateNamespaceResponse
            Response containing the properties of the created namespace.
        """
-        request = CreateNamespaceRequest(
+        return await self._inner.create_namespace(
-            id=namespace_path,
+            namespace_path=namespace_path,
-            mode=_normalize_create_namespace_mode(mode),
+            mode=mode,
            properties=properties,
        )
        response = self._namespace_client.create_namespace(request)
        return CreateNamespaceResponse(
            properties=response.properties if hasattr(response, "properties") else None
        )
    async def drop_namespace(
        self,
@@ -1226,20 +1204,16 @@ class AsyncLanceNamespaceDBConnection:
        DropNamespaceResponse
            Response containing properties and transaction_id if applicable.
        """
-        request = DropNamespaceRequest(
+        try:
-            id=namespace_path,
+            return await self._inner.drop_namespace(
-            mode=_normalize_drop_namespace_mode(mode),
+                namespace_path=namespace_path,
-            behavior=_normalize_drop_namespace_behavior(behavior),
+                mode=mode,
-        )
+                behavior=behavior,
-        response = self._namespace_client.drop_namespace(request)
+            )
-        return DropNamespaceResponse(
+        except RuntimeError as e:
-            properties=(
+            if "Namespace not empty" in str(e):
-                response.properties if hasattr(response, "properties") else None
+                raise NamespaceNotEmptyError(str(e)) from e
-            ),
+            raise
            transaction_id=(
                response.transaction_id if hasattr(response, "transaction_id") else None
            ),
        )
    async def describe_namespace(
        self, namespace_path: List[str]
@@ -1257,11 +1231,7 @@ class AsyncLanceNamespaceDBConnection:
        DescribeNamespaceResponse
            Response containing the namespace properties.
        """
-        request = DescribeNamespaceRequest(id=namespace_path)
+        return await self._inner.describe_namespace(namespace_path)
        response = self._namespace_client.describe_namespace(request)
        return DescribeNamespaceResponse(
            properties=response.properties if hasattr(response, "properties") else None
        )
    async def list_tables(
        self,
@@ -1290,13 +1260,8 @@ class AsyncLanceNamespaceDBConnection:
        """
        if namespace_path is None:
            namespace_path = []
-        request = ListTablesRequest(
+        return await self._inner.list_tables(
-            id=namespace_path, page_token=page_token, limit=limit
+            namespace_path=namespace_path, page_token=page_token, limit=limit
        )
        response = self._namespace_client.list_tables(request)
        return ListTablesResponse(
            tables=response.tables if response.tables else [],
            page_token=response.page_token,
        )
    async def namespace_client(self) -> LanceNamespace:
@@ -1310,6 +1275,18 @@ class AsyncLanceNamespaceDBConnection:
        LanceNamespace
            The namespace client for this connection.
        """
        if self._namespace_client is None:
            if (
                self._namespace_client_impl is None
                or self._namespace_client_properties is None
            ):
                raise ValueError(
                    "Cannot construct a Python namespace client without "
                    "namespace implementation properties"
                )
            self._namespace_client = namespace_connect(
                self._namespace_client_impl, self._namespace_client_properties
            )
        return self._namespace_client
@@ -1360,7 +1337,7 @@ def connect_namespace(
    LanceNamespaceDBConnection
        A namespace-based connection to LanceDB
    """
-    if _supports_native_sync_namespace(namespace_client_impl):
+    if _supports_native_namespace(namespace_client_impl):
        inner = AsyncConnection(
            _connect_namespace(
                namespace_client_impl,
@@ -1384,7 +1361,6 @@ def connect_namespace(
            namespace_client_impl=namespace_client_impl,
            namespace_client_properties=namespace_client_properties,
            _inner=inner,
            _route_pushdown_to_rust=True,
        )
    namespace_client = namespace_connect(
@@ -1462,6 +1438,32 @@ def connect_namespace_async(
    ...     tables = await db.table_names()
    ...     table = await db.create_table("my_table", schema=schema)
    """
    if _supports_native_namespace(namespace_client_impl):
        inner = AsyncConnection(
            _connect_namespace(
                namespace_client_impl,
                namespace_client_properties,
                read_consistency_interval=(
                    read_consistency_interval.total_seconds()
                    if read_consistency_interval is not None
                    else None
                ),
                storage_options=storage_options,
                session=session,
                namespace_client_pushdown_operations=namespace_client_pushdown_operations,
            )
        )
        return AsyncLanceNamespaceDBConnection(
            namespace_client=None,
            read_consistency_interval=read_consistency_interval,
            storage_options=storage_options,
            session=session,
            namespace_client_pushdown_operations=namespace_client_pushdown_operations,
            namespace_client_impl=namespace_client_impl,
            namespace_client_properties=namespace_client_properties,
            _inner=inner,
        )
    namespace_client = namespace_connect(
        namespace_client_impl, namespace_client_properties
    )
--- a/python/python/lancedb/query.py
+++ b/python/python/lancedb/query.py
@@ -119,6 +119,27 @@ def _filter_to_sql(filter: Optional[Union[str, Expr]]) -> Optional[str]:
    return filter
 def _combine_where(
    existing: Optional[Union[str, Expr]], new: Union[str, Expr]
 ) -> Union[str, Expr]:
    """Combine a new filter with an existing one using a logical AND.
    Calling ``where`` more than once composes the filters with AND instead of
    replacing the previous filter. Two :class:`~lancedb.expr.Expr` filters are
    combined as an expression; otherwise both filters are lowered to SQL strings
    and combined as SQL.
    """
    if existing is None:
        return new
    existing_is_expr = isinstance(existing, Expr)
    new_is_expr = isinstance(new, Expr)
    if existing_is_expr and new_is_expr:
        return existing & new
    existing_sql = existing.to_sql() if existing_is_expr else existing
    new_sql = new.to_sql() if new_is_expr else new
    return f"({existing_sql}) AND ({new_sql})"
 def _projection_to_scanner_kwargs(
    columns: Optional[
        Union[
@@ -1148,8 +1169,13 @@ class LanceQueryBuilder(ABC):
        -------
        LanceQueryBuilder
            The LanceQueryBuilder object.
        Notes
        -----
        Calling this multiple times combines the filters with a logical AND
        rather than replacing the previous filter.
        """
-        self._where = where
+        self._where = _combine_where(self._where, where)
        self._postfilter = not prefilter
        return self
@@ -1693,8 +1719,13 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
        -------
        LanceQueryBuilder
            The LanceQueryBuilder object.
        Notes
        -----
        Calling this multiple times combines the filters with a logical AND
        rather than replacing the previous filter.
        """
-        self._where = where
+        self._where = _combine_where(self._where, where)
        if prefilter is not None:
            self._postfilter = not prefilter
        return self
@@ -2894,6 +2925,9 @@ class AsyncStandardQuery(AsyncQueryBase):
        Filtering performance can often be improved by creating a scalar index
        on the filter column(s).
        Calling this multiple times combines the filters with a logical AND
        rather than replacing the previous filter.
        """
        if isinstance(predicate, Expr):
            self._inner.where_expr(predicate._inner)
--- a/python/python/lancedb/rerankers/mrr.py
+++ b/python/python/lancedb/rerankers/mrr.py
@@ -156,9 +156,16 @@ class MRRReranker(Reranker):
                reciprocal_rank = 1.0 / rank
                mrr_score_map[result_id].append(reciprocal_rank)
        # MRR averages the reciprocal rank across *all* ranking systems, treating
        # a system in which a document does not appear as a reciprocal rank of 0.
        # We therefore divide by the total number of systems, not by the number of
        # systems the document happens to appear in -- otherwise a document found
        # by a single ranking would outrank one ranked highly by every system,
        # defeating the purpose of fusing the rankings.
        num_systems = len(vector_results)
        final_mrr_scores = {}
        for result_id, reciprocal_ranks in mrr_score_map.items():
-            mean_rr = np.mean(reciprocal_ranks)
+            mean_rr = float(np.sum(reciprocal_ranks)) / num_systems
            final_mrr_scores[result_id] = mean_rr
        combined = pa.concat_tables(vector_results, **self._concat_tables_args)
--- a/python/python/tests/test_namespace.py
+++ b/python/python/tests/test_namespace.py
@@ -599,6 +599,61 @@ class TestAsyncNamespaceConnection:
        table_names = await db.table_names()
        assert len(list(table_names)) == 0
    async def test_async_builtin_namespace_uses_rust_without_python_client(
        self, monkeypatch
    ):
        """Built-in async namespace connections should not construct or call the
        Python namespace client for normal namespace/table management."""
        namespace_module = importlib.import_module("lancedb.namespace")
        def fail_namespace_connect(*args, **kwargs):
            raise AssertionError("Python namespace client should not be constructed")
        monkeypatch.setattr(
            namespace_module, "namespace_connect", fail_namespace_connect
        )
        db = lancedb.connect_namespace_async("dir", {"root": self.temp_dir})
        assert isinstance(db, lancedb.AsyncLanceNamespaceDBConnection)
        assert db._namespace_client is None
        assert db._route_pushdown_to_rust is True
        await db.create_namespace(["test_ns"])
        assert "test_ns" in (await db.list_namespaces()).namespaces
        schema = pa.schema([pa.field("id", pa.int64())])
        table = await db.create_table(
            "test_table", schema=schema, namespace_path=["test_ns"]
        )
        assert table._namespace_path == ["test_ns"]
        assert table._namespace_client is None
        assert table._route_pushdown_to_rust is True
        assert "test_table" in await db.table_names(namespace_path=["test_ns"])
        assert "test_table" in (await db.list_tables(namespace_path=["test_ns"])).tables
        opened = await db.open_table("test_table", namespace_path=["test_ns"])
        assert opened._namespace_path == ["test_ns"]
        await db.drop_table("test_table", namespace_path=["test_ns"])
        assert (await db.list_tables(namespace_path=["test_ns"])).tables == []
        await db.drop_namespace(["test_ns"])
        assert "test_ns" not in (await db.list_namespaces()).namespaces
    async def test_async_namespace_client_is_lazy(self):
        """namespace_client() should still return the backing client on demand."""
        pytest.importorskip("lance")
        from lance.namespace import DirectoryNamespace
        db = lancedb.connect_namespace_async("dir", {"root": self.temp_dir})
        assert db._namespace_client is None
        ns_client = await db.namespace_client()
        assert isinstance(ns_client, DirectoryNamespace)
        namespace_id = ns_client.namespace_id().replace("\\\\", "\\")
        assert str(self.temp_dir) in namespace_id
        assert db._namespace_client is ns_client
    # Async connect via namespace helper is not enabled yet.
    async def test_create_table_async(self):
@@ -870,10 +925,11 @@ class TestPushdownOperations:
        )
        assert db._route_pushdown_to_rust is True
-    def test_async_route_pushdown_to_rust_false_for_dir(self):
+    def test_async_route_pushdown_to_rust_for_native_dir(self):
-        """The async non-native (dir) connection keeps the Python pushdown path."""
+        """The async dir connection is natively built and defers QueryTable
        pushdown to Rust."""
        db = lancedb.connect_namespace_async("dir", {"root": self.temp_dir})
-        assert db._route_pushdown_to_rust is False
+        assert db._route_pushdown_to_rust is True
    def test_lance_table_to_arrow_uses_query_pushdown(self):
        namespace_client = _NamespaceClient()
--- a/python/python/tests/test_query.py
+++ b/python/python/tests/test_query.py
@@ -502,6 +502,61 @@ def test_with_row_id(table: lancedb.table.Table):
    assert rs["_rowid"].to_pylist() == [0, 1]
 def test_where_repeated_combines_with_and(table: lancedb.table.Table):
    # Calling where() more than once should AND the filters together instead of
    # silently replacing the previous one (regression test for #2649).
    builder = table.search().where("id >= 1").where("id < 2")
    assert builder._where == "(id >= 1) AND (id < 2)"
    ids = [row["id"] for row in builder.limit(10).to_list()]
    assert ids == [1]
 def test_where_repeated_combines_expr(table: lancedb.table.Table):
    from lancedb.expr import col, lit
    builder = table.search().where(col("id") >= lit(1)).where(col("id") < lit(2))
    ids = [row["id"] for row in builder.limit(10).to_list()]
    assert ids == [1]
 def test_where_mixed_filter_kinds_combines(table: lancedb.table.Table):
    # Mixing a SQL string filter with an expression filter lowers the
    # expression to SQL and combines them as SQL strings.
    from lancedb.expr import col, lit
    builder = table.search().where("id >= 1").where(col("id") < lit(2))
    ids = [row["id"] for row in builder.limit(10).to_list()]
    assert ids == [1]
@pytest.mark.asyncio
 async def test_where_repeated_combines_with_and_async(table_async: AsyncTable):
    ids = [
        row["id"]
        for row in (
            await table_async.query().where("id >= 1").where("id < 2").to_list()
        )
    ]
    assert ids == [1]
@pytest.mark.asyncio
 async def test_where_mixed_filter_kinds_combines_async(table_async: AsyncTable):
    from lancedb.expr import col, lit
    ids = [
        row["id"]
        for row in (
            await table_async.query()
            .where("id >= 1")
            .where(col("id") < lit(2))
            .to_list()
        )
    ]
    assert ids == [1]
 def test_distance_range(table: lancedb.table.Table):
    q = [0, 0]
    rs = table.search(q).to_arrow()
--- a/python/python/tests/test_rerankers.py
+++ b/python/python/tests/test_rerankers.py
@@ -350,6 +350,38 @@ def test_mrr_reranker_empty_input():
        reranker.rerank_multivector([])
 def test_mrr_multivector_rewards_consensus():
    # Reciprocal ranks must be averaged across *all* ranking systems, treating a
    # missing system as 0. A document ranked first by every system must outrank a
    # document ranked first by only one of them.
    reranker = MRRReranker()
    def ranking(row_ids):
        return pa.table({"_rowid": pa.array(row_ids, type=pa.int64())})
    # Doc 1 is rank 1 in only the first system; doc 2 is rank 1 in two systems
    # and rank 2 in the third (strong cross-system consensus).
    rs1 = ranking([1, 2, 3])
    rs2 = ranking([2, 3, 4])
    rs3 = ranking([2, 5, 6])
    result = reranker.rerank_multivector([rs1, rs2, rs3])
    scores = {
        row_id: score
        for row_id, score in zip(
            result["_rowid"].to_pylist(),
            result["_relevance_score"].to_pylist(),
        )
    }
    # sum of reciprocal ranks / number of systems
    assert scores[1] == pytest.approx(1.0 / 3)
    assert scores[2] == pytest.approx((0.5 + 1.0 + 1.0) / 3)
    assert scores[2] > scores[1]
    # The consensus document ranks first overall.
    assert result["_rowid"].to_pylist()[0] == 2
 def test_rrf_reranker_distance():
    data = pa.table(
        {
--- a/rust/lancedb/Cargo.toml
+++ b/rust/lancedb/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb"
-version = "0.31.0-beta.4"
+version = "0.31.0-beta.6"
 edition.workspace = true
 description = "LanceDB: A serverless, low-latency vector database for AI applications"
 license.workspace = true
@@ -14,6 +14,7 @@ rust-version.workspace = true
 ahash = { workspace = true }
 arrow = { workspace = true }
 arrow-array = { workspace = true }
 arrow-buffer = { workspace = true }
 arrow-data = { workspace = true }
 arrow-schema = { workspace = true }
 arrow-select = { workspace = true }
--- a/rust/lancedb/src/arrow.rs
+++ b/rust/lancedb/src/arrow.rs
@@ -3,7 +3,19 @@
 use std::{pin::Pin, sync::Arc};
 // Re-export the arrow crates we depend on so downstream consumers can build
 // `RecordBatch`/arrays/builders against the exact same arrow line lancedb was
 // compiled against, instead of declaring their own (potentially mismatched)
 // direct arrow dependencies. See https://github.com/lancedb/lancedb/issues/3575.
 pub use arrow;
 pub use arrow_array;
 pub use arrow_buffer;
 pub use arrow_cast;
 pub use arrow_data;
 pub use arrow_ipc;
 pub use arrow_ord;
 pub use arrow_schema;
 pub use arrow_select;
 use datafusion_common::DataFusionError;
 use datafusion_physical_plan::stream::RecordBatchStreamAdapter;
 use futures::{Stream, StreamExt, TryStreamExt};
--- a/rust/lancedb/src/lib.rs
+++ b/rust/lancedb/src/lib.rs
@@ -342,3 +342,9 @@ pub use connection::connect_namespace;
 /// Re-export Lance Session and ObjectStoreRegistry for custom session creation
 pub use lance::session::Session;
 pub use lance_io::object_store::ObjectStoreRegistry;
 /// Re-export DataFusion so consumers can build the `Expr` values that public
 /// query/merge APIs (e.g. [`query::QueryBase::only_if_expr`]) accept without
 /// declaring their own (potentially mismatched) direct `datafusion` dependency.
 /// See <https://github.com/lancedb/lancedb/issues/3575>.
 pub use datafusion;
--- a/rust/lancedb/src/query.rs
+++ b/rust/lancedb/src/query.rs
@@ -401,6 +401,9 @@ pub trait QueryBase {
    ///
    /// Filtering performance can often be improved by creating a scalar index
    /// on the filter column(s).
    ///
    /// Calling this multiple times combines the filters with a logical AND
    /// (i.e. `(previous) AND (new)`) rather than replacing the previous filter.
    fn only_if(self, filter: impl AsRef<str>) -> Self;
    /// Only return rows which match the filter, using an expression builder.
@@ -423,6 +426,9 @@ pub trait QueryBase {
    ///
    /// Note: Expression filters are not supported for remote/server-side queries.
    /// Use [`QueryBase::only_if`] with SQL strings for remote tables.
    ///
    /// Calling this multiple times combines the expressions with a logical AND
    /// rather than replacing the previous filter.
    fn only_if_expr(self, filter: datafusion_expr::Expr) -> Self;
    /// Perform a full text search on the table.
@@ -535,12 +541,13 @@ impl<T: HasQuery> QueryBase for T {
    }
    fn only_if(mut self, filter: impl AsRef<str>) -> Self {
-        self.mut_query().filter = Some(QueryFilter::Sql(filter.as_ref().to_string()));
+        self.mut_query()
            .add_filter(QueryFilter::Sql(filter.as_ref().to_string()));
        self
    }
    fn only_if_expr(mut self, filter: datafusion_expr::Expr) -> Self {
-        self.mut_query().filter = Some(QueryFilter::Datafusion(filter));
+        self.mut_query().add_filter(QueryFilter::Datafusion(filter));
        self
    }
@@ -716,6 +723,39 @@ pub enum QueryFilter {
    Datafusion(Expr),
 }
 /// Combine two filters with a logical AND.
 ///
 /// This is used when a query receives more than one filter (for example when
 /// `where`/`only_if` is called multiple times) so the filters are composed
 /// with AND rather than the later filter silently replacing the earlier one.
 ///
 /// SQL string and expression filters are combined within their own
 /// representation. When the two representations are mixed, the expression is
 /// lowered to SQL (via [`crate::expr::expr_to_sql_string`]) and the filters are
 /// combined as SQL strings. Substrait filters cannot be combined and return an
 /// error.
 fn and_filters(existing: QueryFilter, new: QueryFilter) -> Result<QueryFilter> {
    match (existing, new) {
        (QueryFilter::Sql(lhs), QueryFilter::Sql(rhs)) => {
            Ok(QueryFilter::Sql(format!("({lhs}) AND ({rhs})")))
        }
        (QueryFilter::Datafusion(lhs), QueryFilter::Datafusion(rhs)) => {
            Ok(QueryFilter::Datafusion(lhs.and(rhs)))
        }
        (QueryFilter::Sql(lhs), QueryFilter::Datafusion(rhs)) => {
            let rhs = crate::expr::expr_to_sql_string(&rhs)?;
            Ok(QueryFilter::Sql(format!("({lhs}) AND ({rhs})")))
        }
        (QueryFilter::Datafusion(lhs), QueryFilter::Sql(rhs)) => {
            let lhs = crate::expr::expr_to_sql_string(&lhs)?;
            Ok(QueryFilter::Sql(format!("({lhs}) AND ({rhs})")))
        }
        _ => Err(Error::InvalidInput {
            message: "cannot combine a Substrait filter with another filter".to_string(),
        }),
    }
 }
 /// A basic query into a table without any kind of search
 ///
 /// This will result in a (potentially filtered) scan if executed
@@ -730,6 +770,13 @@ pub struct QueryRequest {
    /// Apply filter to the returned rows.
    pub filter: Option<QueryFilter>,
    /// An error recorded while combining repeated filters that could not be
    /// composed (see [`QueryRequest::add_filter`]). It is surfaced when the
    /// query is executed via [`QueryRequest::check_filter`]. We defer the error
    /// because the builder methods that set filters return `Self` rather than a
    /// `Result`.
    pub(crate) filter_error: Option<String>,
    /// Perform a full text search on the table.
    pub full_text_search: Option<FullTextSearchQuery>,
@@ -775,6 +822,7 @@ impl Default for QueryRequest {
            limit: None,
            offset: None,
            filter: None,
            filter_error: None,
            full_text_search: None,
            select: Select::All,
            fast_search: false,
@@ -788,6 +836,41 @@ impl Default for QueryRequest {
    }
 }
 impl QueryRequest {
    /// Add a filter, combining it with any existing filter using a logical AND.
    ///
    /// If the new filter cannot be combined with the existing one (because they
    /// use different representations) the error is recorded and surfaced later
    /// by [`Self::check_filter`].
    pub(crate) fn add_filter(&mut self, new: QueryFilter) {
        self.filter = Some(match self.filter.take() {
            None => new,
            Some(existing) => match and_filters(existing, new) {
                Ok(combined) => combined,
                Err(err) => {
                    // The filters were consumed while attempting to combine
                    // them; the recorded error is surfaced by `check_filter`
                    // before the query executes.
                    self.filter_error = Some(err.to_string());
                    return;
                }
            },
        });
    }
    /// Return an error if combining filters failed (see [`Self::add_filter`]).
    ///
    /// This must be called by every backend before executing a query.
    pub(crate) fn check_filter(&self) -> Result<()> {
        if let Some(message) = &self.filter_error {
            return Err(Error::InvalidInput {
                message: message.clone(),
            });
        }
        Ok(())
    }
 }
 /// A builder for LanceDB queries.
 ///
 /// See [`crate::Table::query`] for more details on queries
@@ -1682,6 +1765,70 @@ mod tests {
        }
    }
    #[tokio::test]
    async fn test_repeated_only_if_combines_with_and() {
        use crate::expr::{col, lit};
        let tmp_dir = tempdir().unwrap();
        let dataset_path = tmp_dir.path().join("test.lance");
        let uri = dataset_path.to_str().unwrap();
        let conn = connect(uri).execute().await.unwrap();
        let table = conn
            .create_table("my_table", make_non_empty_batches())
            .execute()
            .await
            .unwrap();
        let query = table.query().only_if("id > 0").only_if("id < 100");
        match &query.request.filter {
            Some(QueryFilter::Sql(sql)) => assert_eq!(sql, "(id > 0) AND (id < 100)"),
            other => panic!("expected combined SQL filter, got {other:?}"),
        }
        // A single filter is left untouched.
        let query = table.query().only_if("id > 0");
        match &query.request.filter {
            Some(QueryFilter::Sql(sql)) => assert_eq!(sql, "id > 0"),
            other => panic!("expected single SQL filter, got {other:?}"),
        }
        // Expression filters are combined with a logical AND as well.
        let query = table
            .query()
            .only_if_expr(col("id").gt(lit(0i32)))
            .only_if_expr(col("id").lt(lit(100i32)));
        match &query.request.filter {
            Some(QueryFilter::Datafusion(expr)) => {
                assert_eq!(
                    expr,
                    &col("id").gt(lit(0i32)).and(col("id").lt(lit(100i32)))
                );
            }
            other => panic!("expected combined Datafusion filter, got {other:?}"),
        }
        // Mixing an SQL string filter with an expression filter lowers the
        // expression to SQL and combines them as SQL strings.
        let query = table
            .query()
            .only_if("id > 0")
            .only_if_expr(col("id").lt(lit(100i32)));
        match &query.request.filter {
            Some(QueryFilter::Sql(sql)) => {
                let expected = format!(
                    "(id > 0) AND ({})",
                    crate::expr::expr_to_sql_string(&col("id").lt(lit(100i32))).unwrap()
                );
                assert_eq!(sql, &expected);
            }
            other => panic!("expected combined SQL filter, got {other:?}"),
        }
        assert!(query.request.check_filter().is_ok());
        // The combined filter executes without error.
        query.execute().await.unwrap();
    }
    #[tokio::test]
    async fn test_select_with_transform() {
        // TODO: Switch back to memory://foo after https://github.com/lancedb/lancedb/issues/1051
--- a/rust/lancedb/src/remote/table.rs
+++ b/rust/lancedb/src/remote/table.rs
@@ -612,6 +612,7 @@ impl<S: HttpSend> RemoteTable<S> {
        body: &mut serde_json::Value,
        params: &QueryRequest,
    ) -> Result<()> {
        params.check_filter()?;
        body["prefilter"] = params.prefilter.into();
        if let Some(offset) = params.offset {
            body["offset"] = serde_json::Value::Number(serde_json::Number::from(offset));
--- a/rust/lancedb/src/table/query.rs
+++ b/rust/lancedb/src/table/query.rs
@@ -35,6 +35,15 @@ pub enum AnyQuery {
    VectorQuery(VectorQueryRequest),
 }
 impl AnyQuery {
    pub(crate) fn base(&self) -> &QueryRequest {
        match self {
            Self::Query(query) => query,
            Self::VectorQuery(query) => &query.base,
        }
    }
 }
 //Decide between namespace or local
 pub async fn execute_query(
    table: &NativeTable,
@@ -108,6 +117,7 @@ pub async fn create_plan(
        AnyQuery::VectorQuery(query) => query.clone(),
        AnyQuery::Query(query) => VectorQueryRequest::from_plain_query(query.clone()),
    };
    query.base.check_filter()?;
    let ds_ref = table.dataset.get().await?;
    let schema = ds_ref.schema();
@@ -357,6 +367,7 @@ async fn execute_namespace_query(
 /// Convert an AnyQuery to the namespace QueryTableRequest format.
 fn convert_to_namespace_query(query: &AnyQuery) -> Result<NsQueryTableRequest> {
    query.base().check_filter()?;
    match query {
        AnyQuery::VectorQuery(vq) => {
            // Extract the query vector(s)
Author	SHA1	Message	Date
Lance Release	37466a0390	Bump version: 0.31.0-beta.5 → 0.31.0-beta.6	2026-07-02 11:33:53 +00:00
Lance Release	bfce8a510d	Bump version: 0.34.0-beta.5 → 0.34.0-beta.6	2026-07-02 11:32:45 +00:00
Armaan Sandhu	a1261e6299	fix(python): average MRR reciprocal ranks over all rankings (#3599 ) ## What `MRRReranker.rerank_multivector` averages each document's reciprocal ranks over the wrong denominator. It divides by the number of rankings the document happens to appear in, instead of the total number of rankings being fused. ```python # python/python/lancedb/rerankers/mrr.py for result_id, reciprocal_ranks in mrr_score_map.items(): mean_rr = np.mean(reciprocal_ranks) # divides by len(present systems) ``` `mrr_score_map[doc]` only accumulates a reciprocal rank for the systems in which the document was returned, so `np.mean` never accounts for the systems that missed it. ## Why it's wrong Mean Reciprocal Rank fusion treats a system that didn't return a document as a reciprocal rank of `0` and averages across all systems. That's the exact mechanism by which it rewards cross-system consensus. Dividing by the appearance count removes that, so a document liked by a single ranking can beat one ranked highly by every ranking. Concretely, fusing 3 vector rankings: \| Doc \| Ranks \| Current score \| Correct score \| \|-----\|-------\|---------------\|---------------\| \| A \| #1 in 1 system only \| `mean([1.0]) = 1.000` \| `1.0 / 3 = 0.333` \| \| B \| #1, #1, #2 across all 3 \| `mean([1, 1, .5]) = 0.833` \| `2.5 / 3 = 0.833` \| The current code ranks A above B - a document two of three rankings ignored outranks one all three ranked at or near the top. This also makes `rerank_multivector` inconsistent with `rerank_hybrid` in the same file, which already treats a missing system as `0` (`vector_rr = 0.0` / `fts_rr = 0.0`), and with the class docstring ("average of reciprocal ranks across different search results"). ## Fix Divide the summed reciprocal ranks by the total number of rankings: ```python num_systems = len(vector_results) ... mean_rr = float(np.sum(reciprocal_ranks)) / num_systems ``` ## Tests Adds `test_mrr_multivector_rewards_consensus`, which asserts the exact MRR scores and that the consensus document ranks first. It fails on `main` and passes with this change. Existing reranker tests are unaffected.	2026-07-01 15:36:56 -07:00
Neo-X7	17c499177f	docs(python): add missing parameter documentation for when_matched_update_all (#3536 ) Fixes #2493 Added target. prefix requirement to where parameter docstring.	2026-07-01 10:28:58 -07:00
Will Jones	d889321b5e	fix!: combine repeated where filters with AND instead of replacing (#3585 ) BREAKING CHANGE: When passing multiple where clauses to a query, they now stack instead of replacing the previous filter. Previously, calling `where`/`only_if` more than once on a query silently replaced the previous filter, so only the last filter was applied. This was surprising and could return rows that an earlier filter should have excluded. This implements the alternative suggested in https://github.com/lancedb/lancedb/pull/3514#issuecomment-4664901580: instead of rejecting a second filter, repeated filters are combined with a logical AND (`(previous) AND (new)`). The combination happens in the Rust core (`QueryBase::only_if` and `only_if_expr`), so it applies to all SDKs at once (Rust, Python async, and TypeScript). The Python sync query builder keeps its own filter state, so it combines filters in the binding layer as well. SQL string and expression filters are combined within their own representation. When the two representations are mixed, the expression is lowered to SQL (via `expr_to_sql_string`) and the filters are combined as SQL strings, so chaining `where` works regardless of which form each filter takes. Fixes #2649 ## Tests - Rust: `cargo test --features remote -p lancedb --lib query` - Python: `uv run --extra tests pytest python/tests/test_query.py` - TypeScript: `pnpm test __test__/query.test.ts` 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-07-01 10:11:58 -07:00
Will Jones	8a37f2ad77	feat(rust): re-export arrow and datafusion crates from lancedb (#3576 ) lancedb's public API forces downstream crates to construct foreign types — `RecordBatch`/arrays/builders for `Table::add(...)` (arrow), and `datafusion_expr::Expr` for `only_if_expr`/`expr_projection`/merge filters. The required version must exactly match lancedb's internal arrow/datafusion line, but nothing on the API surface makes that visible. Drift surfaces only as confusing trait/type errors: ```text error[E0277]: the trait bound `RecordBatch: Scannable` is not satisfied = note: there are multiple different versions of crate `arrow_array` in the dependency graph ``` This re-exports the crates lancedb already pins, so consumers can rely on a single, guaranteed-matching line via a discoverable import path instead of declaring their own (potentially mismatched) direct dependency. - `lancedb::arrow::{arrow, arrow_array, arrow_buffer, arrow_cast, arrow_data, arrow_ipc, arrow_ord, arrow_schema, arrow_select}` — previously only `arrow_schema` was re-exported. `arrow-buffer` is promoted from a transitive to a direct dependency. - `lancedb::datafusion` — `Expr` is a first-class part of the query and merge APIs (`only_if_expr`, `expr_projection`, `QueryFilter::Datafusion`, `when_matched_update_all_expr`), and `ExecutionPlan` is returned from `create_plan`. This follows DataFusion's own precedent of re-exporting `arrow`. The coupling already exists via the trait/impl bounds — this surfaces it rather than hiding it behind an `E0277`. Closes #3575 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-07-01 10:10:55 -07:00
Raphael Malikian	f94673ae5e	ci: update deprecated GitHub Actions to latest versions (Fixes #3577 ) (#3608 ) Fixes #3577 ## Problem GitHub Actions is deprecating Node.js 20 on its runners. Multiple workflows in lancedb use action versions that target Node.js 20 (`actions/checkout@v4`, `actions/setup-node@v4`, `actions/cache@v4`, `actions/upload-artifact@v4`, `actions/download-artifact@v4`, `pnpm/action-setup@v4`). These are being force-run on Node.js 24, generating deprecation warnings. ## Solution Updated all deprecated actions to their latest major versions that support Node.js 24: \| Action \| Old Version \| New Version \| \|--------\|------------\|-------------\| \| `actions/checkout` \| @v4 \| @v6 \| \| `actions/setup-node` \| @v4 \| @v6 \| \| `actions/cache` \| @v4 \| @v5 \| \| `actions/upload-artifact` \| @v4 \| @v7 \| \| `actions/download-artifact` \| @v4 \| @v8 \| \| `pnpm/action-setup` \| @v4 \| @v6 \| Note: `actions/checkout@v6` and `actions/upload-artifact@v7` are already used in `pypi-publish.yml` — this PR extends the same versions to all remaining workflows. ### Files Changed - `.github/workflows/npm-publish.yml` — Updated checkout, setup-node, cache, upload-artifact, download-artifact, pnpm - `.github/workflows/nodejs.yml` — Updated checkout, setup-node, pnpm - `.github/workflows/python.yml` — Updated checkout - `.github/workflows/rust.yml` — Updated checkout - `.github/workflows/java.yml` — Updated checkout - `.github/workflows/java-publish.yml` — Updated checkout - `.github/workflows/cargo-publish.yml` — Updated checkout - `.github/workflows/docs.yml` — Updated checkout, setup-node - `.github/workflows/dev.yml` — Updated setup-node - `.github/workflows/codex-fix-ci.yml` — Updated checkout, setup-node, pnpm - `.github/workflows/codex-update-lance-dependency.yml` — Updated checkout, setup-node - `.github/workflows/license-header-check.yml` — Updated checkout - `.github/workflows/make-release-commit.yml` — Updated checkout - `.github/workflows/update_package_lock_run.yml` — Updated checkout - `.github/workflows/update_package_lock_run_nodejs.yml` — Updated checkout ## Verification - All 20 YAML files validated with `yaml.safe_load()` — no syntax errors - GitHub Actions CI will validate the actual action versions at runtime ## Changelog \| Date \| Change \| Author \| \|------\|--------\|--------\| \| 2026-07-01 \| Updated all deprecated Node 20 actions to latest versions across 15 workflow files \| rtmalikian \| --- Disclosure: This code was developed with assistance from DeepSeek-v4-pro (DeepSeek) via Hermes Agent (Nous Research). All changes were reviewed and verified for correctness. Signed-off-by: rtmalikian <rtmalikian@gmail.com>	2026-07-01 09:38:26 -07:00
Jack Ye	3b70fc4c9d	fix(python): route async namespace connections through rust (#3603 ) Summary: - Route built-in async namespace-backed connections through the Rust namespace connector. - Delegate async namespace/table management methods to the inner AsyncConnection while keeping the custom implementation Python-client fallback. - Add regressions for the native async dir path and lazy namespace_client() construction. Validated locally with targeted namespace/db/table pytest, full test_namespace.py, ruff, cargo fmt/check/clippy, and cargo test -p lancedb-python.	2026-06-30 17:03:23 -07:00
Lance Release	3a7b02119b	Bump version: 0.31.0-beta.4 → 0.31.0-beta.5	2026-06-30 22:24:56 +00:00