Bump version: 0.31.0-beta.5 → 0.31.0-beta.6

Bump version: 0.34.0-beta.5 → 0.34.0-beta.6
fix(python): average MRR reciprocal ranks over all rankings (#3599 )
2026-07-02 18:40:40 +00:00 · 2026-07-02 11:33:53 +00:00 · 2026-07-02 11:32:45 +00:00 · 2026-07-01 15:36:56 -07:00 · 2026-07-01 10:28:58 -07:00 · 2026-07-01 10:11:58 -07:00
50 changed files with 594 additions and 196 deletions
--- a/.bumpversion.toml
+++ b/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.31.0-beta.4"
+current_version = "0.31.0-beta.6"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/.github/workflows/cargo-publish.yml
+++ b/.github/workflows/cargo-publish.yml
@@ -25,7 +25,7 @@ jobs:
    # Only runs on tags that matches the make-release action
    if: startsWith(github.ref, 'refs/tags/v')
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
      - uses: Swatinem/rust-cache@v2
        with:
          workspaces: rust
@@ -47,7 +47,7 @@ jobs:
      contents: read
      issues: write
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
      - uses: ./.github/actions/create-failure-issue
        with:
          job-results: ${{ toJSON(needs) }}
--- a/.github/workflows/codex-fix-ci.yml
+++ b/.github/workflows/codex-fix-ci.yml
@@ -36,14 +36,14 @@ jobs:
          echo "guidelines = ${{ inputs.guidelines }}"

      - name: Checkout Repo
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
        with:
          ref: ${{ inputs.branch }}
          fetch-depth: 0
          persist-credentials: true

      - name: Set up Node.js
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          # pnpm 11 (used by the nodejs install step below) requires
          # Node >= 22.13; use 24 since 22 hits EOL in October.
@@ -82,7 +82,7 @@ jobs:
          cache: maven

      - name: Setup pnpm
-        uses: pnpm/action-setup@v4
+        uses: pnpm/action-setup@v6
        with:
          version: 11.1.1
      - name: Install Node.js dependencies for TypeScript bindings
--- a/.github/workflows/codex-update-lance-dependency.yml
+++ b/.github/workflows/codex-update-lance-dependency.yml
@@ -30,13 +30,13 @@ jobs:
          echo "tag = ${{ inputs.tag || 'latest' }}"

      - name: Checkout Repo LanceDB
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
        with:
          fetch-depth: 0
          persist-credentials: true

      - name: Set up Node.js
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          node-version: 20

--- a/.github/workflows/dev.yml
+++ b/.github/workflows/dev.yml
@@ -27,7 +27,7 @@ jobs:
    name: Verify PR title / description conforms to semantic-release
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/setup-node@v4
+      - uses: actions/setup-node@v6
        with:
          node-version: "18"
      # These rules are disabled because Github will always ensure there
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -35,7 +35,7 @@ jobs:
    runs-on: ubuntu-24.04
    steps:
      - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
      - name: Install dependencies needed for ubuntu
        run: |
          sudo apt install -y protobuf-compiler libssl-dev
@@ -53,7 +53,7 @@ jobs:
          python -m pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .
          python -m pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -r ../docs/requirements.txt
      - name: Set up node
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          node-version: 20
          cache: 'npm'
--- a/.github/workflows/java-publish.yml
+++ b/.github/workflows/java-publish.yml
@@ -32,7 +32,7 @@ jobs:
        working-directory: ./java
    steps:
      - name: Checkout repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
      - name: Set up Java 8
        uses: actions/setup-java@v4
        with:
@@ -73,7 +73,7 @@ jobs:
      contents: read
      issues: write
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
      - uses: ./.github/actions/create-failure-issue
        with:
          job-results: ${{ toJSON(needs) }}
--- a/.github/workflows/java.yml
+++ b/.github/workflows/java.yml
@@ -36,7 +36,7 @@ jobs:
        working-directory: ./java
    steps:
      - name: Checkout repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
      - name: Set up Java 17
        uses: actions/setup-java@v4
        with:
--- a/.github/workflows/license-header-check.yml
+++ b/.github/workflows/license-header-check.yml
@@ -19,7 +19,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Check out code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
      - name: Install license-header-checker
        working-directory: /tmp
        run: |
--- a/.github/workflows/make-release-commit.yml
+++ b/.github/workflows/make-release-commit.yml
@@ -49,7 +49,7 @@ jobs:
    steps:
      - name: Output Inputs
        run: echo "${{ toJSON(github.event.inputs) }}"
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          lfs: true
--- a/.github/workflows/nodejs.yml
+++ b/.github/workflows/nodejs.yml
@@ -38,14 +38,14 @@ jobs:
      CC: gcc-12
      CXX: g++-12
    steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@v6
      with:
        fetch-depth: 0
        lfs: true
-    - uses: pnpm/action-setup@v4
+    - uses: pnpm/action-setup@v6
      with:
        version: 11.1.1
-    - uses: actions/setup-node@v4
+    - uses: actions/setup-node@v6
      with:
        # pnpm 11 requires Node >= 22.13; use 24 since 22 hits EOL
        # in October. The library itself still supports Node >= 18
@@ -86,14 +86,14 @@ jobs:
        shell: bash
        working-directory: nodejs
    steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@v6
      with:
        fetch-depth: 0
        lfs: true
-    - uses: pnpm/action-setup@v4
+    - uses: pnpm/action-setup@v6
      with:
        version: 11.1.1
-    - uses: actions/setup-node@v4
+    - uses: actions/setup-node@v6
      name: Setup Node.js 24 for build
      with:
        # pnpm 11 requires Node >= 22.13; use 24 since 22 hits EOL
@@ -130,7 +130,7 @@ jobs:
          echo "Run 'pnpm run docs', fix any warnings, and commit the changes."
          exit 1
        fi
-    - uses: actions/setup-node@v4
+    - uses: actions/setup-node@v6
      name: Setup Node.js ${{ matrix.node-version }} for test
      with:
        node-version: ${{ matrix.node-version }}
@@ -166,14 +166,14 @@ jobs:
        shell: bash
        working-directory: nodejs
    steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@v6
      with:
        fetch-depth: 0
        lfs: true
-    - uses: pnpm/action-setup@v4
+    - uses: pnpm/action-setup@v6
      with:
        version: 11.1.1
-    - uses: actions/setup-node@v4
+    - uses: actions/setup-node@v6
      with:
        # pnpm 11 requires Node >= 22.13; use 24 since 22 hits EOL
        # in October.
--- a/.github/workflows/npm-publish.yml
+++ b/.github/workflows/npm-publish.yml
@@ -32,7 +32,7 @@ jobs:
    permissions:
      contents: write
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          lfs: true
@@ -170,13 +170,13 @@ jobs:
      run:
        working-directory: nodejs
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
      - name: Setup pnpm
-        uses: pnpm/action-setup@v4
+        uses: pnpm/action-setup@v6
        with:
          version: 11.1.1
      - name: Setup node
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          # pnpm 11 requires Node >= 22.13; use 24 since 22 hits EOL
          # in October.
@@ -190,7 +190,7 @@ jobs:
          toolchain: stable
          targets: ${{ matrix.settings.target }}
      - name: Cache cargo
-        uses: actions/cache@v4
+        uses: actions/cache@v5
        with:
          path: |
            ~/.cargo/registry/index/
@@ -244,7 +244,7 @@ jobs:
        if: ${{ !matrix.settings.docker }}
        shell: bash
      - name: Upload artifact
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v7
        with:
          name: lancedb-${{ matrix.settings.target }}
          path: nodejs/dist/*.node
@@ -256,7 +256,7 @@ jobs:
        run: pnpm tsc
      - name: Upload Generic Artifacts
        if: ${{ matrix.settings.target == 'aarch64-apple-darwin' }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v7
        with:
          name: nodejs-dist
          path: |
@@ -287,13 +287,13 @@ jobs:
        shell: bash
        working-directory: nodejs
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
      - name: Setup pnpm
-        uses: pnpm/action-setup@v4
+        uses: pnpm/action-setup@v6
        with:
          version: 11.1.1
      - name: Setup Node.js 24 for install
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          # pnpm 11 requires Node >= 22.13; use 24 since 22 hits EOL
          # in October.
@@ -303,18 +303,18 @@ jobs:
      - name: Install dependencies
        run: pnpm install --frozen-lockfile
      - name: Setup Node.js ${{ matrix.node }} for test
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          node-version: ${{ matrix.node }}
      - name: Download artifacts
-        uses: actions/download-artifact@v4
+        uses: actions/download-artifact@v8
        with:
          name: lancedb-${{ matrix.settings.target }}
          path: nodejs/dist/
          # For testing purposes:
          # run-id: 13982782871
          # github-token: ${{ secrets.GITHUB_TOKEN }} # token with actions:read permissions on target repo
-      - uses: actions/download-artifact@v4
+      - uses: actions/download-artifact@v8
        with:
          name: nodejs-dist
          path: nodejs/dist
@@ -339,13 +339,13 @@ jobs:
    needs:
      - test-lancedb
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
      - name: Setup pnpm
-        uses: pnpm/action-setup@v4
+        uses: pnpm/action-setup@v6
        with:
          version: 11.1.1
      - name: Setup node
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v6
        with:
          node-version: 24
          cache: pnpm
@@ -353,14 +353,14 @@ jobs:
          registry-url: "https://registry.npmjs.org"
      - name: Install dependencies
        run: pnpm install --frozen-lockfile
-      - uses: actions/download-artifact@v4
+      - uses: actions/download-artifact@v8
        with:
          name: nodejs-dist
          path: nodejs/dist
          # For testing purposes:
          # run-id: 13982782871
          # github-token: ${{ secrets.GITHUB_TOKEN }} # token with actions:read permissions on target repo
-      - uses: actions/download-artifact@v4
+      - uses: actions/download-artifact@v8
        name: Download arch-specific binaries
        with:
          pattern: lancedb-*
@@ -398,7 +398,7 @@ jobs:
      contents: read
      issues: write
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
      - uses: ./.github/actions/create-failure-issue
        with:
          job-results: ${{ toJSON(needs) }}
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -41,7 +41,7 @@ jobs:
        shell: bash
        working-directory: python
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          lfs: true
@@ -66,7 +66,7 @@ jobs:
        shell: bash
        working-directory: python
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          lfs: true
@@ -95,7 +95,7 @@ jobs:
        shell: bash
        working-directory: python
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          lfs: true
@@ -126,7 +126,7 @@ jobs:
        shell: bash
        working-directory: python
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          lfs: true
@@ -160,7 +160,7 @@ jobs:
        shell: bash
        working-directory: python
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          lfs: true
@@ -189,7 +189,7 @@ jobs:
        shell: bash
        working-directory: python
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          lfs: true
@@ -212,7 +212,7 @@ jobs:
        shell: bash
        working-directory: python
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          lfs: true
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -40,7 +40,7 @@ jobs:
      CC: clang-18
      CXX: clang++-18
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          lfs: true
@@ -65,7 +65,7 @@ jobs:
    timeout-minutes: 10
    runs-on: ubuntu-24.04
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
      - uses: EmbarkStudios/cargo-deny-action@v2
        with:
          command: check advisories bans licenses sources
@@ -78,7 +78,7 @@ jobs:
      CC: clang
      CXX: clang++
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
      # Building without a lock file often requires the latest Rust version since downstream
      # dependencies may have updated their minimum Rust version.
      - uses: actions-rust-lang/setup-rust-toolchain@v1
@@ -113,7 +113,7 @@ jobs:
      CXX: clang++-18
      GH_TOKEN: ${{ secrets.SOPHON_READ_TOKEN }}
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          lfs: true
@@ -152,7 +152,7 @@ jobs:
        shell: bash
        working-directory: rust
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          lfs: true
@@ -181,7 +181,7 @@ jobs:
      run:
        working-directory: rust/lancedb
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
      - name: Set target
        run: rustup target add ${{ matrix.target }}
      - uses: Swatinem/rust-cache@v2
@@ -210,7 +210,7 @@ jobs:
      CC: clang-18
      CXX: clang++-18
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
        with:
          submodules: true
      - name: Install dependencies
--- a/.github/workflows/update_package_lock_run.yml
+++ b/.github/workflows/update_package_lock_run.yml
@@ -11,7 +11,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
        with:
          ref: main
          persist-credentials: false
--- a/.github/workflows/update_package_lock_run_nodejs.yml
+++ b/.github/workflows/update_package_lock_run_nodejs.yml
@@ -11,7 +11,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
        with:
          ref: main
          persist-credentials: false
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -5299,12 +5299,13 @@ dependencies = [

 [[package]]
 name = "lancedb"
-version = "0.31.0-beta.4"
+version = "0.31.0-beta.6"
 dependencies = [
 "ahash",
 "anyhow",
 "arrow",
 "arrow-array",
+ "arrow-buffer",
 "arrow-cast",
 "arrow-data",
 "arrow-ipc",
@@ -5383,7 +5384,7 @@ dependencies = [

 [[package]]
 name = "lancedb-nodejs"
-version = "0.31.0-beta.4"
+version = "0.31.0-beta.6"
 dependencies = [
 "arrow-array",
 "arrow-buffer",
@@ -5408,7 +5409,7 @@ dependencies = [

 [[package]]
 name = "lancedb-python"
-version = "0.34.0-beta.4"
+version = "0.34.0-beta.6"
 dependencies = [
 "arrow",
 "async-trait",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -31,6 +31,7 @@ ahash = "0.8"
 # Note that this one does not include pyarrow
 arrow = { version = "58.0.0", optional = false }
 arrow-array = "58.0.0"
+arrow-buffer = "58.0.0"
 arrow-data = "58.0.0"
 arrow-ipc = "58.0.0"
 arrow-ord = "58.0.0"
--- a/docs/src/java/java.md
+++ b/docs/src/java/java.md
@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
 <dependency>
    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-core</artifactId>
-    <version>0.31.0-beta.4</version>
+    <version>0.31.0-beta.6</version>
 </dependency>
 ```

--- a/docs/src/js/classes/Query.md
+++ b/docs/src/js/classes/Query.md
@@ -518,6 +518,9 @@ x > 5 OR y = 'test'

 Filtering performance can often be improved by creating a scalar index
 on the filter column(s).
+
+Calling this multiple times combines the filters with a logical AND rather
+than replacing the previous filter.
 ```

 #### Inherited from
--- a/docs/src/js/classes/VectorQuery.md
+++ b/docs/src/js/classes/VectorQuery.md
@@ -767,6 +767,9 @@ x > 5 OR y = 'test'

 Filtering performance can often be improved by creating a scalar index
 on the filter column(s).
+
+Calling this multiple times combines the filters with a logical AND rather
+than replacing the previous filter.
 ```

 #### Inherited from
--- a/java/lancedb-core/pom.xml
+++ b/java/lancedb-core/pom.xml
@@ -8,7 +8,7 @@
    <parent>
      <groupId>com.lancedb</groupId>
      <artifactId>lancedb-parent</artifactId>
-      <version>0.31.0-beta.4</version>
+      <version>0.31.0-beta.6</version>
      <relativePath>../pom.xml</relativePath>
    </parent>

--- a/java/pom.xml
+++ b/java/pom.xml
@@ -6,7 +6,7 @@

    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-parent</artifactId>
-    <version>0.31.0-beta.4</version>
+    <version>0.31.0-beta.6</version>
    <packaging>pom</packaging>
    <name>${project.artifactId}</name>
    <description>LanceDB Java SDK Parent POM</description>
--- a/nodejs/Cargo.toml
+++ b/nodejs/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "lancedb-nodejs"
 edition.workspace = true
-version = "0.31.0-beta.4"
+version = "0.31.0-beta.6"
 publish = false
 license.workspace = true
 description.workspace = true
--- a/nodejs/test/query.test.ts
+++ b/nodejs/test/query.test.ts
@@ -215,6 +215,20 @@ describe("Query orderBy", () => {
    expect(results[2].score).toBeCloseTo(4.1, 0.001);
  });

+  it("should combine repeated where clauses with AND", async () => {
+    const results = await table
+      .query()
+      .where("score > 1.0")
+      .where("score < 3.0")
+      .orderBy({ columnName: "score" })
+      .toArray();
+    // Only rows matching both predicates should be returned, rather than the
+    // second where() silently replacing the first.
+    expect(results.length).toBe(2);
+    expect(results[0].score).toBeCloseTo(1.2, 0.001);
+    expect(results[1].score).toBeCloseTo(2.8, 0.001);
+  });
+
  it("should support method chaining with limit", async () => {
    const results = await table
      .query()
--- a/nodejs/lancedb/query.ts
+++ b/nodejs/lancedb/query.ts
@@ -362,6 +362,9 @@ export class StandardQueryBase<
   *
   * Filtering performance can often be improved by creating a scalar index
   * on the filter column(s).
+   *
+   * Calling this multiple times combines the filters with a logical AND rather
+   * than replacing the previous filter.
   */
  where(predicate: string): this {
    this.doCall((inner: NativeQueryType) => inner.onlyIf(predicate));
--- a/nodejs/npm/darwin-arm64/package.json
+++ b/nodejs/npm/darwin-arm64/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-darwin-arm64",
-	"version": "0.31.0-beta.4",
+	"version": "0.31.0-beta.6",
 	"os": ["darwin"],
 	"cpu": ["arm64"],
 	"main": "lancedb.darwin-arm64.node",
--- a/nodejs/npm/linux-arm64-gnu/package.json
+++ b/nodejs/npm/linux-arm64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-gnu",
-	"version": "0.31.0-beta.4",
+	"version": "0.31.0-beta.6",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-gnu.node",
--- a/nodejs/npm/linux-arm64-musl/package.json
+++ b/nodejs/npm/linux-arm64-musl/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-musl",
-	"version": "0.31.0-beta.4",
+	"version": "0.31.0-beta.6",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-musl.node",
--- a/nodejs/npm/linux-x64-gnu/package.json
+++ b/nodejs/npm/linux-x64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-gnu",
-	"version": "0.31.0-beta.4",
+	"version": "0.31.0-beta.6",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-gnu.node",
--- a/nodejs/npm/linux-x64-musl/package.json
+++ b/nodejs/npm/linux-x64-musl/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-musl",
-	"version": "0.31.0-beta.4",
+	"version": "0.31.0-beta.6",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-musl.node",
--- a/nodejs/npm/win32-arm64-msvc/package.json
+++ b/nodejs/npm/win32-arm64-msvc/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@lancedb/lancedb-win32-arm64-msvc",
-  "version": "0.31.0-beta.4",
+  "version": "0.31.0-beta.6",
  "os": [
    "win32"
  ],
--- a/nodejs/npm/win32-x64-msvc/package.json
+++ b/nodejs/npm/win32-x64-msvc/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-win32-x64-msvc",
-	"version": "0.31.0-beta.4",
+	"version": "0.31.0-beta.6",
 	"os": ["win32"],
 	"cpu": ["x64"],
 	"main": "lancedb.win32-x64-msvc.node",
--- a/nodejs/package-lock.json
+++ b/nodejs/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "@lancedb/lancedb",
-  "version": "0.31.0-beta.4",
+  "version": "0.31.0-beta.6",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "@lancedb/lancedb",
-      "version": "0.31.0-beta.4",
+      "version": "0.31.0-beta.6",
      "cpu": [
        "x64",
        "arm64"
--- a/nodejs/package.json
+++ b/nodejs/package.json
@@ -11,7 +11,7 @@
    "ann"
  ],
  "private": false,
-  "version": "0.31.0-beta.4",
+  "version": "0.31.0-beta.6",
  "main": "dist/index.js",
  "exports": {
    ".": "./dist/index.js",
--- a/python/.bumpversion.toml
+++ b/python/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.34.0-beta.5"
+current_version = "0.34.0-beta.6"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-python"
-version = "0.34.0-beta.5"
+version = "0.34.0-beta.6"
 publish = false
 edition.workspace = true
 description = "Python bindings for LanceDB"
--- a/python/python/lancedb/merge.py
+++ b/python/python/lancedb/merge.py
@@ -51,6 +51,15 @@ class LanceMergeInsertBuilder(object):
        If there are multiple matches then the behavior is undefined.
        Currently this causes multiple copies of the row to be created
        but that behavior is subject to change.
+
+        Parameters
+        ----------
+        where: Optional[str], default None
+            An optional filter to limit which rows are updated. Column
+            references in this expression must be prefixed with "target."
+            to refer to the existing table data. For example, to only
+            update rows where the existing color is red, use:
+            ``where="target.color = 'red'"``
        """
        self._when_matched_update_all = True
        self._when_matched_update_all_condition = where
--- a/python/python/lancedb/namespace.py
+++ b/python/python/lancedb/namespace.py
@@ -45,11 +45,6 @@ from lancedb._lancedb import (
 )
 from lancedb.background_loop import LOOP
 from lancedb.db import AsyncConnection, DBConnection
-from lancedb.namespace_utils import (
-    _normalize_create_namespace_mode,
-    _normalize_drop_namespace_mode,
-    _normalize_drop_namespace_behavior,
-)
 from lance_namespace import (
    LanceNamespace,
    connect as namespace_connect,
@@ -58,13 +53,6 @@ from lance_namespace import (
    DropNamespaceResponse,
    ListNamespacesResponse,
    ListTablesResponse,
-    ListTablesRequest,
-    DescribeNamespaceRequest,
-    DropTableRequest,
-    RenameTableRequest,
-    ListNamespacesRequest,
-    CreateNamespaceRequest,
-    DropNamespaceRequest,
 )
 from lancedb.table import AsyncTable, LanceTable, Table
 from lancedb.util import validate_table_name
@@ -389,7 +377,7 @@ def _builds_namespace_natively(
    return namespace_client_impl == "rest" and bool(namespace_client_properties)


-def _supports_native_sync_namespace(namespace_client_impl: str) -> bool:
+def _supports_native_namespace(namespace_client_impl: str) -> bool:
    return namespace_client_impl in {"dir", "rest"}


@@ -412,7 +400,6 @@ class LanceNamespaceDBConnection(DBConnection):
        namespace_client_impl: Optional[str] = None,
        namespace_client_properties: Optional[Dict[str, str]] = None,
        _inner: Optional[AsyncConnection] = None,
-        _route_pushdown_to_rust: Optional[bool] = None,
    ):
        """
        Initialize a namespace-based LanceDB connection.
@@ -454,16 +441,12 @@ class LanceNamespaceDBConnection(DBConnection):
        )
        self._namespace_client_impl = namespace_client_impl
        self._namespace_client_properties = namespace_client_properties
-        # When the namespace client is built natively (see Rust
-        # ``build_namespace_natively``), the underlying Rust table performs
-        # QueryTable pushdown through the read-freshness context provider, which
-        # the pure-Python ``query_table`` path bypasses.
-        self._route_pushdown_to_rust = (
-            _route_pushdown_to_rust
-            if _route_pushdown_to_rust is not None
-            else _builds_namespace_natively(
-                namespace_client_impl, namespace_client_properties
-            )
+        # When the namespace connection or client is built natively in Rust, the
+        # underlying Rust table performs QueryTable pushdown through the
+        # read-freshness context provider, which the pure-Python ``query_table``
+        # path bypasses.
+        self._route_pushdown_to_rust = _inner is not None or _builds_namespace_natively(
+            namespace_client_impl, namespace_client_properties
        )
        if _inner is not None:
            self._inner = _inner
@@ -909,7 +892,7 @@ class AsyncLanceNamespaceDBConnection:

    def __init__(
        self,
-        namespace_client: LanceNamespace,
+        namespace_client: Optional[LanceNamespace] = None,
        *,
        read_consistency_interval: Optional[timedelta] = None,
        storage_options: Optional[Dict[str, str]] = None,
@@ -917,6 +900,7 @@ class AsyncLanceNamespaceDBConnection:
        namespace_client_pushdown_operations: Optional[List[str]] = None,
        namespace_client_impl: Optional[str] = None,
        namespace_client_properties: Optional[Dict[str, str]] = None,
+        _inner: Optional[AsyncConnection] = None,
    ):
        """
        Initialize an async namespace-based LanceDB connection.
@@ -958,29 +942,35 @@ class AsyncLanceNamespaceDBConnection:
        )
        self._namespace_client_impl = namespace_client_impl
        self._namespace_client_properties = namespace_client_properties
-        # See LanceNamespaceDBConnection: when built natively the Rust table runs
-        # QueryTable pushdown through the read-freshness provider, so defer to it
-        # rather than the urllib3 client (which omits x-lancedb-min-timestamp).
-        self._route_pushdown_to_rust = _builds_namespace_natively(
+        # See LanceNamespaceDBConnection: when Rust owns the namespace
+        # connection/client, its table performs QueryTable pushdown through the
+        # read-freshness provider, so defer to it rather than the urllib3 client
+        # path (which omits x-lancedb-min-timestamp).
+        self._route_pushdown_to_rust = _inner is not None or _builds_namespace_natively(
            namespace_client_impl, namespace_client_properties
        )
-        self._inner = AsyncConnection(
-            _connect_namespace_client(
-                namespace_client,
-                read_consistency_interval=(
-                    read_consistency_interval.total_seconds()
-                    if read_consistency_interval is not None
-                    else None
-                ),
-                storage_options=self.storage_options or None,
-                session=session,
-                namespace_client_pushdown_operations=(
-                    list(self._namespace_client_pushdown_operations)
-                ),
-                namespace_client_impl=namespace_client_impl,
-                namespace_client_properties=namespace_client_properties,
+        if _inner is not None:
+            self._inner = _inner
+        else:
+            if namespace_client is None:
+                raise ValueError("namespace_client is required without a native _inner")
+            self._inner = AsyncConnection(
+                _connect_namespace_client(
+                    namespace_client,
+                    read_consistency_interval=(
+                        read_consistency_interval.total_seconds()
+                        if read_consistency_interval is not None
+                        else None
+                    ),
+                    storage_options=self.storage_options or None,
+                    session=session,
+                    namespace_client_pushdown_operations=(
+                        list(self._namespace_client_pushdown_operations)
+                    ),
+                    namespace_client_impl=namespace_client_impl,
+                    namespace_client_properties=namespace_client_properties,
+                )
            )
-        )

    async def table_names(
        self,
@@ -1004,11 +994,9 @@ class AsyncLanceNamespaceDBConnection:
        )
        if namespace_path is None:
            namespace_path = []
-        request = ListTablesRequest(
-            id=namespace_path, page_token=page_token, limit=limit
+        return await self._inner.table_names(
+            namespace_path=namespace_path, start_after=page_token, limit=limit
        )
-        response = self._namespace_client.list_tables(request)
-        return response.tables if response.tables else []

    async def create_table(
        self,
@@ -1071,8 +1059,8 @@ class AsyncLanceNamespaceDBConnection:
                storage_options=storage_options,
                index_cache_size=index_cache_size,
            )
-        except RuntimeError as e:
-            if "Table not found" in str(e):
+        except (RuntimeError, ValueError) as e:
+            if "Table not found" in str(e) or "was not found" in str(e):
                table_id = namespace_path + [name]
                raise TableNotFoundError(f"Table not found: {'$'.join(table_id)}")
            raise
@@ -1093,9 +1081,7 @@ class AsyncLanceNamespaceDBConnection:
        """Drop a table from the namespace."""
        if namespace_path is None:
            namespace_path = []
-        table_id = namespace_path + [name]
-        request = DropTableRequest(id=table_id)
-        self._namespace_client.drop_table(request)
+        await self._inner.drop_table(name, namespace_path=namespace_path)

    async def rename_table(
        self,
@@ -1109,14 +1095,17 @@ class AsyncLanceNamespaceDBConnection:
            cur_namespace_path = []
        if new_namespace_path is None:
            new_namespace_path = []
-        cur_table_id = cur_namespace_path + [cur_name]
-        new_namespace_id = new_namespace_path if new_namespace_path else None
-        request = RenameTableRequest(
-            id=cur_table_id,
-            new_table_name=new_name,
-            new_namespace_id=new_namespace_id,
-        )
-        self._namespace_client.rename_table(request)
+        try:
+            await self._inner.rename_table(
+                cur_name,
+                new_name,
+                cur_namespace_path=cur_namespace_path,
+                new_namespace_path=new_namespace_path,
+            )
+        except RuntimeError as e:
+            if "rename_table not implemented" in str(e):
+                raise NotImplementedError("rename_table not implemented") from e
+            raise

    async def drop_database(self):
        """Deprecated method."""
@@ -1128,9 +1117,7 @@ class AsyncLanceNamespaceDBConnection:
        """Drop all tables in the namespace."""
        if namespace_path is None:
            namespace_path = []
-        table_names = await self.table_names(namespace_path=namespace_path)
-        for table_name in table_names:
-            await self.drop_table(table_name, namespace_path=namespace_path)
+        await self._inner.drop_all_tables(namespace_path=namespace_path)

    async def list_namespaces(
        self,
@@ -1159,13 +1146,8 @@ class AsyncLanceNamespaceDBConnection:
        """
        if namespace_path is None:
            namespace_path = []
-        request = ListNamespacesRequest(
-            id=namespace_path, page_token=page_token, limit=limit
-        )
-        response = self._namespace_client.list_namespaces(request)
-        return ListNamespacesResponse(
-            namespaces=response.namespaces if response.namespaces else [],
-            page_token=response.page_token,
+        return await self._inner.list_namespaces(
+            namespace_path=namespace_path, page_token=page_token, limit=limit
        )

    async def create_namespace(
@@ -1192,15 +1174,11 @@ class AsyncLanceNamespaceDBConnection:
        CreateNamespaceResponse
            Response containing the properties of the created namespace.
        """
-        request = CreateNamespaceRequest(
-            id=namespace_path,
-            mode=_normalize_create_namespace_mode(mode),
+        return await self._inner.create_namespace(
+            namespace_path=namespace_path,
+            mode=mode,
            properties=properties,
        )
-        response = self._namespace_client.create_namespace(request)
-        return CreateNamespaceResponse(
-            properties=response.properties if hasattr(response, "properties") else None
-        )

    async def drop_namespace(
        self,
@@ -1226,20 +1204,16 @@ class AsyncLanceNamespaceDBConnection:
        DropNamespaceResponse
            Response containing properties and transaction_id if applicable.
        """
-        request = DropNamespaceRequest(
-            id=namespace_path,
-            mode=_normalize_drop_namespace_mode(mode),
-            behavior=_normalize_drop_namespace_behavior(behavior),
-        )
-        response = self._namespace_client.drop_namespace(request)
-        return DropNamespaceResponse(
-            properties=(
-                response.properties if hasattr(response, "properties") else None
-            ),
-            transaction_id=(
-                response.transaction_id if hasattr(response, "transaction_id") else None
-            ),
-        )
+        try:
+            return await self._inner.drop_namespace(
+                namespace_path=namespace_path,
+                mode=mode,
+                behavior=behavior,
+            )
+        except RuntimeError as e:
+            if "Namespace not empty" in str(e):
+                raise NamespaceNotEmptyError(str(e)) from e
+            raise

    async def describe_namespace(
        self, namespace_path: List[str]
@@ -1257,11 +1231,7 @@ class AsyncLanceNamespaceDBConnection:
        DescribeNamespaceResponse
            Response containing the namespace properties.
        """
-        request = DescribeNamespaceRequest(id=namespace_path)
-        response = self._namespace_client.describe_namespace(request)
-        return DescribeNamespaceResponse(
-            properties=response.properties if hasattr(response, "properties") else None
-        )
+        return await self._inner.describe_namespace(namespace_path)

    async def list_tables(
        self,
@@ -1290,13 +1260,8 @@ class AsyncLanceNamespaceDBConnection:
        """
        if namespace_path is None:
            namespace_path = []
-        request = ListTablesRequest(
-            id=namespace_path, page_token=page_token, limit=limit
-        )
-        response = self._namespace_client.list_tables(request)
-        return ListTablesResponse(
-            tables=response.tables if response.tables else [],
-            page_token=response.page_token,
+        return await self._inner.list_tables(
+            namespace_path=namespace_path, page_token=page_token, limit=limit
        )

    async def namespace_client(self) -> LanceNamespace:
@@ -1310,6 +1275,18 @@ class AsyncLanceNamespaceDBConnection:
        LanceNamespace
            The namespace client for this connection.
        """
+        if self._namespace_client is None:
+            if (
+                self._namespace_client_impl is None
+                or self._namespace_client_properties is None
+            ):
+                raise ValueError(
+                    "Cannot construct a Python namespace client without "
+                    "namespace implementation properties"
+                )
+            self._namespace_client = namespace_connect(
+                self._namespace_client_impl, self._namespace_client_properties
+            )
        return self._namespace_client


@@ -1360,7 +1337,7 @@ def connect_namespace(
    LanceNamespaceDBConnection
        A namespace-based connection to LanceDB
    """
-    if _supports_native_sync_namespace(namespace_client_impl):
+    if _supports_native_namespace(namespace_client_impl):
        inner = AsyncConnection(
            _connect_namespace(
                namespace_client_impl,
@@ -1384,7 +1361,6 @@ def connect_namespace(
            namespace_client_impl=namespace_client_impl,
            namespace_client_properties=namespace_client_properties,
            _inner=inner,
-            _route_pushdown_to_rust=True,
        )

    namespace_client = namespace_connect(
@@ -1462,6 +1438,32 @@ def connect_namespace_async(
    ...     tables = await db.table_names()
    ...     table = await db.create_table("my_table", schema=schema)
    """
+    if _supports_native_namespace(namespace_client_impl):
+        inner = AsyncConnection(
+            _connect_namespace(
+                namespace_client_impl,
+                namespace_client_properties,
+                read_consistency_interval=(
+                    read_consistency_interval.total_seconds()
+                    if read_consistency_interval is not None
+                    else None
+                ),
+                storage_options=storage_options,
+                session=session,
+                namespace_client_pushdown_operations=namespace_client_pushdown_operations,
+            )
+        )
+        return AsyncLanceNamespaceDBConnection(
+            namespace_client=None,
+            read_consistency_interval=read_consistency_interval,
+            storage_options=storage_options,
+            session=session,
+            namespace_client_pushdown_operations=namespace_client_pushdown_operations,
+            namespace_client_impl=namespace_client_impl,
+            namespace_client_properties=namespace_client_properties,
+            _inner=inner,
+        )
+
    namespace_client = namespace_connect(
        namespace_client_impl, namespace_client_properties
    )
--- a/python/python/lancedb/query.py
+++ b/python/python/lancedb/query.py
@@ -119,6 +119,27 @@ def _filter_to_sql(filter: Optional[Union[str, Expr]]) -> Optional[str]:
    return filter


+def _combine_where(
+    existing: Optional[Union[str, Expr]], new: Union[str, Expr]
+) -> Union[str, Expr]:
+    """Combine a new filter with an existing one using a logical AND.
+
+    Calling ``where`` more than once composes the filters with AND instead of
+    replacing the previous filter. Two :class:`~lancedb.expr.Expr` filters are
+    combined as an expression; otherwise both filters are lowered to SQL strings
+    and combined as SQL.
+    """
+    if existing is None:
+        return new
+    existing_is_expr = isinstance(existing, Expr)
+    new_is_expr = isinstance(new, Expr)
+    if existing_is_expr and new_is_expr:
+        return existing & new
+    existing_sql = existing.to_sql() if existing_is_expr else existing
+    new_sql = new.to_sql() if new_is_expr else new
+    return f"({existing_sql}) AND ({new_sql})"
+
+
 def _projection_to_scanner_kwargs(
    columns: Optional[
        Union[
@@ -1148,8 +1169,13 @@ class LanceQueryBuilder(ABC):
        -------
        LanceQueryBuilder
            The LanceQueryBuilder object.
+
+        Notes
+        -----
+        Calling this multiple times combines the filters with a logical AND
+        rather than replacing the previous filter.
        """
-        self._where = where
+        self._where = _combine_where(self._where, where)
        self._postfilter = not prefilter
        return self

@@ -1693,8 +1719,13 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
        -------
        LanceQueryBuilder
            The LanceQueryBuilder object.
+
+        Notes
+        -----
+        Calling this multiple times combines the filters with a logical AND
+        rather than replacing the previous filter.
        """
-        self._where = where
+        self._where = _combine_where(self._where, where)
        if prefilter is not None:
            self._postfilter = not prefilter
        return self
@@ -2894,6 +2925,9 @@ class AsyncStandardQuery(AsyncQueryBase):

        Filtering performance can often be improved by creating a scalar index
        on the filter column(s).
+
+        Calling this multiple times combines the filters with a logical AND
+        rather than replacing the previous filter.
        """
        if isinstance(predicate, Expr):
            self._inner.where_expr(predicate._inner)
--- a/python/python/lancedb/rerankers/mrr.py
+++ b/python/python/lancedb/rerankers/mrr.py
@@ -156,9 +156,16 @@ class MRRReranker(Reranker):
                reciprocal_rank = 1.0 / rank
                mrr_score_map[result_id].append(reciprocal_rank)

+        # MRR averages the reciprocal rank across *all* ranking systems, treating
+        # a system in which a document does not appear as a reciprocal rank of 0.
+        # We therefore divide by the total number of systems, not by the number of
+        # systems the document happens to appear in -- otherwise a document found
+        # by a single ranking would outrank one ranked highly by every system,
+        # defeating the purpose of fusing the rankings.
+        num_systems = len(vector_results)
        final_mrr_scores = {}
        for result_id, reciprocal_ranks in mrr_score_map.items():
-            mean_rr = np.mean(reciprocal_ranks)
+            mean_rr = float(np.sum(reciprocal_ranks)) / num_systems
            final_mrr_scores[result_id] = mean_rr

        combined = pa.concat_tables(vector_results, **self._concat_tables_args)
--- a/python/python/tests/test_namespace.py
+++ b/python/python/tests/test_namespace.py
@@ -599,6 +599,61 @@ class TestAsyncNamespaceConnection:
        table_names = await db.table_names()
        assert len(list(table_names)) == 0

+    async def test_async_builtin_namespace_uses_rust_without_python_client(
+        self, monkeypatch
+    ):
+        """Built-in async namespace connections should not construct or call the
+        Python namespace client for normal namespace/table management."""
+        namespace_module = importlib.import_module("lancedb.namespace")
+
+        def fail_namespace_connect(*args, **kwargs):
+            raise AssertionError("Python namespace client should not be constructed")
+
+        monkeypatch.setattr(
+            namespace_module, "namespace_connect", fail_namespace_connect
+        )
+
+        db = lancedb.connect_namespace_async("dir", {"root": self.temp_dir})
+        assert isinstance(db, lancedb.AsyncLanceNamespaceDBConnection)
+        assert db._namespace_client is None
+        assert db._route_pushdown_to_rust is True
+
+        await db.create_namespace(["test_ns"])
+        assert "test_ns" in (await db.list_namespaces()).namespaces
+
+        schema = pa.schema([pa.field("id", pa.int64())])
+        table = await db.create_table(
+            "test_table", schema=schema, namespace_path=["test_ns"]
+        )
+        assert table._namespace_path == ["test_ns"]
+        assert table._namespace_client is None
+        assert table._route_pushdown_to_rust is True
+        assert "test_table" in await db.table_names(namespace_path=["test_ns"])
+        assert "test_table" in (await db.list_tables(namespace_path=["test_ns"])).tables
+
+        opened = await db.open_table("test_table", namespace_path=["test_ns"])
+        assert opened._namespace_path == ["test_ns"]
+
+        await db.drop_table("test_table", namespace_path=["test_ns"])
+        assert (await db.list_tables(namespace_path=["test_ns"])).tables == []
+        await db.drop_namespace(["test_ns"])
+        assert "test_ns" not in (await db.list_namespaces()).namespaces
+
+    async def test_async_namespace_client_is_lazy(self):
+        """namespace_client() should still return the backing client on demand."""
+        pytest.importorskip("lance")
+        from lance.namespace import DirectoryNamespace
+
+        db = lancedb.connect_namespace_async("dir", {"root": self.temp_dir})
+        assert db._namespace_client is None
+
+        ns_client = await db.namespace_client()
+
+        assert isinstance(ns_client, DirectoryNamespace)
+        namespace_id = ns_client.namespace_id().replace("\\\\", "\\")
+        assert str(self.temp_dir) in namespace_id
+        assert db._namespace_client is ns_client
+
    # Async connect via namespace helper is not enabled yet.

    async def test_create_table_async(self):
@@ -870,10 +925,11 @@ class TestPushdownOperations:
        )
        assert db._route_pushdown_to_rust is True

-    def test_async_route_pushdown_to_rust_false_for_dir(self):
-        """The async non-native (dir) connection keeps the Python pushdown path."""
+    def test_async_route_pushdown_to_rust_for_native_dir(self):
+        """The async dir connection is natively built and defers QueryTable
+        pushdown to Rust."""
        db = lancedb.connect_namespace_async("dir", {"root": self.temp_dir})
-        assert db._route_pushdown_to_rust is False
+        assert db._route_pushdown_to_rust is True

    def test_lance_table_to_arrow_uses_query_pushdown(self):
        namespace_client = _NamespaceClient()
--- a/python/python/tests/test_query.py
+++ b/python/python/tests/test_query.py
@@ -502,6 +502,61 @@ def test_with_row_id(table: lancedb.table.Table):
    assert rs["_rowid"].to_pylist() == [0, 1]


+def test_where_repeated_combines_with_and(table: lancedb.table.Table):
+    # Calling where() more than once should AND the filters together instead of
+    # silently replacing the previous one (regression test for #2649).
+    builder = table.search().where("id >= 1").where("id < 2")
+    assert builder._where == "(id >= 1) AND (id < 2)"
+
+    ids = [row["id"] for row in builder.limit(10).to_list()]
+    assert ids == [1]
+
+
+def test_where_repeated_combines_expr(table: lancedb.table.Table):
+    from lancedb.expr import col, lit
+
+    builder = table.search().where(col("id") >= lit(1)).where(col("id") < lit(2))
+    ids = [row["id"] for row in builder.limit(10).to_list()]
+    assert ids == [1]
+
+
+def test_where_mixed_filter_kinds_combines(table: lancedb.table.Table):
+    # Mixing a SQL string filter with an expression filter lowers the
+    # expression to SQL and combines them as SQL strings.
+    from lancedb.expr import col, lit
+
+    builder = table.search().where("id >= 1").where(col("id") < lit(2))
+    ids = [row["id"] for row in builder.limit(10).to_list()]
+    assert ids == [1]
+
+
+@pytest.mark.asyncio
+async def test_where_repeated_combines_with_and_async(table_async: AsyncTable):
+    ids = [
+        row["id"]
+        for row in (
+            await table_async.query().where("id >= 1").where("id < 2").to_list()
+        )
+    ]
+    assert ids == [1]
+
+
+@pytest.mark.asyncio
+async def test_where_mixed_filter_kinds_combines_async(table_async: AsyncTable):
+    from lancedb.expr import col, lit
+
+    ids = [
+        row["id"]
+        for row in (
+            await table_async.query()
+            .where("id >= 1")
+            .where(col("id") < lit(2))
+            .to_list()
+        )
+    ]
+    assert ids == [1]
+
+
 def test_distance_range(table: lancedb.table.Table):
    q = [0, 0]
    rs = table.search(q).to_arrow()
--- a/python/python/tests/test_rerankers.py
+++ b/python/python/tests/test_rerankers.py
@@ -350,6 +350,38 @@ def test_mrr_reranker_empty_input():
        reranker.rerank_multivector([])


+def test_mrr_multivector_rewards_consensus():
+    # Reciprocal ranks must be averaged across *all* ranking systems, treating a
+    # missing system as 0. A document ranked first by every system must outrank a
+    # document ranked first by only one of them.
+    reranker = MRRReranker()
+
+    def ranking(row_ids):
+        return pa.table({"_rowid": pa.array(row_ids, type=pa.int64())})
+
+    # Doc 1 is rank 1 in only the first system; doc 2 is rank 1 in two systems
+    # and rank 2 in the third (strong cross-system consensus).
+    rs1 = ranking([1, 2, 3])
+    rs2 = ranking([2, 3, 4])
+    rs3 = ranking([2, 5, 6])
+
+    result = reranker.rerank_multivector([rs1, rs2, rs3])
+    scores = {
+        row_id: score
+        for row_id, score in zip(
+            result["_rowid"].to_pylist(),
+            result["_relevance_score"].to_pylist(),
+        )
+    }
+
+    # sum of reciprocal ranks / number of systems
+    assert scores[1] == pytest.approx(1.0 / 3)
+    assert scores[2] == pytest.approx((0.5 + 1.0 + 1.0) / 3)
+    assert scores[2] > scores[1]
+    # The consensus document ranks first overall.
+    assert result["_rowid"].to_pylist()[0] == 2
+
+
 def test_rrf_reranker_distance():
    data = pa.table(
        {
--- a/rust/lancedb/Cargo.toml
+++ b/rust/lancedb/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb"
-version = "0.31.0-beta.4"
+version = "0.31.0-beta.6"
 edition.workspace = true
 description = "LanceDB: A serverless, low-latency vector database for AI applications"
 license.workspace = true
@@ -14,6 +14,7 @@ rust-version.workspace = true
 ahash = { workspace = true }
 arrow = { workspace = true }
 arrow-array = { workspace = true }
+arrow-buffer = { workspace = true }
 arrow-data = { workspace = true }
 arrow-schema = { workspace = true }
 arrow-select = { workspace = true }
--- a/rust/lancedb/src/arrow.rs
+++ b/rust/lancedb/src/arrow.rs
@@ -3,7 +3,19 @@

 use std::{pin::Pin, sync::Arc};

+// Re-export the arrow crates we depend on so downstream consumers can build
+// `RecordBatch`/arrays/builders against the exact same arrow line lancedb was
+// compiled against, instead of declaring their own (potentially mismatched)
+// direct arrow dependencies. See https://github.com/lancedb/lancedb/issues/3575.
+pub use arrow;
+pub use arrow_array;
+pub use arrow_buffer;
+pub use arrow_cast;
+pub use arrow_data;
+pub use arrow_ipc;
+pub use arrow_ord;
 pub use arrow_schema;
+pub use arrow_select;
 use datafusion_common::DataFusionError;
 use datafusion_physical_plan::stream::RecordBatchStreamAdapter;
 use futures::{Stream, StreamExt, TryStreamExt};
--- a/rust/lancedb/src/lib.rs
+++ b/rust/lancedb/src/lib.rs
@@ -342,3 +342,9 @@ pub use connection::connect_namespace;
 /// Re-export Lance Session and ObjectStoreRegistry for custom session creation
 pub use lance::session::Session;
 pub use lance_io::object_store::ObjectStoreRegistry;
+
+/// Re-export DataFusion so consumers can build the `Expr` values that public
+/// query/merge APIs (e.g. [`query::QueryBase::only_if_expr`]) accept without
+/// declaring their own (potentially mismatched) direct `datafusion` dependency.
+/// See <https://github.com/lancedb/lancedb/issues/3575>.
+pub use datafusion;
--- a/rust/lancedb/src/query.rs
+++ b/rust/lancedb/src/query.rs
@@ -401,6 +401,9 @@ pub trait QueryBase {
    ///
    /// Filtering performance can often be improved by creating a scalar index
    /// on the filter column(s).
+    ///
+    /// Calling this multiple times combines the filters with a logical AND
+    /// (i.e. `(previous) AND (new)`) rather than replacing the previous filter.
    fn only_if(self, filter: impl AsRef<str>) -> Self;

    /// Only return rows which match the filter, using an expression builder.
@@ -423,6 +426,9 @@ pub trait QueryBase {
    ///
    /// Note: Expression filters are not supported for remote/server-side queries.
    /// Use [`QueryBase::only_if`] with SQL strings for remote tables.
+    ///
+    /// Calling this multiple times combines the expressions with a logical AND
+    /// rather than replacing the previous filter.
    fn only_if_expr(self, filter: datafusion_expr::Expr) -> Self;

    /// Perform a full text search on the table.
@@ -535,12 +541,13 @@ impl<T: HasQuery> QueryBase for T {
    }

    fn only_if(mut self, filter: impl AsRef<str>) -> Self {
-        self.mut_query().filter = Some(QueryFilter::Sql(filter.as_ref().to_string()));
+        self.mut_query()
+            .add_filter(QueryFilter::Sql(filter.as_ref().to_string()));
        self
    }

    fn only_if_expr(mut self, filter: datafusion_expr::Expr) -> Self {
-        self.mut_query().filter = Some(QueryFilter::Datafusion(filter));
+        self.mut_query().add_filter(QueryFilter::Datafusion(filter));
        self
    }

@@ -716,6 +723,39 @@ pub enum QueryFilter {
    Datafusion(Expr),
 }

+/// Combine two filters with a logical AND.
+///
+/// This is used when a query receives more than one filter (for example when
+/// `where`/`only_if` is called multiple times) so the filters are composed
+/// with AND rather than the later filter silently replacing the earlier one.
+///
+/// SQL string and expression filters are combined within their own
+/// representation. When the two representations are mixed, the expression is
+/// lowered to SQL (via [`crate::expr::expr_to_sql_string`]) and the filters are
+/// combined as SQL strings. Substrait filters cannot be combined and return an
+/// error.
+fn and_filters(existing: QueryFilter, new: QueryFilter) -> Result<QueryFilter> {
+    match (existing, new) {
+        (QueryFilter::Sql(lhs), QueryFilter::Sql(rhs)) => {
+            Ok(QueryFilter::Sql(format!("({lhs}) AND ({rhs})")))
+        }
+        (QueryFilter::Datafusion(lhs), QueryFilter::Datafusion(rhs)) => {
+            Ok(QueryFilter::Datafusion(lhs.and(rhs)))
+        }
+        (QueryFilter::Sql(lhs), QueryFilter::Datafusion(rhs)) => {
+            let rhs = crate::expr::expr_to_sql_string(&rhs)?;
+            Ok(QueryFilter::Sql(format!("({lhs}) AND ({rhs})")))
+        }
+        (QueryFilter::Datafusion(lhs), QueryFilter::Sql(rhs)) => {
+            let lhs = crate::expr::expr_to_sql_string(&lhs)?;
+            Ok(QueryFilter::Sql(format!("({lhs}) AND ({rhs})")))
+        }
+        _ => Err(Error::InvalidInput {
+            message: "cannot combine a Substrait filter with another filter".to_string(),
+        }),
+    }
+}
+
 /// A basic query into a table without any kind of search
 ///
 /// This will result in a (potentially filtered) scan if executed
@@ -730,6 +770,13 @@ pub struct QueryRequest {
    /// Apply filter to the returned rows.
    pub filter: Option<QueryFilter>,

+    /// An error recorded while combining repeated filters that could not be
+    /// composed (see [`QueryRequest::add_filter`]). It is surfaced when the
+    /// query is executed via [`QueryRequest::check_filter`]. We defer the error
+    /// because the builder methods that set filters return `Self` rather than a
+    /// `Result`.
+    pub(crate) filter_error: Option<String>,
+
    /// Perform a full text search on the table.
    pub full_text_search: Option<FullTextSearchQuery>,

@@ -775,6 +822,7 @@ impl Default for QueryRequest {
            limit: None,
            offset: None,
            filter: None,
+            filter_error: None,
            full_text_search: None,
            select: Select::All,
            fast_search: false,
@@ -788,6 +836,41 @@ impl Default for QueryRequest {
    }
 }

+impl QueryRequest {
+    /// Add a filter, combining it with any existing filter using a logical AND.
+    ///
+    /// If the new filter cannot be combined with the existing one (because they
+    /// use different representations) the error is recorded and surfaced later
+    /// by [`Self::check_filter`].
+    pub(crate) fn add_filter(&mut self, new: QueryFilter) {
+        self.filter = Some(match self.filter.take() {
+            None => new,
+            Some(existing) => match and_filters(existing, new) {
+                Ok(combined) => combined,
+                Err(err) => {
+                    // The filters were consumed while attempting to combine
+                    // them; the recorded error is surfaced by `check_filter`
+                    // before the query executes.
+                    self.filter_error = Some(err.to_string());
+                    return;
+                }
+            },
+        });
+    }
+
+    /// Return an error if combining filters failed (see [`Self::add_filter`]).
+    ///
+    /// This must be called by every backend before executing a query.
+    pub(crate) fn check_filter(&self) -> Result<()> {
+        if let Some(message) = &self.filter_error {
+            return Err(Error::InvalidInput {
+                message: message.clone(),
+            });
+        }
+        Ok(())
+    }
+}
+
 /// A builder for LanceDB queries.
 ///
 /// See [`crate::Table::query`] for more details on queries
@@ -1682,6 +1765,70 @@ mod tests {
        }
    }

+    #[tokio::test]
+    async fn test_repeated_only_if_combines_with_and() {
+        use crate::expr::{col, lit};
+
+        let tmp_dir = tempdir().unwrap();
+        let dataset_path = tmp_dir.path().join("test.lance");
+        let uri = dataset_path.to_str().unwrap();
+
+        let conn = connect(uri).execute().await.unwrap();
+        let table = conn
+            .create_table("my_table", make_non_empty_batches())
+            .execute()
+            .await
+            .unwrap();
+
+        let query = table.query().only_if("id > 0").only_if("id < 100");
+        match &query.request.filter {
+            Some(QueryFilter::Sql(sql)) => assert_eq!(sql, "(id > 0) AND (id < 100)"),
+            other => panic!("expected combined SQL filter, got {other:?}"),
+        }
+
+        // A single filter is left untouched.
+        let query = table.query().only_if("id > 0");
+        match &query.request.filter {
+            Some(QueryFilter::Sql(sql)) => assert_eq!(sql, "id > 0"),
+            other => panic!("expected single SQL filter, got {other:?}"),
+        }
+
+        // Expression filters are combined with a logical AND as well.
+        let query = table
+            .query()
+            .only_if_expr(col("id").gt(lit(0i32)))
+            .only_if_expr(col("id").lt(lit(100i32)));
+        match &query.request.filter {
+            Some(QueryFilter::Datafusion(expr)) => {
+                assert_eq!(
+                    expr,
+                    &col("id").gt(lit(0i32)).and(col("id").lt(lit(100i32)))
+                );
+            }
+            other => panic!("expected combined Datafusion filter, got {other:?}"),
+        }
+
+        // Mixing an SQL string filter with an expression filter lowers the
+        // expression to SQL and combines them as SQL strings.
+        let query = table
+            .query()
+            .only_if("id > 0")
+            .only_if_expr(col("id").lt(lit(100i32)));
+        match &query.request.filter {
+            Some(QueryFilter::Sql(sql)) => {
+                let expected = format!(
+                    "(id > 0) AND ({})",
+                    crate::expr::expr_to_sql_string(&col("id").lt(lit(100i32))).unwrap()
+                );
+                assert_eq!(sql, &expected);
+            }
+            other => panic!("expected combined SQL filter, got {other:?}"),
+        }
+        assert!(query.request.check_filter().is_ok());
+        // The combined filter executes without error.
+        query.execute().await.unwrap();
+    }
+
    #[tokio::test]
    async fn test_select_with_transform() {
        // TODO: Switch back to memory://foo after https://github.com/lancedb/lancedb/issues/1051
--- a/rust/lancedb/src/remote/table.rs
+++ b/rust/lancedb/src/remote/table.rs
@@ -612,6 +612,7 @@ impl<S: HttpSend> RemoteTable<S> {
        body: &mut serde_json::Value,
        params: &QueryRequest,
    ) -> Result<()> {
+        params.check_filter()?;
        body["prefilter"] = params.prefilter.into();
        if let Some(offset) = params.offset {
            body["offset"] = serde_json::Value::Number(serde_json::Number::from(offset));
--- a/rust/lancedb/src/table/query.rs
+++ b/rust/lancedb/src/table/query.rs
@@ -35,6 +35,15 @@ pub enum AnyQuery {
    VectorQuery(VectorQueryRequest),
 }

+impl AnyQuery {
+    pub(crate) fn base(&self) -> &QueryRequest {
+        match self {
+            Self::Query(query) => query,
+            Self::VectorQuery(query) => &query.base,
+        }
+    }
+}
+
 //Decide between namespace or local
 pub async fn execute_query(
    table: &NativeTable,
@@ -108,6 +117,7 @@ pub async fn create_plan(
        AnyQuery::VectorQuery(query) => query.clone(),
        AnyQuery::Query(query) => VectorQueryRequest::from_plain_query(query.clone()),
    };
+    query.base.check_filter()?;

    let ds_ref = table.dataset.get().await?;
    let schema = ds_ref.schema();
@@ -357,6 +367,7 @@ async fn execute_namespace_query(

 /// Convert an AnyQuery to the namespace QueryTableRequest format.
 fn convert_to_namespace_query(query: &AnyQuery) -> Result<NsQueryTableRequest> {
+    query.base().check_filter()?;
    match query {
        AnyQuery::VectorQuery(vq) => {
            // Extract the query vector(s)
Author	SHA1	Message	Date
Lance Release	37466a0390	Bump version: 0.31.0-beta.5 → 0.31.0-beta.6	2026-07-02 11:33:53 +00:00
Lance Release	bfce8a510d	Bump version: 0.34.0-beta.5 → 0.34.0-beta.6	2026-07-02 11:32:45 +00:00
Armaan Sandhu	a1261e6299	fix(python): average MRR reciprocal ranks over all rankings (#3599 ) ## What `MRRReranker.rerank_multivector` averages each document's reciprocal ranks over the wrong denominator. It divides by the number of rankings the document happens to appear in, instead of the total number of rankings being fused. ```python # python/python/lancedb/rerankers/mrr.py for result_id, reciprocal_ranks in mrr_score_map.items(): mean_rr = np.mean(reciprocal_ranks) # divides by len(present systems) ``` `mrr_score_map[doc]` only accumulates a reciprocal rank for the systems in which the document was returned, so `np.mean` never accounts for the systems that missed it. ## Why it's wrong Mean Reciprocal Rank fusion treats a system that didn't return a document as a reciprocal rank of `0` and averages across all systems. That's the exact mechanism by which it rewards cross-system consensus. Dividing by the appearance count removes that, so a document liked by a single ranking can beat one ranked highly by every ranking. Concretely, fusing 3 vector rankings: \| Doc \| Ranks \| Current score \| Correct score \| \|-----\|-------\|---------------\|---------------\| \| A \| #1 in 1 system only \| `mean([1.0]) = 1.000` \| `1.0 / 3 = 0.333` \| \| B \| #1, #1, #2 across all 3 \| `mean([1, 1, .5]) = 0.833` \| `2.5 / 3 = 0.833` \| The current code ranks A above B - a document two of three rankings ignored outranks one all three ranked at or near the top. This also makes `rerank_multivector` inconsistent with `rerank_hybrid` in the same file, which already treats a missing system as `0` (`vector_rr = 0.0` / `fts_rr = 0.0`), and with the class docstring ("average of reciprocal ranks across different search results"). ## Fix Divide the summed reciprocal ranks by the total number of rankings: ```python num_systems = len(vector_results) ... mean_rr = float(np.sum(reciprocal_ranks)) / num_systems ``` ## Tests Adds `test_mrr_multivector_rewards_consensus`, which asserts the exact MRR scores and that the consensus document ranks first. It fails on `main` and passes with this change. Existing reranker tests are unaffected.	2026-07-01 15:36:56 -07:00
Neo-X7	17c499177f	docs(python): add missing parameter documentation for when_matched_update_all (#3536 ) Fixes #2493 Added target. prefix requirement to where parameter docstring.	2026-07-01 10:28:58 -07:00
Will Jones	d889321b5e	fix!: combine repeated where filters with AND instead of replacing (#3585 ) BREAKING CHANGE: When passing multiple where clauses to a query, they now stack instead of replacing the previous filter. Previously, calling `where`/`only_if` more than once on a query silently replaced the previous filter, so only the last filter was applied. This was surprising and could return rows that an earlier filter should have excluded. This implements the alternative suggested in https://github.com/lancedb/lancedb/pull/3514#issuecomment-4664901580: instead of rejecting a second filter, repeated filters are combined with a logical AND (`(previous) AND (new)`). The combination happens in the Rust core (`QueryBase::only_if` and `only_if_expr`), so it applies to all SDKs at once (Rust, Python async, and TypeScript). The Python sync query builder keeps its own filter state, so it combines filters in the binding layer as well. SQL string and expression filters are combined within their own representation. When the two representations are mixed, the expression is lowered to SQL (via `expr_to_sql_string`) and the filters are combined as SQL strings, so chaining `where` works regardless of which form each filter takes. Fixes #2649 ## Tests - Rust: `cargo test --features remote -p lancedb --lib query` - Python: `uv run --extra tests pytest python/tests/test_query.py` - TypeScript: `pnpm test __test__/query.test.ts` 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-07-01 10:11:58 -07:00
Will Jones	8a37f2ad77	feat(rust): re-export arrow and datafusion crates from lancedb (#3576 ) lancedb's public API forces downstream crates to construct foreign types — `RecordBatch`/arrays/builders for `Table::add(...)` (arrow), and `datafusion_expr::Expr` for `only_if_expr`/`expr_projection`/merge filters. The required version must exactly match lancedb's internal arrow/datafusion line, but nothing on the API surface makes that visible. Drift surfaces only as confusing trait/type errors: ```text error[E0277]: the trait bound `RecordBatch: Scannable` is not satisfied = note: there are multiple different versions of crate `arrow_array` in the dependency graph ``` This re-exports the crates lancedb already pins, so consumers can rely on a single, guaranteed-matching line via a discoverable import path instead of declaring their own (potentially mismatched) direct dependency. - `lancedb::arrow::{arrow, arrow_array, arrow_buffer, arrow_cast, arrow_data, arrow_ipc, arrow_ord, arrow_schema, arrow_select}` — previously only `arrow_schema` was re-exported. `arrow-buffer` is promoted from a transitive to a direct dependency. - `lancedb::datafusion` — `Expr` is a first-class part of the query and merge APIs (`only_if_expr`, `expr_projection`, `QueryFilter::Datafusion`, `when_matched_update_all_expr`), and `ExecutionPlan` is returned from `create_plan`. This follows DataFusion's own precedent of re-exporting `arrow`. The coupling already exists via the trait/impl bounds — this surfaces it rather than hiding it behind an `E0277`. Closes #3575 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-07-01 10:10:55 -07:00
Raphael Malikian	f94673ae5e	ci: update deprecated GitHub Actions to latest versions (Fixes #3577 ) (#3608 ) Fixes #3577 ## Problem GitHub Actions is deprecating Node.js 20 on its runners. Multiple workflows in lancedb use action versions that target Node.js 20 (`actions/checkout@v4`, `actions/setup-node@v4`, `actions/cache@v4`, `actions/upload-artifact@v4`, `actions/download-artifact@v4`, `pnpm/action-setup@v4`). These are being force-run on Node.js 24, generating deprecation warnings. ## Solution Updated all deprecated actions to their latest major versions that support Node.js 24: \| Action \| Old Version \| New Version \| \|--------\|------------\|-------------\| \| `actions/checkout` \| @v4 \| @v6 \| \| `actions/setup-node` \| @v4 \| @v6 \| \| `actions/cache` \| @v4 \| @v5 \| \| `actions/upload-artifact` \| @v4 \| @v7 \| \| `actions/download-artifact` \| @v4 \| @v8 \| \| `pnpm/action-setup` \| @v4 \| @v6 \| Note: `actions/checkout@v6` and `actions/upload-artifact@v7` are already used in `pypi-publish.yml` — this PR extends the same versions to all remaining workflows. ### Files Changed - `.github/workflows/npm-publish.yml` — Updated checkout, setup-node, cache, upload-artifact, download-artifact, pnpm - `.github/workflows/nodejs.yml` — Updated checkout, setup-node, pnpm - `.github/workflows/python.yml` — Updated checkout - `.github/workflows/rust.yml` — Updated checkout - `.github/workflows/java.yml` — Updated checkout - `.github/workflows/java-publish.yml` — Updated checkout - `.github/workflows/cargo-publish.yml` — Updated checkout - `.github/workflows/docs.yml` — Updated checkout, setup-node - `.github/workflows/dev.yml` — Updated setup-node - `.github/workflows/codex-fix-ci.yml` — Updated checkout, setup-node, pnpm - `.github/workflows/codex-update-lance-dependency.yml` — Updated checkout, setup-node - `.github/workflows/license-header-check.yml` — Updated checkout - `.github/workflows/make-release-commit.yml` — Updated checkout - `.github/workflows/update_package_lock_run.yml` — Updated checkout - `.github/workflows/update_package_lock_run_nodejs.yml` — Updated checkout ## Verification - All 20 YAML files validated with `yaml.safe_load()` — no syntax errors - GitHub Actions CI will validate the actual action versions at runtime ## Changelog \| Date \| Change \| Author \| \|------\|--------\|--------\| \| 2026-07-01 \| Updated all deprecated Node 20 actions to latest versions across 15 workflow files \| rtmalikian \| --- Disclosure: This code was developed with assistance from DeepSeek-v4-pro (DeepSeek) via Hermes Agent (Nous Research). All changes were reviewed and verified for correctness. Signed-off-by: rtmalikian <rtmalikian@gmail.com>	2026-07-01 09:38:26 -07:00
Jack Ye	3b70fc4c9d	fix(python): route async namespace connections through rust (#3603 ) Summary: - Route built-in async namespace-backed connections through the Rust namespace connector. - Delegate async namespace/table management methods to the inner AsyncConnection while keeping the custom implementation Python-client fallback. - Add regressions for the native async dir path and lazy namespace_client() construction. Validated locally with targeted namespace/db/table pytest, full test_namespace.py, ruff, cargo fmt/check/clippy, and cargo test -p lancedb-python.	2026-06-30 17:03:23 -07:00
Lance Release	3a7b02119b	Bump version: 0.31.0-beta.4 → 0.31.0-beta.5	2026-06-30 22:24:56 +00:00