fix pylance test version

chore: update lance dependency to v3.1.0-beta.1
2026-07-04 03:20:40 +00:00 · 2026-02-24 08:15:25 -08:00 · 2026-02-24 08:03:11 +00:00
117 changed files with 870 additions and 2161 deletions
--- a/.bumpversion.toml
+++ b/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.27.0-beta.3"
+current_version = "0.27.0-beta.1"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/.github/workflows/build_linux_wheel/action.yml
+++ b/.github/workflows/build_linux_wheel/action.yml
@@ -29,7 +29,6 @@ runs:
      if: ${{ inputs.arm-build == 'false' }}
      uses: PyO3/maturin-action@v1
      with:
-        maturin-version: "1.12.4"
        command: build
        working-directory: python
        docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
@@ -45,7 +44,6 @@ runs:
      if: ${{ inputs.arm-build == 'true' }}
      uses: PyO3/maturin-action@v1
      with:
-        maturin-version: "1.12.4"
        command: build
        working-directory: python
        docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
--- a/.github/workflows/build_mac_wheel/action.yml
+++ b/.github/workflows/build_mac_wheel/action.yml
@@ -20,7 +20,6 @@ runs:
      uses: PyO3/maturin-action@v1
      with:
        command: build
-        maturin-version: "1.12.4"
        # TODO: pass through interpreter
        args: ${{ inputs.args }}
        docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
--- a/.github/workflows/build_windows_wheel/action.yml
+++ b/.github/workflows/build_windows_wheel/action.yml
@@ -25,7 +25,6 @@ runs:
      uses: PyO3/maturin-action@v1
      with:
        command: build
-        maturin-version: "1.12.4"
        args: ${{ inputs.args }}
        docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
        working-directory: python
--- a/.github/workflows/npm-publish.yml
+++ b/.github/workflows/npm-publish.yml
@@ -356,8 +356,7 @@ jobs:
          if [[ $DRY_RUN == "true" ]]; then
            ARGS="$ARGS --dry-run"
          fi
-          VERSION=$(node -p "require('./package.json').version")
-          if [[ $VERSION == *-* ]]; then
+          if [[ $GITHUB_REF =~ refs/tags/v(.*)-beta.* ]]; then
            ARGS="$ARGS --tag preview"
          fi
          npm publish $ARGS
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -10,10 +10,6 @@ on:
      - python/**
      - rust/**
      - .github/workflows/python.yml
-      - .github/workflows/build_linux_wheel/**
-      - .github/workflows/build_mac_wheel/**
-      - .github/workflows/build_windows_wheel/**
-      - .github/workflows/run_tests/**

 concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -100,9 +100,7 @@ jobs:
          lfs: true
      - uses: Swatinem/rust-cache@v2
      - name: Install dependencies
-        run: |
-          sudo apt update
-          sudo apt install -y protobuf-compiler libssl-dev
+        run: sudo apt install -y protobuf-compiler libssl-dev
      - uses: rui314/setup-mold@v1
      - name: Make Swap
        run: |
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,7 +5,7 @@ exclude = ["python"]
 resolver = "2"

 [workspace.package]
-edition = "2024"
+edition = "2021"
 authors = ["LanceDB Devs <dev@lancedb.com>"]
 license = "Apache-2.0"
 repository = "https://github.com/lancedb/lancedb"
@@ -15,20 +15,20 @@ categories = ["database-implementations"]
 rust-version = "1.91.0"

 [workspace.dependencies]
-lance = { "version" = "=4.0.0-beta.12", default-features = false, "tag" = "v4.0.0-beta.12", "git" = "https://github.com/lance-format/lance.git" }
-lance-core = { "version" = "=4.0.0-beta.12", "tag" = "v4.0.0-beta.12", "git" = "https://github.com/lance-format/lance.git" }
-lance-datagen = { "version" = "=4.0.0-beta.12", "tag" = "v4.0.0-beta.12", "git" = "https://github.com/lance-format/lance.git" }
-lance-file = { "version" = "=4.0.0-beta.12", "tag" = "v4.0.0-beta.12", "git" = "https://github.com/lance-format/lance.git" }
-lance-io = { "version" = "=4.0.0-beta.12", default-features = false, "tag" = "v4.0.0-beta.12", "git" = "https://github.com/lance-format/lance.git" }
-lance-index = { "version" = "=4.0.0-beta.12", "tag" = "v4.0.0-beta.12", "git" = "https://github.com/lance-format/lance.git" }
-lance-linalg = { "version" = "=4.0.0-beta.12", "tag" = "v4.0.0-beta.12", "git" = "https://github.com/lance-format/lance.git" }
-lance-namespace = { "version" = "=4.0.0-beta.12", "tag" = "v4.0.0-beta.12", "git" = "https://github.com/lance-format/lance.git" }
-lance-namespace-impls = { "version" = "=4.0.0-beta.12", default-features = false, "tag" = "v4.0.0-beta.12", "git" = "https://github.com/lance-format/lance.git" }
-lance-table = { "version" = "=4.0.0-beta.12", "tag" = "v4.0.0-beta.12", "git" = "https://github.com/lance-format/lance.git" }
-lance-testing = { "version" = "=4.0.0-beta.12", "tag" = "v4.0.0-beta.12", "git" = "https://github.com/lance-format/lance.git" }
-lance-datafusion = { "version" = "=4.0.0-beta.12", "tag" = "v4.0.0-beta.12", "git" = "https://github.com/lance-format/lance.git" }
-lance-encoding = { "version" = "=4.0.0-beta.12", "tag" = "v4.0.0-beta.12", "git" = "https://github.com/lance-format/lance.git" }
-lance-arrow = { "version" = "=4.0.0-beta.12", "tag" = "v4.0.0-beta.12", "git" = "https://github.com/lance-format/lance.git" }
+lance = { "version" = "=3.1.0-beta.1", default-features = false, "tag" = "v3.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-core = { "version" = "=3.1.0-beta.1", "tag" = "v3.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-datagen = { "version" = "=3.1.0-beta.1", "tag" = "v3.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-file = { "version" = "=3.1.0-beta.1", "tag" = "v3.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-io = { "version" = "=3.1.0-beta.1", default-features = false, "tag" = "v3.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-index = { "version" = "=3.1.0-beta.1", "tag" = "v3.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-linalg = { "version" = "=3.1.0-beta.1", "tag" = "v3.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-namespace = { "version" = "=3.1.0-beta.1", "tag" = "v3.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-namespace-impls = { "version" = "=3.1.0-beta.1", default-features = false, "tag" = "v3.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-table = { "version" = "=3.1.0-beta.1", "tag" = "v3.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-testing = { "version" = "=3.1.0-beta.1", "tag" = "v3.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-datafusion = { "version" = "=3.1.0-beta.1", "tag" = "v3.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-encoding = { "version" = "=3.1.0-beta.1", "tag" = "v3.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
+lance-arrow = { "version" = "=3.1.0-beta.1", "tag" = "v3.1.0-beta.1", "git" = "https://github.com/lance-format/lance.git" }
 ahash = "0.8"
 # Note that this one does not include pyarrow
 arrow = { version = "57.2", optional = false }
@@ -40,15 +40,13 @@ arrow-schema = "57.2"
 arrow-select = "57.2"
 arrow-cast = "57.2"
 async-trait = "0"
-datafusion = { version = "52.1", default-features = false }
-datafusion-catalog = "52.1"
-datafusion-common = { version = "52.1", default-features = false }
-datafusion-execution = "52.1"
-datafusion-expr = "52.1"
-datafusion-functions = "52.1"
-datafusion-physical-plan = "52.1"
-datafusion-physical-expr = "52.1"
-datafusion-sql = "52.1"
+datafusion = { version = "51.0", default-features = false }
+datafusion-catalog = "51.0"
+datafusion-common = { version = "51.0", default-features = false }
+datafusion-execution = "51.0"
+datafusion-expr = "51.0"
+datafusion-physical-plan = "51.0"
+datafusion-physical-expr = "51.0"
 env_logger = "0.11"
 half = { "version" = "2.7.1", default-features = false, features = [
    "num-traits",
--- a/docs/src/java/java.md
+++ b/docs/src/java/java.md
@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
 <dependency>
    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-core</artifactId>
-    <version>0.27.0-beta.3</version>
+    <version>0.27.0-beta.1</version>
 </dependency>
 ```

--- a/docs/src/js/interfaces/DeleteResult.md
+++ b/docs/src/js/interfaces/DeleteResult.md
@@ -8,14 +8,6 @@

 ## Properties

-### numDeletedRows
-
-```ts
-numDeletedRows: number;
-```
-
-***
-
 ### version

 ```ts
--- a/java/lancedb-core/pom.xml
+++ b/java/lancedb-core/pom.xml
@@ -8,7 +8,7 @@
    <parent>
      <groupId>com.lancedb</groupId>
      <artifactId>lancedb-parent</artifactId>
-      <version>0.27.0-beta.3</version>
+      <version>0.27.0-beta.1</version>
      <relativePath>../pom.xml</relativePath>
    </parent>

--- a/java/pom.xml
+++ b/java/pom.xml
@@ -6,7 +6,7 @@

    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-parent</artifactId>
-    <version>0.27.0-beta.3</version>
+    <version>0.27.0-beta.1</version>
    <packaging>pom</packaging>
    <name>${project.artifactId}</name>
    <description>LanceDB Java SDK Parent POM</description>
@@ -28,7 +28,7 @@
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <arrow.version>15.0.0</arrow.version>
-        <lance-core.version>3.1.0-beta.2</lance-core.version>
+        <lance-core.version>3.1.0-beta.1</lance-core.version>
        <spotless.skip>false</spotless.skip>
        <spotless.version>2.30.0</spotless.version>
        <spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
--- a/nodejs/Cargo.toml
+++ b/nodejs/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "lancedb-nodejs"
 edition.workspace = true
-version = "0.27.0-beta.3"
+version = "0.27.0-beta.1"
 license.workspace = true
 description.workspace = true
 repository.workspace = true
--- a/nodejs/test/table.test.ts
+++ b/nodejs/test/table.test.ts
@@ -1697,65 +1697,6 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
      expect(results2[0].text).toBe(data[1].text);
    });

-    test("full text search fast search", async () => {
-      const db = await connect(tmpDir.name);
-      const data = [{ text: "hello world", vector: [0.1, 0.2, 0.3], id: 1 }];
-      const table = await db.createTable("test", data);
-      await table.createIndex("text", {
-        config: Index.fts(),
-      });
-
-      // Insert unindexed data after creating the index.
-      await table.add([{ text: "xyz", vector: [0.4, 0.5, 0.6], id: 2 }]);
-
-      const withFlatSearch = await table
-        .search("xyz", "fts")
-        .limit(10)
-        .toArray();
-      expect(withFlatSearch.length).toBeGreaterThan(0);
-
-      const fastSearchResults = await table
-        .search("xyz", "fts")
-        .fastSearch()
-        .limit(10)
-        .toArray();
-      expect(fastSearchResults.length).toBe(0);
-
-      const nearestToTextFastSearch = await table
-        .query()
-        .nearestToText("xyz")
-        .fastSearch()
-        .limit(10)
-        .toArray();
-      expect(nearestToTextFastSearch.length).toBe(0);
-
-      // fastSearch should be chainable with other methods.
-      const chainedFastSearch = await table
-        .search("xyz", "fts")
-        .fastSearch()
-        .select(["text"])
-        .limit(5)
-        .toArray();
-      expect(chainedFastSearch.length).toBe(0);
-
-      await table.optimize();
-
-      const indexedFastSearch = await table
-        .search("xyz", "fts")
-        .fastSearch()
-        .limit(10)
-        .toArray();
-      expect(indexedFastSearch.length).toBeGreaterThan(0);
-
-      const indexedNearestToTextFastSearch = await table
-        .query()
-        .nearestToText("xyz")
-        .fastSearch()
-        .limit(10)
-        .toArray();
-      expect(indexedNearestToTextFastSearch.length).toBeGreaterThan(0);
-    });
-
    test("prewarm full text search index", async () => {
      const db = await connect(tmpDir.name);
      const data = [
--- a/nodejs/npm/darwin-arm64/package.json
+++ b/nodejs/npm/darwin-arm64/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-darwin-arm64",
-	"version": "0.27.0-beta.3",
+	"version": "0.27.0-beta.1",
 	"os": ["darwin"],
 	"cpu": ["arm64"],
 	"main": "lancedb.darwin-arm64.node",
--- a/nodejs/npm/linux-arm64-gnu/package.json
+++ b/nodejs/npm/linux-arm64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-gnu",
-	"version": "0.27.0-beta.3",
+	"version": "0.27.0-beta.1",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-gnu.node",
--- a/nodejs/npm/linux-arm64-musl/package.json
+++ b/nodejs/npm/linux-arm64-musl/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-musl",
-	"version": "0.27.0-beta.3",
+	"version": "0.27.0-beta.1",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-musl.node",
--- a/nodejs/npm/linux-x64-gnu/package.json
+++ b/nodejs/npm/linux-x64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-gnu",
-	"version": "0.27.0-beta.3",
+	"version": "0.27.0-beta.1",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-gnu.node",
--- a/nodejs/npm/linux-x64-musl/package.json
+++ b/nodejs/npm/linux-x64-musl/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-musl",
-	"version": "0.27.0-beta.3",
+	"version": "0.27.0-beta.1",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-musl.node",
--- a/nodejs/npm/win32-arm64-msvc/package.json
+++ b/nodejs/npm/win32-arm64-msvc/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@lancedb/lancedb-win32-arm64-msvc",
-  "version": "0.27.0-beta.3",
+  "version": "0.27.0-beta.1",
  "os": [
    "win32"
  ],
--- a/nodejs/npm/win32-x64-msvc/package.json
+++ b/nodejs/npm/win32-x64-msvc/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-win32-x64-msvc",
-	"version": "0.27.0-beta.3",
+	"version": "0.27.0-beta.1",
 	"os": ["win32"],
 	"cpu": ["x64"],
 	"main": "lancedb.win32-x64-msvc.node",
--- a/nodejs/package-lock.json
+++ b/nodejs/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "@lancedb/lancedb",
-  "version": "0.27.0-beta.3",
+  "version": "0.27.0-beta.1",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "@lancedb/lancedb",
-      "version": "0.27.0-beta.3",
+      "version": "0.27.0-beta.1",
      "cpu": [
        "x64",
        "arm64"
--- a/nodejs/package.json
+++ b/nodejs/package.json
@@ -11,7 +11,7 @@
    "ann"
  ],
  "private": false,
-  "version": "0.27.0-beta.3",
+  "version": "0.27.0-beta.1",
  "main": "dist/index.js",
  "exports": {
    ".": "./dist/index.js",
--- a/nodejs/src/connection.rs
+++ b/nodejs/src/connection.rs
@@ -8,10 +8,10 @@ use lancedb::database::{CreateTableMode, Database};
 use napi::bindgen_prelude::*;
 use napi_derive::*;

-use crate::ConnectionOptions;
 use crate::error::NapiErrorExt;
 use crate::header::JsHeaderProvider;
 use crate::table::Table;
+use crate::ConnectionOptions;
 use lancedb::connection::{ConnectBuilder, Connection as LanceDBConnection};

 use lancedb::ipc::{ipc_file_to_batches, ipc_file_to_schema};
--- a/nodejs/src/index.rs
+++ b/nodejs/src/index.rs
@@ -3,12 +3,12 @@

 use std::sync::Mutex;

-use lancedb::index::Index as LanceDbIndex;
 use lancedb::index::scalar::{BTreeIndexBuilder, FtsIndexBuilder};
 use lancedb::index::vector::{
    IvfFlatIndexBuilder, IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder,
    IvfRqIndexBuilder,
 };
+use lancedb::index::Index as LanceDbIndex;
 use napi_derive::napi;

 use crate::util::parse_distance_type;
--- a/nodejs/src/query.rs
+++ b/nodejs/src/query.rs
@@ -17,8 +17,8 @@ use lancedb::query::VectorQuery as LanceDbVectorQuery;
 use napi::bindgen_prelude::*;
 use napi_derive::napi;

-use crate::error::NapiErrorExt;
 use crate::error::convert_error;
+use crate::error::NapiErrorExt;
 use crate::iterator::RecordBatchIterator;
 use crate::rerankers::RerankHybridCallbackArgs;
 use crate::rerankers::Reranker;
@@ -551,12 +551,15 @@ fn parse_fts_query(query: Object) -> napi::Result<FullTextSearchQuery> {
            }
        };
        let mut query = FullTextSearchQuery::new_query(query);
-        if let Some(cols) = columns
-            && !cols.is_empty()
-        {
-            query = query.with_columns(&cols).map_err(|e| {
-                napi::Error::from_reason(format!("Failed to set full text search columns: {}", e))
-            })?;
+        if let Some(cols) = columns {
+            if !cols.is_empty() {
+                query = query.with_columns(&cols).map_err(|e| {
+                    napi::Error::from_reason(format!(
+                        "Failed to set full text search columns: {}",
+                        e
+                    ))
+                })?;
+            }
        }
        Ok(query)
    } else {
--- a/nodejs/src/session.rs
+++ b/nodejs/src/session.rs
@@ -95,7 +95,7 @@ impl napi::bindgen_prelude::FromNapiValue for Session {
        napi_val: napi::sys::napi_value,
    ) -> napi::Result<Self> {
        let object: napi::bindgen_prelude::ClassInstance<Self> =
-            unsafe { napi::bindgen_prelude::ClassInstance::from_napi_value(env, napi_val)? };
+            napi::bindgen_prelude::ClassInstance::from_napi_value(env, napi_val)?;
        Ok((*object).clone())
    }
 }
--- a/nodejs/src/table.rs
+++ b/nodejs/src/table.rs
@@ -753,14 +753,12 @@ impl From<lancedb::table::AddResult> for AddResult {

 #[napi(object)]
 pub struct DeleteResult {
-    pub num_deleted_rows: i64,
    pub version: i64,
 }

 impl From<lancedb::table::DeleteResult> for DeleteResult {
    fn from(value: lancedb::table::DeleteResult) -> Self {
        Self {
-            num_deleted_rows: value.num_deleted_rows as i64,
            version: value.version as i64,
        }
    }
--- a/python/.bumpversion.toml
+++ b/python/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.30.0-beta.3"
+current_version = "0.30.0-beta.1"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-python"
-version = "0.30.0-beta.3"
+version = "0.30.0-beta.1"
 edition.workspace = true
 description = "Python bindings for LanceDB"
 license.workspace = true
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -45,7 +45,7 @@ repository = "https://github.com/lancedb/lancedb"

 [project.optional-dependencies]
 pylance = [
-    "pylance>=1.0.0b14",
+    "pylance>=3.1.0b1",
 ]
 tests = [
    "aiohttp",
@@ -59,9 +59,9 @@ tests = [
    "polars>=0.19, <=1.3.0",
    "tantivy",
    "pyarrow-stubs",
-    "pylance>=1.0.0b14,<3.0.0",
+    "pylance>=3.1.0b1",
    "requests",
-    "datafusion<52",
+    "datafusion>=51,<52",  # Must match pylance's DataFusion version
 ]
 dev = [
    "ruff",
--- a/python/python/lancedb/query.py
+++ b/python/python/lancedb/query.py
@@ -606,7 +606,6 @@ class LanceQueryBuilder(ABC):
                query,
                ordering_field_name=ordering_field_name,
                fts_columns=fts_columns,
-                fast_search=fast_search,
            )

        if isinstance(query, list):
@@ -1457,14 +1456,12 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
        query: str | FullTextQuery,
        ordering_field_name: Optional[str] = None,
        fts_columns: Optional[Union[str, List[str]]] = None,
-        fast_search: bool = None,
    ):
        super().__init__(table)
        self._query = query
        self._phrase_query = False
        self.ordering_field_name = ordering_field_name
        self._reranker = None
-        self._fast_search = fast_search
        if isinstance(fts_columns, str):
            fts_columns = [fts_columns]
        self._fts_columns = fts_columns
@@ -1486,19 +1483,6 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
        self._phrase_query = phrase_query
        return self

-    def fast_search(self) -> LanceFtsQueryBuilder:
-        """
-        Skip a flat search of unindexed data. This will improve
-        search performance but search results will not include unindexed data.
-
-        Returns
-        -------
-        LanceFtsQueryBuilder
-            The LanceFtsQueryBuilder object.
-        """
-        self._fast_search = True
-        return self
-
    def to_query_object(self) -> Query:
        return Query(
            columns=self._columns,
@@ -1510,7 +1494,6 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
                query=self._query, columns=self._fts_columns
            ),
            offset=self._offset,
-            fast_search=self._fast_search,
        )

    def output_schema(self) -> pa.Schema:
--- a/python/python/lancedb/remote/table.py
+++ b/python/python/lancedb/remote/table.py
@@ -218,6 +218,8 @@ class RemoteTable(Table):
        train: bool = True,
    ):
        """Create an index on the table.
+        Currently, the only parameters that matter are
+        the metric and the vector column name.

        Parameters
        ----------
@@ -248,6 +250,11 @@ class RemoteTable(Table):
        >>> table.create_index("l2", "vector") # doctest: +SKIP
        """

+        if num_sub_vectors is not None:
+            logging.warning(
+                "num_sub_vectors is not supported on LanceDB cloud."
+                "This parameter will be tuned automatically."
+            )
        if accelerator is not None:
            logging.warning(
                "GPU accelerator is not yet supported on LanceDB cloud."
--- a/python/python/lancedb/table.py
+++ b/python/python/lancedb/table.py
@@ -1331,7 +1331,7 @@ class Table(ABC):
        1  2  [3.0, 4.0]
        2  3  [5.0, 6.0]
        >>> table.delete("x = 2")
-        DeleteResult(num_deleted_rows=1, version=2)
+        DeleteResult(version=2)
        >>> table.to_pandas()
           x      vector
        0  1  [1.0, 2.0]
@@ -1345,7 +1345,7 @@ class Table(ABC):
        >>> to_remove
        '1, 5'
        >>> table.delete(f"x IN ({to_remove})")
-        DeleteResult(num_deleted_rows=1, version=3)
+        DeleteResult(version=3)
        >>> table.to_pandas()
           x      vector
        0  3  [5.0, 6.0]
@@ -4215,7 +4215,7 @@ class AsyncTable:
        1  2  [3.0, 4.0]
        2  3  [5.0, 6.0]
        >>> table.delete("x = 2")
-        DeleteResult(num_deleted_rows=1, version=2)
+        DeleteResult(version=2)
        >>> table.to_pandas()
           x      vector
        0  1  [1.0, 2.0]
@@ -4229,7 +4229,7 @@ class AsyncTable:
        >>> to_remove
        '1, 5'
        >>> table.delete(f"x IN ({to_remove})")
-        DeleteResult(num_deleted_rows=1, version=3)
+        DeleteResult(version=3)
        >>> table.to_pandas()
           x      vector
        0  3  [5.0, 6.0]
--- a/python/python/lancedb/util.py
+++ b/python/python/lancedb/util.py
@@ -324,16 +324,6 @@ def _(value: list):
    return "[" + ", ".join(map(value_to_sql, value)) + "]"


-@value_to_sql.register(dict)
-def _(value: dict):
-    # https://datafusion.apache.org/user-guide/sql/scalar_functions.html#named-struct
-    return (
-        "named_struct("
-        + ", ".join(f"'{k}', {value_to_sql(v)}" for k, v in value.items())
-        + ")"
-    )
-
-
@value_to_sql.register(np.ndarray)
 def _(value: np.ndarray):
    return value_to_sql(value.tolist())
--- a/python/python/tests/test_fts.py
+++ b/python/python/tests/test_fts.py
@@ -27,7 +27,6 @@ from lancedb.query import (
    PhraseQuery,
    BooleanQuery,
    Occur,
-    LanceFtsQueryBuilder,
 )
 import numpy as np
 import pyarrow as pa
@@ -883,109 +882,3 @@ def test_fts_query_to_json():
        '"must_not":[]}}'
    )
    assert json_str == expected
-
-
-def test_fts_fast_search(table):
-    table.create_fts_index("text", use_tantivy=False)
-
-    # Insert some unindexed data
-    table.add(
-        [
-            {
-                "text": "xyz",
-                "vector": [0 for _ in range(128)],
-                "id": 101,
-                "text2": "xyz",
-                "nested": {"text": "xyz"},
-                "count": 10,
-            }
-        ]
-    )
-
-    # Without fast_search, the query object should not have fast_search set
-    builder = table.search("xyz", query_type="fts").limit(10)
-    query = builder.to_query_object()
-    assert query.fast_search is None
-
-    # With fast_search, the query object should have fast_search=True
-    builder = table.search("xyz", query_type="fts").fast_search().limit(10)
-    query = builder.to_query_object()
-    assert query.fast_search is True
-
-    # fast_search should be chainable with other methods
-    builder = (
-        table.search("xyz", query_type="fts").fast_search().select(["text"]).limit(5)
-    )
-    query = builder.to_query_object()
-    assert query.fast_search is True
-    assert query.limit == 5
-    assert query.columns == ["text"]
-
-    # fast_search should be enabled by keyword argument too
-    query = LanceFtsQueryBuilder(table, "xyz", fast_search=True).to_query_object()
-    assert query.fast_search is True
-
-    # Verify it executes without error and skips unindexed data
-    results = table.search("xyz", query_type="fts").fast_search().limit(5).to_list()
-    assert len(results) == 0
-
-    # Update index and verify it returns results
-    table.optimize()
-    results = table.search("xyz", query_type="fts").fast_search().limit(5).to_list()
-    assert len(results) > 0
-
-
-@pytest.mark.asyncio
-async def test_fts_fast_search_async(async_table):
-    await async_table.create_index("text", config=FTS())
-
-    # Insert some unindexed data
-    await async_table.add(
-        [
-            {
-                "text": "xyz",
-                "vector": [0 for _ in range(128)],
-                "id": 101,
-                "text2": "xyz",
-                "nested": {"text": "xyz"},
-                "count": 10,
-            }
-        ]
-    )
-
-    # Without fast_search, should return results
-    results = await async_table.query().nearest_to_text("xyz").limit(5).to_list()
-    assert len(results) > 0
-
-    # With fast_search, should return no results data unindexed
-    fast_results = (
-        await async_table.query()
-        .nearest_to_text("xyz")
-        .fast_search()
-        .limit(5)
-        .to_list()
-    )
-    assert len(fast_results) == 0
-
-    # Update index and verify it returns results
-    await async_table.optimize()
-
-    fast_results = (
-        await async_table.query()
-        .nearest_to_text("xyz")
-        .fast_search()
-        .limit(5)
-        .to_list()
-    )
-    assert len(fast_results) > 0
-
-    # fast_search should be chainable with other methods
-    results = (
-        await async_table.query()
-        .nearest_to_text("xyz")
-        .fast_search()
-        .select(["text"])
-        .limit(5)
-        .to_list()
-    )
-    assert len(results) > 0
--- a/python/python/tests/test_util.py
+++ b/python/python/tests/test_util.py
@@ -121,32 +121,6 @@ def test_value_to_sql_string(tmp_path):
        assert table.to_pandas().query("search == @value")["replace"].item() == value


-def test_value_to_sql_dict():
-    # Simple flat struct
-    assert value_to_sql({"a": 1, "b": "hello"}) == "named_struct('a', 1, 'b', 'hello')"
-
-    # Nested struct
-    assert (
-        value_to_sql({"outer": {"inner": 1}})
-        == "named_struct('outer', named_struct('inner', 1))"
-    )
-
-    # List inside struct
-    assert value_to_sql({"a": [1, 2]}) == "named_struct('a', [1, 2])"
-
-    # Mixed types
-    assert (
-        value_to_sql({"name": "test", "count": 42, "rate": 3.14, "active": True})
-        == "named_struct('name', 'test', 'count', 42, 'rate', 3.14, 'active', TRUE)"
-    )
-
-    # Null value inside struct
-    assert value_to_sql({"a": None}) == "named_struct('a', NULL)"
-
-    # Empty dict
-    assert value_to_sql({}) == "named_struct()"
-
-
 def test_append_vector_columns():
    registry = EmbeddingFunctionRegistry.get_instance()
    registry.register("test")(MockTextEmbeddingFunction)
--- a/python/src/arrow.rs
+++ b/python/src/arrow.rs
@@ -10,7 +10,7 @@ use arrow::{
 use futures::stream::StreamExt;
 use lancedb::arrow::SendableRecordBatchStream;
 use pyo3::{
-    Bound, Py, PyAny, PyRef, PyResult, Python, exceptions::PyStopAsyncIteration, pyclass, pymethods,
+    exceptions::PyStopAsyncIteration, pyclass, pymethods, Bound, Py, PyAny, PyRef, PyResult, Python,
 };
 use pyo3_async_runtimes::tokio::future_into_py;

--- a/python/src/connection.rs
+++ b/python/src/connection.rs
@@ -9,10 +9,10 @@ use lancedb::{
    database::{CreateTableMode, Database, ReadConsistency},
 };
 use pyo3::{
-    Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
    exceptions::{PyRuntimeError, PyValueError},
    pyclass, pyfunction, pymethods,
    types::{PyDict, PyDictMethods},
+    Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
 };
 use pyo3_async_runtimes::tokio::future_into_py;

--- a/python/src/error.rs
+++ b/python/src/error.rs
@@ -2,10 +2,10 @@
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors

 use pyo3::{
-    PyErr, PyResult, Python,
    exceptions::{PyIOError, PyNotImplementedError, PyOSError, PyRuntimeError, PyValueError},
    intern,
    types::{PyAnyMethods, PyNone},
+    PyErr, PyResult, Python,
 };

 use lancedb::error::Error as LanceError;
--- a/python/src/index.rs
+++ b/python/src/index.rs
@@ -3,17 +3,17 @@

 use lancedb::index::vector::{IvfFlatIndexBuilder, IvfRqIndexBuilder, IvfSqIndexBuilder};
 use lancedb::index::{
-    Index as LanceDbIndex,
    scalar::{BTreeIndexBuilder, FtsIndexBuilder},
    vector::{IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder},
+    Index as LanceDbIndex,
 };
-use pyo3::IntoPyObject;
 use pyo3::types::PyStringMethods;
+use pyo3::IntoPyObject;
 use pyo3::{
-    Bound, FromPyObject, PyAny, PyResult, Python,
    exceptions::{PyKeyError, PyValueError},
    intern, pyclass, pymethods,
    types::PyAnyMethods,
+    Bound, FromPyObject, PyAny, PyResult, Python,
 };

 use crate::util::parse_distance_type;
@@ -41,12 +41,7 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
                let inner_opts = FtsIndexBuilder::default()
                    .base_tokenizer(params.base_tokenizer)
                    .language(&params.language)
-                    .map_err(|_| {
-                        PyValueError::new_err(format!(
-                            "LanceDB does not support the requested language: '{}'",
-                            params.language
-                        ))
-                    })?
+                    .map_err(|_| PyValueError::new_err(format!("LanceDB does not support the requested language: '{}'", params.language)))?
                    .with_position(params.with_position)
                    .lower_case(params.lower_case)
                    .max_token_length(params.max_token_length)
@@ -57,7 +52,7 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
                    .ngram_max_length(params.ngram_max_length)
                    .ngram_prefix_only(params.prefix_only);
                Ok(LanceDbIndex::FTS(inner_opts))
-            }
+            },
            "IvfFlat" => {
                let params = source.extract::<IvfFlatParams>()?;
                let distance_type = parse_distance_type(params.distance_type)?;
@@ -69,11 +64,10 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
                    ivf_flat_builder = ivf_flat_builder.num_partitions(num_partitions);
                }
                if let Some(target_partition_size) = params.target_partition_size {
-                    ivf_flat_builder =
-                        ivf_flat_builder.target_partition_size(target_partition_size);
+                    ivf_flat_builder = ivf_flat_builder.target_partition_size(target_partition_size);
                }
                Ok(LanceDbIndex::IvfFlat(ivf_flat_builder))
-            }
+            },
            "IvfPq" => {
                let params = source.extract::<IvfPqParams>()?;
                let distance_type = parse_distance_type(params.distance_type)?;
@@ -92,7 +86,7 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
                    ivf_pq_builder = ivf_pq_builder.num_sub_vectors(num_sub_vectors);
                }
                Ok(LanceDbIndex::IvfPq(ivf_pq_builder))
-            }
+            },
            "IvfSq" => {
                let params = source.extract::<IvfSqParams>()?;
                let distance_type = parse_distance_type(params.distance_type)?;
@@ -107,7 +101,7 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
                    ivf_sq_builder = ivf_sq_builder.target_partition_size(target_partition_size);
                }
                Ok(LanceDbIndex::IvfSq(ivf_sq_builder))
-            }
+            },
            "IvfRq" => {
                let params = source.extract::<IvfRqParams>()?;
                let distance_type = parse_distance_type(params.distance_type)?;
@@ -123,7 +117,7 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
                    ivf_rq_builder = ivf_rq_builder.target_partition_size(target_partition_size);
                }
                Ok(LanceDbIndex::IvfRq(ivf_rq_builder))
-            }
+            },
            "HnswPq" => {
                let params = source.extract::<IvfHnswPqParams>()?;
                let distance_type = parse_distance_type(params.distance_type)?;
@@ -144,7 +138,7 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
                    hnsw_pq_builder = hnsw_pq_builder.num_sub_vectors(num_sub_vectors);
                }
                Ok(LanceDbIndex::IvfHnswPq(hnsw_pq_builder))
-            }
+            },
            "HnswSq" => {
                let params = source.extract::<IvfHnswSqParams>()?;
                let distance_type = parse_distance_type(params.distance_type)?;
@@ -161,7 +155,7 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
                    hnsw_sq_builder = hnsw_sq_builder.target_partition_size(target_partition_size);
                }
                Ok(LanceDbIndex::IvfHnswSq(hnsw_sq_builder))
-            }
+            },
            not_supported => Err(PyValueError::new_err(format!(
                "Invalid index type '{}'.  Must be one of BTree, Bitmap, LabelList, FTS, IvfPq, IvfSq, IvfHnswPq, or IvfHnswSq",
                not_supported
--- a/python/src/lib.rs
+++ b/python/src/lib.rs
@@ -2,14 +2,14 @@
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors

 use arrow::RecordBatchStream;
-use connection::{Connection, connect};
+use connection::{connect, Connection};
 use env_logger::Env;
 use index::IndexConfig;
 use permutation::{PyAsyncPermutationBuilder, PyPermutationReader};
 use pyo3::{
-    Bound, PyResult, Python, pymodule,
+    pymodule,
    types::{PyModule, PyModuleMethods},
-    wrap_pyfunction,
+    wrap_pyfunction, Bound, PyResult, Python,
 };
 use query::{FTSQuery, HybridQuery, Query, VectorQuery};
 use session::Session;
--- a/python/src/permutation.rs
+++ b/python/src/permutation.rs
@@ -16,10 +16,10 @@ use lancedb::{
    query::Select,
 };
 use pyo3::{
-    Bound, PyAny, PyRef, PyRefMut, PyResult, Python,
    exceptions::PyRuntimeError,
    pyclass, pymethods,
    types::{PyAnyMethods, PyDict, PyDictMethods, PyType},
+    Bound, PyAny, PyRef, PyRefMut, PyResult, Python,
 };
 use pyo3_async_runtimes::tokio::future_into_py;

--- a/python/src/query.rs
+++ b/python/src/query.rs
@@ -4,9 +4,9 @@
 use std::sync::Arc;
 use std::time::Duration;

+use arrow::array::make_array;
 use arrow::array::Array;
 use arrow::array::ArrayData;
-use arrow::array::make_array;
 use arrow::pyarrow::FromPyArrow;
 use arrow::pyarrow::IntoPyArrow;
 use arrow::pyarrow::ToPyArrow;
@@ -22,23 +22,23 @@ use lancedb::query::{
    VectorQuery as LanceDbVectorQuery,
 };
 use lancedb::table::AnyQuery;
+use pyo3::prelude::{PyAnyMethods, PyDictMethods};
+use pyo3::pyfunction;
+use pyo3::pymethods;
+use pyo3::types::PyList;
+use pyo3::types::{PyDict, PyString};
 use pyo3::Bound;
 use pyo3::IntoPyObject;
 use pyo3::PyAny;
 use pyo3::PyRef;
 use pyo3::PyResult;
 use pyo3::Python;
-use pyo3::prelude::{PyAnyMethods, PyDictMethods};
-use pyo3::pyfunction;
-use pyo3::pymethods;
-use pyo3::types::PyList;
-use pyo3::types::{PyDict, PyString};
-use pyo3::{FromPyObject, exceptions::PyRuntimeError};
-use pyo3::{PyErr, pyclass};
+use pyo3::{exceptions::PyRuntimeError, FromPyObject};
 use pyo3::{
    exceptions::{PyNotImplementedError, PyValueError},
    intern,
 };
+use pyo3::{pyclass, PyErr};
 use pyo3_async_runtimes::tokio::future_into_py;

 use crate::util::parse_distance_type;
--- a/python/src/session.rs
+++ b/python/src/session.rs
@@ -4,7 +4,7 @@
 use std::sync::Arc;

 use lancedb::{ObjectStoreRegistry, Session as LanceSession};
-use pyo3::{PyResult, pyclass, pymethods};
+use pyo3::{pyclass, pymethods, PyResult};

 /// A session for managing caches and object stores across LanceDB operations.
 ///
--- a/python/src/storage_options.rs
+++ b/python/src/storage_options.rs
@@ -66,10 +66,13 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
                    .inner
                    .bind(py)
                    .call_method0("fetch_storage_options")
-                    .map_err(|e| lance_core::Error::io_source(Box::new(std::io::Error::other(format!(
-                        "Failed to call fetch_storage_options: {}",
-                        e
-                    )))))?;
+                    .map_err(|e| lance_core::Error::IO {
+                        source: Box::new(std::io::Error::other(format!(
+                            "Failed to call fetch_storage_options: {}",
+                            e
+                        ))),
+                        location: snafu::location!(),
+                    })?;

                // If result is None, return None
                if result.is_none() {
@@ -78,19 +81,26 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {

                // Extract the result dict - should be a flat Map<String, String>
                let result_dict = result.downcast::<PyDict>().map_err(|_| {
-                    lance_core::Error::invalid_input(
-                        "fetch_storage_options() must return a dict of string key-value pairs or None",
-                    )
+                    lance_core::Error::InvalidInput {
+                        source: "fetch_storage_options() must return None or a dict of string key-value pairs".into(),
+                        location: snafu::location!(),
+                    }
                })?;

                // Convert all entries to HashMap<String, String>
                let mut storage_options = HashMap::new();
                for (key, value) in result_dict.iter() {
                    let key_str: String = key.extract().map_err(|e| {
-                        lance_core::Error::invalid_input(format!("Storage option key must be a string: {}", e))
+                        lance_core::Error::InvalidInput {
+                            source: format!("Storage option key must be a string: {}", e).into(),
+                            location: snafu::location!(),
+                        }
                    })?;
                    let value_str: String = value.extract().map_err(|e| {
-                        lance_core::Error::invalid_input(format!("Storage option value must be a string: {}", e))
+                        lance_core::Error::InvalidInput {
+                            source: format!("Storage option value must be a string: {}", e).into(),
+                            location: snafu::location!(),
+                        }
                    })?;
                    storage_options.insert(key_str, value_str);
                }
@@ -99,10 +109,13 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
            })
        })
        .await
-        .map_err(|e| lance_core::Error::io_source(Box::new(std::io::Error::other(format!(
-            "Task join error: {}",
-            e
-        )))))?
+        .map_err(|e| lance_core::Error::IO {
+            source: Box::new(std::io::Error::other(format!(
+                "Task join error: {}",
+                e
+            ))),
+            location: snafu::location!(),
+        })?
    }

    fn provider_id(&self) -> String {
--- a/python/src/table.rs
+++ b/python/src/table.rs
@@ -5,7 +5,7 @@ use std::{collections::HashMap, sync::Arc};
 use crate::{
    connection::Connection,
    error::PythonErrorExt,
-    index::{IndexConfig, extract_index_params},
+    index::{extract_index_params, IndexConfig},
    query::{Query, TakeQuery},
    table::scannable::PyScannable,
 };
@@ -19,10 +19,10 @@ use lancedb::table::{
    Table as LanceDbTable,
 };
 use pyo3::{
-    Bound, FromPyObject, PyAny, PyRef, PyResult, Python,
    exceptions::{PyKeyError, PyRuntimeError, PyValueError},
    pyclass, pymethods,
    types::{IntoPyDict, PyAnyMethods, PyDict, PyDictMethods},
+    Bound, FromPyObject, PyAny, PyRef, PyResult, Python,
 };
 use pyo3_async_runtimes::tokio::future_into_py;

@@ -112,24 +112,19 @@ impl From<lancedb::table::AddResult> for AddResult {
 #[pyclass(get_all)]
 #[derive(Clone, Debug)]
 pub struct DeleteResult {
-    pub num_deleted_rows: u64,
    pub version: u64,
 }

 #[pymethods]
 impl DeleteResult {
    pub fn __repr__(&self) -> String {
-        format!(
-            "DeleteResult(num_deleted_rows={}, version={})",
-            self.num_deleted_rows, self.version
-        )
+        format!("DeleteResult(version={})", self.version)
    }
 }

 impl From<lancedb::table::DeleteResult> for DeleteResult {
    fn from(result: lancedb::table::DeleteResult) -> Self {
        Self {
-            num_deleted_rows: result.num_deleted_rows,
            version: result.version,
        }
    }
@@ -542,7 +537,7 @@ impl Table {
        let inner = self_.inner_ref()?.clone();
        future_into_py(self_.py(), async move {
            let versions = inner.list_versions().await.infer_error()?;
-            Python::attach(|py| {
+            let versions_as_dict = Python::attach(|py| {
                versions
                    .iter()
                    .map(|v| {
@@ -559,7 +554,9 @@ impl Table {
                        Ok(dict.unbind())
                    })
                    .collect::<PyResult<Vec<_>>>()
-            })
+            });
+
+            versions_as_dict
        })
    }

--- a/python/src/table/scannable.rs
+++ b/python/src/table/scannable.rs
@@ -10,11 +10,11 @@ use arrow::{
 };
 use futures::StreamExt;
 use lancedb::{
-    Error,
    arrow::{SendableRecordBatchStream, SimpleRecordBatchStream},
    data::scannable::Scannable,
+    Error,
 };
-use pyo3::{FromPyObject, Py, PyAny, Python, types::PyAnyMethods};
+use pyo3::{types::PyAnyMethods, FromPyObject, Py, PyAny, Python};

 /// Adapter that implements Scannable for a Python reader factory callable.
 ///
@@ -99,15 +99,15 @@ impl Scannable for PyScannable {
                        // Channel closed. Check if the task panicked — a panic
                        // drops the sender without sending an error, so without
                        // this check we'd silently return a truncated stream.
-                        if let Some(handle) = join_handle
-                            && let Err(join_err) = handle.await
-                        {
-                            return Some((
-                                Err(Error::Runtime {
-                                    message: format!("Reader task panicked: {}", join_err),
-                                }),
-                                (rx, None),
-                            ));
+                        if let Some(handle) = join_handle {
+                            if let Err(join_err) = handle.await {
+                                return Some((
+                                    Err(Error::Runtime {
+                                        message: format!("Reader task panicked: {}", join_err),
+                                    }),
+                                    (rx, None),
+                                ));
+                            }
                        }
                        None
                    }
--- a/python/src/util.rs
+++ b/python/src/util.rs
@@ -5,9 +5,8 @@ use std::sync::Mutex;

 use lancedb::DistanceType;
 use pyo3::{
-    PyResult,
    exceptions::{PyRuntimeError, PyValueError},
-    pyfunction,
+    pyfunction, PyResult,
 };

 /// A wrapper around a rust builder
--- a/rust/lancedb/Cargo.toml
+++ b/rust/lancedb/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb"
-version = "0.27.0-beta.3"
+version = "0.27.0-beta.1"
 edition.workspace = true
 description = "LanceDB: A serverless, low-latency vector database for AI applications"
 license.workspace = true
@@ -25,9 +25,7 @@ datafusion-catalog.workspace = true
 datafusion-common.workspace = true
 datafusion-execution.workspace = true
 datafusion-expr.workspace = true
-datafusion-functions.workspace = true
 datafusion-physical-expr.workspace = true
-datafusion-sql.workspace = true
 datafusion-physical-plan.workspace = true
 datafusion.workspace = true
 object_store = { workspace = true }
--- a/rust/lancedb/examples/bedrock.rs
+++ b/rust/lancedb/examples/bedrock.rs
@@ -9,9 +9,10 @@ use aws_config::Region;
 use aws_sdk_bedrockruntime::Client;
 use futures::StreamExt;
 use lancedb::{
-    Result, connect,
-    embeddings::{EmbeddingDefinition, EmbeddingFunction, bedrock::BedrockEmbeddingFunction},
+    connect,
+    embeddings::{bedrock::BedrockEmbeddingFunction, EmbeddingDefinition, EmbeddingFunction},
    query::{ExecutableQuery, QueryBase},
+    Result,
 };

 #[tokio::main]
--- a/rust/lancedb/examples/full_text_search.rs
+++ b/rust/lancedb/examples/full_text_search.rs
@@ -10,10 +10,10 @@ use futures::TryStreamExt;
 use lance_index::scalar::FullTextSearchQuery;
 use lancedb::connection::Connection;

-use lancedb::index::Index;
 use lancedb::index::scalar::FtsIndexBuilder;
+use lancedb::index::Index;
 use lancedb::query::{ExecutableQuery, QueryBase};
-use lancedb::{Result, Table, connect};
+use lancedb::{connect, Result, Table};
 use rand::random;

 #[tokio::main]
@@ -46,21 +46,19 @@ fn create_some_records() -> Result<Box<dyn arrow_array::RecordBatchReader + Send
        .collect::<Vec<_>>();
    let n_terms = 3;
    let batches = RecordBatchIterator::new(
-        vec![
-            RecordBatch::try_new(
-                schema.clone(),
-                vec![
-                    Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)),
-                    Arc::new(StringArray::from_iter_values((0..TOTAL).map(|_| {
-                        (0..n_terms)
-                            .map(|_| words[random::<u32>() as usize % words.len()])
-                            .collect::<Vec<_>>()
-                            .join(" ")
-                    }))),
-                ],
-            )
-            .unwrap(),
-        ]
+        vec![RecordBatch::try_new(
+            schema.clone(),
+            vec![
+                Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)),
+                Arc::new(StringArray::from_iter_values((0..TOTAL).map(|_| {
+                    (0..n_terms)
+                        .map(|_| words[random::<u32>() as usize % words.len()])
+                        .collect::<Vec<_>>()
+                        .join(" ")
+                }))),
+            ],
+        )
+        .unwrap()]
        .into_iter()
        .map(Ok),
        schema.clone(),
--- a/rust/lancedb/examples/hybrid_search.rs
+++ b/rust/lancedb/examples/hybrid_search.rs
@@ -5,15 +5,16 @@ use arrow_array::{RecordBatch, StringArray};
 use arrow_schema::{DataType, Field, Schema};
 use futures::TryStreamExt;
 use lance_index::scalar::FullTextSearchQuery;
-use lancedb::index::Index;
 use lancedb::index::scalar::FtsIndexBuilder;
+use lancedb::index::Index;
 use lancedb::{
-    Result, Table, connect,
+    connect,
    embeddings::{
-        EmbeddingDefinition, EmbeddingFunction,
-        sentence_transformers::SentenceTransformersEmbeddings,
+        sentence_transformers::SentenceTransformersEmbeddings, EmbeddingDefinition,
+        EmbeddingFunction,
    },
    query::{QueryBase, QueryExecutionOptions},
+    Result, Table,
 };
 use std::{iter::once, sync::Arc};

--- a/rust/lancedb/examples/ivf_pq.rs
+++ b/rust/lancedb/examples/ivf_pq.rs
@@ -14,10 +14,10 @@ use arrow_schema::{DataType, Field, Schema};
 use futures::TryStreamExt;
 use lancedb::connection::Connection;

-use lancedb::index::Index;
 use lancedb::index::vector::IvfPqIndexBuilder;
+use lancedb::index::Index;
 use lancedb::query::{ExecutableQuery, QueryBase};
-use lancedb::{DistanceType, Result, Table, connect};
+use lancedb::{connect, DistanceType, Result, Table};

 #[tokio::main]
 async fn main() -> Result<()> {
@@ -51,21 +51,19 @@ fn create_some_records() -> Result<Box<dyn arrow_array::RecordBatchReader + Send

    // Create a RecordBatch stream.
    let batches = RecordBatchIterator::new(
-        vec![
-            RecordBatch::try_new(
-                schema.clone(),
-                vec![
-                    Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)),
-                    Arc::new(
-                        FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(
-                            (0..TOTAL).map(|_| Some(vec![Some(1.0); DIM])),
-                            DIM as i32,
-                        ),
+        vec![RecordBatch::try_new(
+            schema.clone(),
+            vec![
+                Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)),
+                Arc::new(
+                    FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(
+                        (0..TOTAL).map(|_| Some(vec![Some(1.0); DIM])),
+                        DIM as i32,
                    ),
-                ],
-            )
-            .unwrap(),
-        ]
+                ),
+            ],
+        )
+        .unwrap()]
        .into_iter()
        .map(Ok),
        schema.clone(),
--- a/rust/lancedb/examples/openai.rs
+++ b/rust/lancedb/examples/openai.rs
@@ -8,9 +8,10 @@ use std::{iter::once, sync::Arc};
 use arrow_array::{RecordBatch, StringArray};
 use futures::StreamExt;
 use lancedb::{
-    Result, connect,
-    embeddings::{EmbeddingDefinition, EmbeddingFunction, openai::OpenAIEmbeddingFunction},
+    connect,
+    embeddings::{openai::OpenAIEmbeddingFunction, EmbeddingDefinition, EmbeddingFunction},
    query::{ExecutableQuery, QueryBase},
+    Result,
 };

 // --8<-- [end:imports]
--- a/rust/lancedb/examples/sentence_transformers.rs
+++ b/rust/lancedb/examples/sentence_transformers.rs
@@ -7,12 +7,13 @@ use arrow_array::{RecordBatch, StringArray};
 use arrow_schema::{DataType, Field, Schema};
 use futures::StreamExt;
 use lancedb::{
-    Result, connect,
+    connect,
    embeddings::{
-        EmbeddingDefinition, EmbeddingFunction,
-        sentence_transformers::SentenceTransformersEmbeddings,
+        sentence_transformers::SentenceTransformersEmbeddings, EmbeddingDefinition,
+        EmbeddingFunction,
    },
    query::{ExecutableQuery, QueryBase},
+    Result,
 };

 #[tokio::main]
--- a/rust/lancedb/examples/simple.rs
+++ b/rust/lancedb/examples/simple.rs
@@ -14,7 +14,7 @@ use futures::TryStreamExt;
 use lancedb::connection::Connection;
 use lancedb::index::Index;
 use lancedb::query::{ExecutableQuery, QueryBase};
-use lancedb::{Result, Table as LanceDbTable, connect};
+use lancedb::{connect, Result, Table as LanceDbTable};

 #[tokio::main]
 async fn main() -> Result<()> {
--- a/rust/lancedb/src/arrow.rs
+++ b/rust/lancedb/src/arrow.rs
@@ -12,7 +12,7 @@ use lance_datagen::{BatchCount, BatchGeneratorBuilder, RowCount};
 #[cfg(feature = "polars")]
 use {crate::polars_arrow_convertors, polars::frame::ArrowChunk, polars::prelude::DataFrame};

-use crate::{Error, error::Result};
+use crate::{error::Result, Error};

 /// An iterator of batches that also has a schema
 pub trait RecordBatchReader: Iterator<Item = Result<arrow_array::RecordBatch>> {
--- a/rust/lancedb/src/connection.rs
+++ b/rust/lancedb/src/connection.rs
@@ -17,7 +17,6 @@ use lance_namespace::models::{
 #[cfg(feature = "aws")]
 use object_store::aws::AwsCredential;

-use crate::Table;
 use crate::connection::create_table::CreateTableBuilder;
 use crate::data::scannable::Scannable;
 use crate::database::listing::ListingDatabase;
@@ -32,6 +31,7 @@ use crate::remote::{
    client::ClientConfig,
    db::{OPT_REMOTE_API_KEY, OPT_REMOTE_HOST_OVERRIDE, OPT_REMOTE_REGION},
 };
+use crate::Table;
 use lance::io::ObjectStoreParams;
 pub use lance_encoding::version::LanceFileVersion;
 #[cfg(feature = "remote")]
@@ -566,11 +566,8 @@ pub struct ConnectBuilder {
 }

 #[cfg(feature = "remote")]
-const ENV_VARS_TO_STORAGE_OPTS: [(&str, &str); 3] = [
-    ("AZURE_STORAGE_ACCOUNT_NAME", "azure_storage_account_name"),
-    ("AZURE_CLIENT_ID", "azure_client_id"),
-    ("AZURE_TENANT_ID", "azure_tenant_id"),
-];
+const ENV_VARS_TO_STORAGE_OPTS: [(&str, &str); 1] =
+    [("AZURE_STORAGE_ACCOUNT_NAME", "azure_storage_account_name")];

 impl ConnectBuilder {
    /// Create a new [`ConnectOptions`] with the given database URI.
@@ -761,10 +758,10 @@ impl ConnectBuilder {
        options: &mut HashMap<String, String>,
    ) {
        for (env_key, opt_key) in env_var_to_remote_storage_option {
-            if let Ok(env_value) = std::env::var(env_key)
-                && !options.contains_key(*opt_key)
-            {
-                options.insert((*opt_key).to_string(), env_value);
+            if let Ok(env_value) = std::env::var(env_key) {
+                if !options.contains_key(*opt_key) {
+                    options.insert((*opt_key).to_string(), env_value);
+                }
            }
        }
    }
@@ -1014,13 +1011,14 @@ mod tests {
    #[cfg(feature = "remote")]
    #[test]
    fn test_apply_env_defaults() {
-        let env_key = "PATH";
-        let env_val = std::env::var(env_key).expect("PATH should be set in test environment");
+        let env_key = "TEST_APPLY_ENV_DEFAULTS_ENVIRONMENT_VARIABLE_ENV_KEY";
+        let env_val = "TEST_APPLY_ENV_DEFAULTS_ENVIRONMENT_VARIABLE_ENV_VAL";
        let opts_key = "test_apply_env_defaults_environment_variable_opts_key";
+        std::env::set_var(env_key, env_val);

        let mut options = HashMap::new();
        ConnectBuilder::apply_env_defaults(&[(env_key, opts_key)], &mut options);
-        assert_eq!(Some(&env_val), options.get(opts_key));
+        assert_eq!(Some(&env_val.to_string()), options.get(opts_key));

        options.insert(opts_key.to_string(), "EXPLICIT-VALUE".to_string());
        ConnectBuilder::apply_env_defaults(&[(env_key, opts_key)], &mut options);
--- a/rust/lancedb/src/connection/create_table.rs
+++ b/rust/lancedb/src/connection/create_table.rs
@@ -6,12 +6,12 @@ use std::sync::Arc;
 use lance_io::object_store::StorageOptionsProvider;

 use crate::{
-    Error, Result, Table,
    connection::{merge_storage_options, set_storage_options_provider},
    data::scannable::{Scannable, WithEmbeddingsScannable},
    database::{CreateTableMode, CreateTableRequest, Database},
    embeddings::{EmbeddingDefinition, EmbeddingFunction, EmbeddingRegistry},
    table::WriteOptions,
+    Error, Result, Table,
 };

 pub struct CreateTableBuilder {
@@ -167,7 +167,7 @@ impl CreateTableBuilder {
 #[cfg(test)]
 mod tests {
    use arrow_array::{
-        Array, FixedSizeListArray, Float32Array, RecordBatch, RecordBatchIterator, record_batch,
+        record_batch, Array, FixedSizeListArray, Float32Array, RecordBatch, RecordBatchIterator,
    };
    use arrow_schema::{ArrowError, DataType, Field, Schema};
    use futures::TryStreamExt;
@@ -380,12 +380,11 @@ mod tests {
            .await
            .unwrap();
        let other_schema = Arc::new(Schema::new(vec![Field::new("y", DataType::Int32, false)]));
-        assert!(
-            db.create_empty_table("test", other_schema.clone())
-                .execute()
-                .await
-                .is_err()
-        ); // TODO: assert what this error is
+        assert!(db
+            .create_empty_table("test", other_schema.clone())
+            .execute()
+            .await
+            .is_err()); // TODO: assert what this error is
        let overwritten = db
            .create_empty_table("test", other_schema.clone())
            .mode(CreateTableMode::Overwrite)
--- a/rust/lancedb/src/data/inspect.rs
+++ b/rust/lancedb/src/data/inspect.rs
@@ -5,9 +5,9 @@ use std::collections::HashMap;

 use arrow::compute::kernels::{aggregate::bool_and, length::length};
 use arrow_array::{
-    Array, GenericListArray, OffsetSizeTrait, PrimitiveArray, RecordBatchReader,
    cast::AsArray,
    types::{ArrowPrimitiveType, Int32Type, Int64Type},
+    Array, GenericListArray, OffsetSizeTrait, PrimitiveArray, RecordBatchReader,
 };
 use arrow_ord::cmp::eq;
 use arrow_schema::DataType;
@@ -78,7 +78,7 @@ pub fn infer_vector_columns(
                _ => {
                    return Err(Error::Schema {
                        message: format!("Column {} is not a list", col_name),
-                    });
+                    })
                }
            } {
                if let Some(Some(prev_dim)) = columns_to_infer.get(&col_name) {
@@ -102,8 +102,8 @@ mod tests {
    use super::*;

    use arrow_array::{
-        FixedSizeListArray, Float32Array, ListArray, RecordBatch, RecordBatchIterator, StringArray,
        types::{Float32Type, Float64Type},
+        FixedSizeListArray, Float32Array, ListArray, RecordBatch, RecordBatchIterator, StringArray,
    };
    use arrow_schema::{DataType, Field, Schema};
    use std::{sync::Arc, vec};
--- a/rust/lancedb/src/data/sanitize.rs
+++ b/rust/lancedb/src/data/sanitize.rs
@@ -4,10 +4,10 @@
 use std::{iter::repeat_with, sync::Arc};

 use arrow_array::{
-    Array, ArrowNumericType, FixedSizeListArray, PrimitiveArray, RecordBatch, RecordBatchIterator,
-    RecordBatchReader,
    cast::AsArray,
    types::{Float16Type, Float32Type, Float64Type, Int32Type, Int64Type},
+    Array, ArrowNumericType, FixedSizeListArray, PrimitiveArray, RecordBatch, RecordBatchIterator,
+    RecordBatchReader,
 };
 use arrow_cast::{can_cast_types, cast};
 use arrow_schema::{ArrowError, DataType, Field, Schema};
@@ -184,7 +184,7 @@ mod tests {
    use std::sync::Arc;

    use arrow_array::{
-        FixedSizeListArray, Float16Array, Float32Array, Float64Array, Int8Array, Int32Array,
+        FixedSizeListArray, Float16Array, Float32Array, Float64Array, Int32Array, Int8Array,
        RecordBatch, RecordBatchIterator, StringArray,
    };
    use arrow_schema::Field;
--- a/rust/lancedb/src/data/scannable.rs
+++ b/rust/lancedb/src/data/scannable.rs
@@ -9,21 +9,22 @@

 use std::sync::Arc;

+use arrow_array::{ArrayRef, RecordBatch, RecordBatchIterator, RecordBatchReader};
+use arrow_schema::{ArrowError, SchemaRef};
+use async_trait::async_trait;
+use futures::stream::once;
+use futures::StreamExt;
+use lance_datafusion::utils::StreamingWriteSource;
+
 use crate::arrow::{
    SendableRecordBatchStream, SendableRecordBatchStreamExt, SimpleRecordBatchStream,
 };
 use crate::embeddings::{
-    EmbeddingDefinition, EmbeddingFunction, EmbeddingRegistry, compute_embeddings_for_batch,
-    compute_output_schema,
+    compute_embeddings_for_batch, compute_output_schema, EmbeddingDefinition, EmbeddingFunction,
+    EmbeddingRegistry,
 };
 use crate::table::{ColumnDefinition, ColumnKind, TableDefinition};
 use crate::{Error, Result};
-use arrow_array::{ArrayRef, RecordBatch, RecordBatchIterator, RecordBatchReader};
-use arrow_schema::{ArrowError, SchemaRef};
-use async_trait::async_trait;
-use futures::StreamExt;
-use futures::stream::once;
-use lance_datafusion::utils::StreamingWriteSource;

 pub trait Scannable: Send {
    /// Returns the schema of the data.
@@ -348,133 +349,6 @@ pub fn scannable_with_embeddings(
    Ok(inner)
 }

-/// A wrapper that buffers the first RecordBatch from a Scannable so we can
-/// inspect it (e.g. to estimate data size) without losing it.
-pub(crate) struct PeekedScannable {
-    inner: Box<dyn Scannable>,
-    peeked: Option<RecordBatch>,
-    /// The first item from the stream, if it was an error. Stored so we can
-    /// re-emit it from `scan_as_stream` instead of silently dropping it.
-    first_error: Option<crate::Error>,
-    stream: Option<SendableRecordBatchStream>,
-}
-
-impl PeekedScannable {
-    pub fn new(inner: Box<dyn Scannable>) -> Self {
-        Self {
-            inner,
-            peeked: None,
-            first_error: None,
-            stream: None,
-        }
-    }
-
-    /// Reads and buffers the first batch from the inner scannable.
-    /// Returns a clone of it. Subsequent calls return the same batch.
-    ///
-    /// Returns `None` if the stream is empty or the first item is an error.
-    /// Errors are preserved and re-emitted by `scan_as_stream`.
-    pub async fn peek(&mut self) -> Option<RecordBatch> {
-        if self.peeked.is_some() {
-            return self.peeked.clone();
-        }
-        // Already peeked and got an error or empty stream.
-        if self.stream.is_some() || self.first_error.is_some() {
-            return None;
-        }
-        let mut stream = self.inner.scan_as_stream();
-        match stream.next().await {
-            Some(Ok(batch)) => {
-                self.peeked = Some(batch.clone());
-                self.stream = Some(stream);
-                Some(batch)
-            }
-            Some(Err(e)) => {
-                self.first_error = Some(e);
-                self.stream = Some(stream);
-                None
-            }
-            None => {
-                self.stream = Some(stream);
-                None
-            }
-        }
-    }
-}
-
-impl Scannable for PeekedScannable {
-    fn schema(&self) -> SchemaRef {
-        self.inner.schema()
-    }
-
-    fn num_rows(&self) -> Option<usize> {
-        self.inner.num_rows()
-    }
-
-    fn rescannable(&self) -> bool {
-        self.inner.rescannable()
-    }
-
-    fn scan_as_stream(&mut self) -> SendableRecordBatchStream {
-        let schema = self.inner.schema();
-
-        // If peek() hit an error, prepend it so downstream sees the error.
-        let error_item = self.first_error.take().map(Err);
-
-        match (self.peeked.take(), self.stream.take()) {
-            (Some(batch), Some(rest)) => {
-                let prepend = futures::stream::once(std::future::ready(Ok(batch)));
-                Box::pin(SimpleRecordBatchStream {
-                    schema,
-                    stream: prepend.chain(rest),
-                })
-            }
-            (Some(batch), None) => Box::pin(SimpleRecordBatchStream {
-                schema,
-                stream: futures::stream::once(std::future::ready(Ok(batch))),
-            }),
-            (None, Some(rest)) => {
-                if let Some(err) = error_item {
-                    let stream = futures::stream::once(std::future::ready(err));
-                    Box::pin(SimpleRecordBatchStream { schema, stream })
-                } else {
-                    rest
-                }
-            }
-            (None, None) => {
-                // peek() was never called — just delegate
-                self.inner.scan_as_stream()
-            }
-        }
-    }
-}
-
-/// Compute the number of write partitions based on data size estimates.
-///
-/// `sample_bytes` and `sample_rows` come from a representative batch and are
-/// used to estimate per-row size. `total_rows_hint` is the total row count
-/// when known; otherwise `sample_rows` row count is used as a lower bound
-/// estimate.
-///
-/// Targets roughly 1 million rows or 2 GB per partition, capped at
-/// `max_partitions` (typically the number of available CPU cores).
-pub(crate) fn estimate_write_partitions(
-    sample_bytes: usize,
-    sample_rows: usize,
-    total_rows_hint: Option<usize>,
-    max_partitions: usize,
-) -> usize {
-    if sample_rows == 0 {
-        return 1;
-    }
-    let bytes_per_row = sample_bytes / sample_rows;
-    let total_rows = total_rows_hint.unwrap_or(sample_rows);
-    let total_bytes = total_rows * bytes_per_row;
-    let by_rows = total_rows.div_ceil(1_000_000);
-    let by_bytes = total_bytes.div_ceil(2 * 1024 * 1024 * 1024);
-    by_rows.max(by_bytes).max(1).min(max_partitions)
-}
-
 #[cfg(test)]
 mod tests {
    use super::*;
@@ -571,231 +445,6 @@ mod tests {
        assert!(result2.unwrap().is_err());
    }

-    mod peeked_scannable_tests {
-        use crate::test_utils::TestCustomError;
-
-        use super::*;
-
-        #[tokio::test]
-        async fn test_peek_returns_first_batch() {
-            let batch = record_batch!(("id", Int64, [1, 2, 3])).unwrap();
-            let mut peeked = PeekedScannable::new(Box::new(batch.clone()));
-
-            let first = peeked.peek().await.unwrap();
-            assert_eq!(first, batch);
-        }
-
-        #[tokio::test]
-        async fn test_peek_is_idempotent() {
-            let batch = record_batch!(("id", Int64, [1, 2, 3])).unwrap();
-            let mut peeked = PeekedScannable::new(Box::new(batch.clone()));
-
-            let first = peeked.peek().await.unwrap();
-            let second = peeked.peek().await.unwrap();
-            assert_eq!(first, second);
-        }
-
-        #[tokio::test]
-        async fn test_scan_after_peek_returns_all_data() {
-            let batches = vec![
-                record_batch!(("id", Int64, [1, 2])).unwrap(),
-                record_batch!(("id", Int64, [3, 4, 5])).unwrap(),
-            ];
-            let mut peeked = PeekedScannable::new(Box::new(batches.clone()));
-
-            let first = peeked.peek().await.unwrap();
-            assert_eq!(first, batches[0]);
-
-            let result: Vec<RecordBatch> = peeked.scan_as_stream().try_collect().await.unwrap();
-            assert_eq!(result.len(), 2);
-            assert_eq!(result[0], batches[0]);
-            assert_eq!(result[1], batches[1]);
-        }
-
-        #[tokio::test]
-        async fn test_scan_without_peek_passes_through() {
-            let batch = record_batch!(("id", Int64, [1, 2, 3])).unwrap();
-            let mut peeked = PeekedScannable::new(Box::new(batch.clone()));
-
-            let result: Vec<RecordBatch> = peeked.scan_as_stream().try_collect().await.unwrap();
-            assert_eq!(result.len(), 1);
-            assert_eq!(result[0], batch);
-        }
-
-        #[tokio::test]
-        async fn test_delegates_num_rows() {
-            let batches = vec![
-                record_batch!(("id", Int64, [1, 2])).unwrap(),
-                record_batch!(("id", Int64, [3])).unwrap(),
-            ];
-            let peeked = PeekedScannable::new(Box::new(batches));
-            assert_eq!(peeked.num_rows(), Some(3));
-        }
-
-        #[tokio::test]
-        async fn test_non_rescannable_stream_data_preserved() {
-            let batches = vec![
-                record_batch!(("id", Int64, [1, 2])).unwrap(),
-                record_batch!(("id", Int64, [3])).unwrap(),
-            ];
-            let schema = batches[0].schema();
-            let inner = futures::stream::iter(batches.clone().into_iter().map(Ok));
-            let stream: SendableRecordBatchStream = Box::pin(SimpleRecordBatchStream {
-                schema,
-                stream: inner,
-            });
-
-            let mut peeked = PeekedScannable::new(Box::new(stream));
-            assert!(!peeked.rescannable());
-            assert_eq!(peeked.num_rows(), None);
-
-            let first = peeked.peek().await.unwrap();
-            assert_eq!(first, batches[0]);
-
-            // All data is still available via scan_as_stream
-            let result: Vec<RecordBatch> = peeked.scan_as_stream().try_collect().await.unwrap();
-            assert_eq!(result.len(), 2);
-            assert_eq!(result[0], batches[0]);
-            assert_eq!(result[1], batches[1]);
-        }
-
-        #[tokio::test]
-        async fn test_error_in_first_batch_propagates() {
-            let schema = Arc::new(arrow_schema::Schema::new(vec![arrow_schema::Field::new(
-                "id",
-                arrow_schema::DataType::Int64,
-                false,
-            )]));
-            let inner = futures::stream::iter(vec![Err(Error::External {
-                source: Box::new(TestCustomError),
-            })]);
-            let stream: SendableRecordBatchStream = Box::pin(SimpleRecordBatchStream {
-                schema,
-                stream: inner,
-            });
-
-            let mut peeked = PeekedScannable::new(Box::new(stream));
-
-            // peek returns None for errors
-            assert!(peeked.peek().await.is_none());
-
-            // But the error should come through when scanning
-            let mut stream = peeked.scan_as_stream();
-            let first = stream.next().await.unwrap();
-            assert!(first.is_err());
-            let err = first.unwrap_err();
-            assert!(
-                matches!(&err, Error::External { source } if source.downcast_ref::<TestCustomError>().is_some()),
-                "Expected TestCustomError to be preserved, got: {err}"
-            );
-        }
-
-        #[tokio::test]
-        async fn test_error_in_later_batch_propagates() {
-            let good_batch = record_batch!(("id", Int64, [1, 2])).unwrap();
-            let schema = good_batch.schema();
-            let inner = futures::stream::iter(vec![
-                Ok(good_batch.clone()),
-                Err(Error::External {
-                    source: Box::new(TestCustomError),
-                }),
-            ]);
-            let stream: SendableRecordBatchStream = Box::pin(SimpleRecordBatchStream {
-                schema,
-                stream: inner,
-            });
-
-            let mut peeked = PeekedScannable::new(Box::new(stream));
-
-            // peek succeeds with the first batch
-            let first = peeked.peek().await.unwrap();
-            assert_eq!(first, good_batch);
-
-            // scan_as_stream should yield the first batch, then the error
-            let mut stream = peeked.scan_as_stream();
-            let batch1 = stream.next().await.unwrap().unwrap();
-            assert_eq!(batch1, good_batch);
-
-            let batch2 = stream.next().await.unwrap();
-            assert!(batch2.is_err());
-            let err = batch2.unwrap_err();
-            assert!(
-                matches!(&err, Error::External { source } if source.downcast_ref::<TestCustomError>().is_some()),
-                "Expected TestCustomError to be preserved, got: {err}"
-            );
-        }
-
-        #[tokio::test]
-        async fn test_empty_stream_returns_none() {
-            let schema = Arc::new(arrow_schema::Schema::new(vec![arrow_schema::Field::new(
-                "id",
-                arrow_schema::DataType::Int64,
-                false,
-            )]));
-            let inner = futures::stream::empty();
-            let stream: SendableRecordBatchStream = Box::pin(SimpleRecordBatchStream {
-                schema,
-                stream: inner,
-            });
-
-            let mut peeked = PeekedScannable::new(Box::new(stream));
-            assert!(peeked.peek().await.is_none());
-
-            // Scanning an empty (post-peek) stream should yield nothing
-            let result: Vec<RecordBatch> = peeked.scan_as_stream().try_collect().await.unwrap();
-            assert!(result.is_empty());
-        }
-    }
-
-    mod estimate_write_partitions_tests {
-        use super::*;
-
-        #[test]
-        fn test_small_data_single_partition() {
-            // 100 rows * 24 bytes/row = 2400 bytes — well under both thresholds
-            assert_eq!(estimate_write_partitions(2400, 100, Some(100), 8), 1);
-        }
-
-        #[test]
-        fn test_scales_by_row_count() {
-            // 2.5M rows at 24 bytes/row — row threshold dominates
-            // ceil(2_500_000 / 1_000_000) = 3
-            assert_eq!(estimate_write_partitions(72, 3, Some(2_500_000), 8), 3);
-        }
-
-        #[test]
-        fn test_scales_by_byte_size() {
-            // 100k rows at 40KB/row = ~4GB total → ceil(4GB / 2GB) = 2
-            let sample_bytes = 40_000 * 10;
-            assert_eq!(
-                estimate_write_partitions(sample_bytes, 10, Some(100_000), 8),
-                2
-            );
-        }
-
-        #[test]
-        fn test_capped_at_max_partitions() {
-            // 10M rows would want 10 partitions, but capped at 4
-            assert_eq!(estimate_write_partitions(72, 3, Some(10_000_000), 4), 4);
-        }
-
-        #[test]
-        fn test_zero_sample_rows_returns_one() {
-            assert_eq!(estimate_write_partitions(0, 0, Some(1_000_000), 8), 1);
-        }
-
-        #[test]
-        fn test_no_row_hint_uses_sample_size() {
-            // Without a hint, uses sample_rows (3), which is small
-            assert_eq!(estimate_write_partitions(72, 3, None, 8), 1);
-        }
-
-        #[test]
-        fn test_always_at_least_one() {
-            assert_eq!(estimate_write_partitions(24, 1, Some(1), 8), 1);
-        }
-    }
-
    mod embedding_tests {
        use super::*;
        use crate::embeddings::MemoryRegistry;
--- a/rust/lancedb/src/database.rs
+++ b/rust/lancedb/src/database.rs
@@ -19,12 +19,12 @@ use std::sync::Arc;
 use std::time::Duration;

 use lance::dataset::ReadParams;
-use lance_namespace::LanceNamespace;
 use lance_namespace::models::{
    CreateNamespaceRequest, CreateNamespaceResponse, DescribeNamespaceRequest,
    DescribeNamespaceResponse, DropNamespaceRequest, DropNamespaceResponse, ListNamespacesRequest,
    ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
 };
+use lance_namespace::LanceNamespace;

 use crate::data::scannable::Scannable;
 use crate::error::Result;
--- a/rust/lancedb/src/database/listing.rs
+++ b/rust/lancedb/src/database/listing.rs
@@ -8,7 +8,7 @@ use std::path::Path;
 use std::{collections::HashMap, sync::Arc};

 use lance::dataset::refs::Ref;
-use lance::dataset::{ReadParams, WriteMode, builder::DatasetBuilder};
+use lance::dataset::{builder::DatasetBuilder, ReadParams, WriteMode};
 use lance::io::{ObjectStore, ObjectStoreParams, WrappingObjectStore};
 use lance_datafusion::utils::StreamingWriteSource;
 use lance_encoding::version::LanceFileVersion;
@@ -1097,11 +1097,11 @@ impl Database for ListingDatabase {
 #[cfg(test)]
 mod tests {
    use super::*;
-    use crate::Table;
    use crate::connection::ConnectRequest;
    use crate::data::scannable::Scannable;
    use crate::database::{CreateTableMode, CreateTableRequest};
    use crate::table::WriteOptions;
+    use crate::Table;
    use arrow_array::{Int32Array, RecordBatch, StringArray};
    use arrow_schema::{DataType, Field, Schema};
    use std::path::PathBuf;
--- a/rust/lancedb/src/database/namespace.rs
+++ b/rust/lancedb/src/database/namespace.rs
@@ -9,15 +9,16 @@ use std::sync::Arc;
 use async_trait::async_trait;
 use lance_io::object_store::{ObjectStoreParams, StorageOptionsAccessor};
 use lance_namespace::{
-    LanceNamespace,
    models::{
-        CreateNamespaceRequest, CreateNamespaceResponse, DeclareTableRequest,
-        DescribeNamespaceRequest, DescribeNamespaceResponse, DescribeTableRequest,
-        DropNamespaceRequest, DropNamespaceResponse, DropTableRequest, ListNamespacesRequest,
-        ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
+        CreateEmptyTableRequest, CreateNamespaceRequest, CreateNamespaceResponse,
+        DeclareTableRequest, DescribeNamespaceRequest, DescribeNamespaceResponse,
+        DescribeTableRequest, DropNamespaceRequest, DropNamespaceResponse, DropTableRequest,
+        ListNamespacesRequest, ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
    },
+    LanceNamespace,
 };
 use lance_namespace_impls::ConnectBuilder;
+use log::warn;

 use crate::database::ReadConsistency;
 use crate::error::{Error, Result};
@@ -212,18 +213,63 @@ impl Database for LanceNamespaceDatabase {
            ..Default::default()
        };

-        let (location, initial_storage_options) = {
-            let response = self.namespace.declare_table(declare_request).await?;
-            let loc = response.location.ok_or_else(|| Error::Runtime {
-                message: "Table location is missing from declare_table response".to_string(),
-            })?;
-            // Use storage options from response, fall back to self.storage_options
-            let opts = response
-                .storage_options
-                .or_else(|| Some(self.storage_options.clone()))
-                .filter(|o| !o.is_empty());
-            (loc, opts)
-        };
+        let (location, initial_storage_options) =
+            match self.namespace.declare_table(declare_request).await {
+                Ok(response) => {
+                    let loc = response.location.ok_or_else(|| Error::Runtime {
+                        message: "Table location is missing from declare_table response"
+                            .to_string(),
+                    })?;
+                    // Use storage options from response, fall back to self.storage_options
+                    let opts = response
+                        .storage_options
+                        .or_else(|| Some(self.storage_options.clone()))
+                        .filter(|o| !o.is_empty());
+                    (loc, opts)
+                }
+                Err(e) => {
+                    // Check if the error is "not supported" and try create_empty_table as fallback
+                    let err_str = e.to_string().to_lowercase();
+                    if err_str.contains("not supported") || err_str.contains("not implemented") {
+                        warn!(
+                            "declare_table is not supported by the namespace client, \
+                        falling back to deprecated create_empty_table. \
+                        create_empty_table is deprecated and will be removed in Lance 3.0.0. \
+                        Please upgrade your namespace client to support declare_table."
+                        );
+                        #[allow(deprecated)]
+                        let create_empty_request = CreateEmptyTableRequest {
+                            id: Some(table_id.clone()),
+                            ..Default::default()
+                        };
+
+                        #[allow(deprecated)]
+                        let create_response = self
+                            .namespace
+                            .create_empty_table(create_empty_request)
+                            .await
+                            .map_err(|e| Error::Runtime {
+                                message: format!("Failed to create empty table: {}", e),
+                            })?;
+
+                        let loc = create_response.location.ok_or_else(|| Error::Runtime {
+                            message: "Table location is missing from create_empty_table response"
+                                .to_string(),
+                        })?;
+                        // For deprecated path, use self.storage_options
+                        let opts = if self.storage_options.is_empty() {
+                            None
+                        } else {
+                            Some(self.storage_options.clone())
+                        };
+                        (loc, opts)
+                    } else {
+                        return Err(Error::Runtime {
+                            message: format!("Failed to declare table: {}", e),
+                        });
+                    }
+                }
+            };

        let write_params = if let Some(storage_opts) = initial_storage_options {
            let mut params = request.write_options.lance_write_params.unwrap_or_default();
--- a/rust/lancedb/src/dataloader/permutation/builder.rs
+++ b/rust/lancedb/src/dataloader/permutation/builder.rs
@@ -11,16 +11,16 @@ use lance_core::ROW_ID;
 use lance_datafusion::exec::SessionContextExt;

 use crate::{
-    Error, Result, Table,
    arrow::{SendableRecordBatchStream, SendableRecordBatchStreamExt, SimpleRecordBatchStream},
    connect,
    database::{CreateTableRequest, Database},
    dataloader::permutation::{
        shuffle::{Shuffler, ShufflerConfig},
-        split::{SPLIT_ID_COLUMN, SplitStrategy, Splitter},
-        util::{TemporaryDirectory, rename_column},
+        split::{SplitStrategy, Splitter, SPLIT_ID_COLUMN},
+        util::{rename_column, TemporaryDirectory},
    },
    query::{ExecutableQuery, QueryBase, Select},
+    Error, Result, Table,
 };

 pub const SRC_ROW_ID_COL: &str = "row_id";
--- a/rust/lancedb/src/dataloader/permutation/reader.rs
+++ b/rust/lancedb/src/dataloader/permutation/reader.rs
@@ -25,8 +25,8 @@ use futures::{StreamExt, TryStreamExt};
 use lance::dataset::scanner::DatasetRecordBatchStream;
 use lance::io::RecordBatchStream;
 use lance_arrow::RecordBatchExt;
-use lance_core::ROW_ID;
 use lance_core::error::LanceOptionExt;
+use lance_core::ROW_ID;
 use std::collections::HashMap;
 use std::sync::Arc;

@@ -426,7 +426,6 @@ impl PermutationReader {
            row_ids_query = row_ids_query.limit(limit as usize);
        }
        let mut row_ids = row_ids_query.execute().await?;
-        let mut idx_offset = 0;
        while let Some(batch) = row_ids.try_next().await? {
            let row_ids = batch
                .column(0)
@@ -434,9 +433,8 @@ impl PermutationReader {
                .values()
                .to_vec();
            for (i, row_id) in row_ids.iter().enumerate() {
-                offset_map.insert(i as u64 + idx_offset, *row_id);
+                offset_map.insert(i as u64, *row_id);
            }
-            idx_offset += batch.num_rows() as u64;
        }
        let offset_map = Arc::new(offset_map);
        *offset_map_ref = Some(offset_map.clone());
@@ -500,10 +498,10 @@ mod tests {
    use rand::seq::SliceRandom;

    use crate::{
-        Table,
        arrow::SendableRecordBatchStream,
        query::{ExecutableQuery, QueryBase},
-        test_utils::datagen::{LanceDbDatagenExt, virtual_table},
+        test_utils::datagen::{virtual_table, LanceDbDatagenExt},
+        Table,
    };

    use super::*;
@@ -847,106 +845,4 @@ mod tests {
            .to_vec();
        assert_eq!(idx_values, vec![row_ids[2] as i32]);
    }
-
-    #[tokio::test]
-    async fn test_filtered_permutation_full_iteration() {
-        use crate::dataloader::permutation::builder::PermutationBuilder;
-
-        // Create a base table with 10000 rows where idx goes 0..10000.
-        // Filter to even values only, giving 5000 rows in the permutation.
-        let base_table = lance_datagen::gen_batch()
-            .col("idx", lance_datagen::array::step::<Int32Type>())
-            .into_mem_table("tbl", RowCount::from(10000), BatchCount::from(1))
-            .await;
-
-        let permutation_table = PermutationBuilder::new(base_table.clone())
-            .with_filter("idx % 2 = 0".to_string())
-            .build()
-            .await
-            .unwrap();
-
-        assert_eq!(permutation_table.count_rows(None).await.unwrap(), 5000);
-
-        let reader = PermutationReader::try_from_tables(
-            base_table.base_table().clone(),
-            permutation_table.base_table().clone(),
-            0,
-        )
-        .await
-        .unwrap();
-
-        assert_eq!(reader.count_rows(), 5000);
-
-        // Iterate through all batches using a batch size that doesn't evenly divide
-        // the row count (5000 / 128 = 39 full batches + 1 batch of 8 rows).
-        let batch_size = 128;
-        let mut stream = reader
-            .read(
-                Select::All,
-                QueryExecutionOptions {
-                    max_batch_length: batch_size,
-                    ..Default::default()
-                },
-            )
-            .await
-            .unwrap();
-
-        let mut total_rows = 0u64;
-        let mut all_idx_values = Vec::new();
-        while let Some(batch) = stream.try_next().await.unwrap() {
-            assert!(batch.num_rows() <= batch_size as usize);
-            total_rows += batch.num_rows() as u64;
-            let idx_col = batch.column(0).as_primitive::<Int32Type>().values();
-            all_idx_values.extend(idx_col.iter().copied());
-        }
-
-        assert_eq!(total_rows, 5000);
-        assert_eq!(all_idx_values.len(), 5000);
-
-        // Every value should be even (from the filter)
-        assert!(all_idx_values.iter().all(|v| v % 2 == 0));
-
-        // Should have 5000 unique values
-        let unique: std::collections::HashSet<i32> = all_idx_values.iter().copied().collect();
-        assert_eq!(unique.len(), 5000);
-
-        // Use take_offsets to fetch rows from the beginning, middle, and end
-        // of the permutation. The values should match what we saw during iteration.
-
-        // Beginning
-        let batch = reader.take_offsets(&[0, 1, 2], Select::All).await.unwrap();
-        assert_eq!(batch.num_rows(), 3);
-        let idx_values = batch
-            .column(0)
-            .as_primitive::<Int32Type>()
-            .values()
-            .to_vec();
-        assert_eq!(idx_values, &all_idx_values[0..3]);
-
-        // Middle
-        let batch = reader
-            .take_offsets(&[2499, 2500, 2501], Select::All)
-            .await
-            .unwrap();
-        assert_eq!(batch.num_rows(), 3);
-        let idx_values = batch
-            .column(0)
-            .as_primitive::<Int32Type>()
-            .values()
-            .to_vec();
-        assert_eq!(idx_values, &all_idx_values[2499..2502]);
-
-        // End (last 3 rows)
-        let batch = reader
-            .take_offsets(&[4997, 4998, 4999], Select::All)
-            .await
-            .unwrap();
-        assert_eq!(batch.num_rows(), 3);
-        let idx_values = batch
-            .column(0)
-            .as_primitive::<Int32Type>()
-            .values()
-            .to_vec();
-        assert_eq!(idx_values, &all_idx_values[4997..5000]);
-    }
 }
--- a/rust/lancedb/src/dataloader/permutation/shuffle.rs
+++ b/rust/lancedb/src/dataloader/permutation/shuffle.rs
@@ -18,12 +18,12 @@ use lance_io::{
    scheduler::{ScanScheduler, SchedulerConfig},
    utils::CachedFileSize,
 };
-use rand::{Rng, RngCore, seq::SliceRandom};
+use rand::{seq::SliceRandom, Rng, RngCore};

 use crate::{
-    Error, Result,
    arrow::{SendableRecordBatchStream, SimpleRecordBatchStream},
-    dataloader::permutation::util::{TemporaryDirectory, non_crypto_rng},
+    dataloader::permutation::util::{non_crypto_rng, TemporaryDirectory},
+    Error, Result,
 };

 #[derive(Debug, Clone)]
@@ -281,7 +281,7 @@ mod tests {
    use datafusion_expr::col;
    use futures::TryStreamExt;
    use lance_datagen::{BatchCount, BatchGeneratorBuilder, ByteCount, RowCount, Seed};
-    use rand::{SeedableRng, rngs::SmallRng};
+    use rand::{rngs::SmallRng, SeedableRng};

    fn test_gen() -> BatchGeneratorBuilder {
        lance_datagen::gen_batch()
--- a/rust/lancedb/src/dataloader/permutation/split.rs
+++ b/rust/lancedb/src/dataloader/permutation/split.rs
@@ -2,8 +2,8 @@
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors

 use std::sync::{
-    Arc,
    atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering},
+    Arc,
 };

 use arrow_array::{Array, BooleanArray, RecordBatch, UInt64Array};
@@ -15,13 +15,13 @@ use lance_arrow::SchemaExt;
 use lance_core::ROW_ID;

 use crate::{
-    Error, Result,
    arrow::{SendableRecordBatchStream, SimpleRecordBatchStream},
    dataloader::{
        permutation::shuffle::{Shuffler, ShufflerConfig},
        permutation::util::TemporaryDirectory,
    },
    query::{Query, QueryBase, Select},
+    Error, Result,
 };

 pub const SPLIT_ID_COLUMN: &str = "split_id";
--- a/rust/lancedb/src/dataloader/permutation/util.rs
+++ b/rust/lancedb/src/dataloader/permutation/util.rs
@@ -7,12 +7,12 @@ use arrow_array::RecordBatch;
 use arrow_schema::{Fields, Schema};
 use datafusion_execution::disk_manager::DiskManagerMode;
 use futures::TryStreamExt;
-use rand::{RngCore, SeedableRng, rngs::SmallRng};
+use rand::{rngs::SmallRng, RngCore, SeedableRng};
 use tempfile::TempDir;

 use crate::{
-    Error, Result,
    arrow::{SendableRecordBatchStream, SimpleRecordBatchStream},
+    Error, Result,
 };

 /// Directory to use for temporary files
--- a/rust/lancedb/src/embeddings.rs
+++ b/rust/lancedb/src/embeddings.rs
@@ -23,9 +23,9 @@ use arrow_schema::{DataType, Field, SchemaBuilder, SchemaRef};
 use serde::{Deserialize, Serialize};

 use crate::{
-    Error,
    error::Result,
    table::{ColumnDefinition, ColumnKind, TableDefinition},
+    Error,
 };

 /// Trait for embedding functions
--- a/rust/lancedb/src/embeddings/bedrock.rs
+++ b/rust/lancedb/src/embeddings/bedrock.rs
@@ -8,7 +8,7 @@ use arrow::array::{AsArray, Float32Builder};
 use arrow_array::{Array, ArrayRef, FixedSizeListArray, Float32Array};
 use arrow_data::ArrayData;
 use arrow_schema::DataType;
-use serde_json::{Value, json};
+use serde_json::{json, Value};

 use super::EmbeddingFunction;
 use crate::{Error, Result};
--- a/rust/lancedb/src/embeddings/openai.rs
+++ b/rust/lancedb/src/embeddings/openai.rs
@@ -8,9 +8,9 @@ use arrow_array::{Array, ArrayRef, FixedSizeListArray, Float32Array};
 use arrow_data::ArrayData;
 use arrow_schema::DataType;
 use async_openai::{
-    Client,
    config::OpenAIConfig,
    types::{CreateEmbeddingRequest, Embedding, EmbeddingInput, EncodingFormat},
+    Client,
 };
 use tokio::{runtime::Handle, task};

--- a/rust/lancedb/src/embeddings/sentence_transformers.rs
+++ b/rust/lancedb/src/embeddings/sentence_transformers.rs
@@ -7,7 +7,7 @@ use super::EmbeddingFunction;
 use arrow::{
    array::{AsArray, PrimitiveBuilder},
    datatypes::{
-        ArrowPrimitiveType, Float16Type, Float32Type, Float64Type, Int64Type, UInt8Type, UInt32Type,
+        ArrowPrimitiveType, Float16Type, Float32Type, Float64Type, Int64Type, UInt32Type, UInt8Type,
    },
 };
 use arrow_array::{Array, FixedSizeListArray, PrimitiveArray};
@@ -16,8 +16,8 @@ use arrow_schema::DataType;
 use candle_core::{CpuStorage, Device, Layout, Storage, Tensor};
 use candle_nn::VarBuilder;
 use candle_transformers::models::bert::{BertModel, DTYPE};
-use hf_hub::{Repo, RepoType, api::sync::Api};
-use tokenizers::{PaddingParams, tokenizer::Tokenizer};
+use hf_hub::{api::sync::Api, Repo, RepoType};
+use tokenizers::{tokenizer::Tokenizer, PaddingParams};

 /// Compute embeddings using huggingface sentence-transformers.
 pub struct SentenceTransformersEmbeddingsBuilder {
@@ -230,7 +230,7 @@ impl SentenceTransformersEmbeddings {
            Storage::Cpu(CpuStorage::BF16(_)) => {
                return Err(crate::Error::Runtime {
                    message: "unsupported data type".to_string(),
-                });
+                })
            }
            _ => unreachable!("we already moved the tensor to the CPU device"),
        };
@@ -298,12 +298,12 @@ impl SentenceTransformersEmbeddings {
            DataType::Utf8View => {
                return Err(crate::Error::Runtime {
                    message: "Utf8View not yet implemented".to_string(),
-                });
+                })
            }
            _ => {
                return Err(crate::Error::Runtime {
                    message: "invalid type".to_string(),
-                });
+                })
            }
        };

--- a/rust/lancedb/src/error.rs
+++ b/rust/lancedb/src/error.rs
@@ -97,7 +97,10 @@ pub type Result<T> = std::result::Result<T, Error>;
 impl From<ArrowError> for Error {
    fn from(source: ArrowError) -> Self {
        match source {
-            ArrowError::ExternalError(source) => Self::from_box_error(source),
+            ArrowError::ExternalError(source) => match source.downcast::<Self>() {
+                Ok(e) => *e,
+                Err(source) => Self::External { source },
+            },
            _ => Self::Arrow { source },
        }
    }
@@ -107,7 +110,15 @@ impl From<DataFusionError> for Error {
    fn from(source: DataFusionError) -> Self {
        match source {
            DataFusionError::ArrowError(source, _) => (*source).into(),
-            DataFusionError::External(source) => Self::from_box_error(source),
+            DataFusionError::External(source) => match source.downcast::<Self>() {
+                Ok(e) => *e,
+                Err(source) => match source.downcast::<ArrowError>() {
+                    Ok(arrow_error) => Self::Arrow {
+                        source: *arrow_error,
+                    },
+                    Err(source) => Self::External { source },
+                },
+            },
            other => Self::External {
                source: Box::new(other),
            },
@@ -119,52 +130,15 @@ impl From<lance::Error> for Error {
    fn from(source: lance::Error) -> Self {
        // Try to unwrap external errors that were wrapped by lance
        match source {
-            lance::Error::Wrapped { error, .. } => Self::from_box_error(error),
-            lance::Error::External { source } => Self::from_box_error(source),
+            lance::Error::Wrapped { error, .. } => match error.downcast::<Self>() {
+                Ok(e) => *e,
+                Err(source) => Self::External { source },
+            },
            _ => Self::Lance { source },
        }
    }
 }

-impl Error {
-    fn from_box_error(mut source: Box<dyn std::error::Error + Send + Sync>) -> Self {
-        source = match source.downcast::<Self>() {
-            Ok(e) => match *e {
-                Self::External { source } => return Self::from_box_error(source),
-                other => return other,
-            },
-            Err(source) => source,
-        };
-
-        source = match source.downcast::<lance::Error>() {
-            Ok(e) => match *e {
-                lance::Error::Wrapped { error, .. } => return Self::from_box_error(error),
-                other => return other.into(),
-            },
-            Err(source) => source,
-        };
-
-        source = match source.downcast::<ArrowError>() {
-            Ok(e) => match *e {
-                ArrowError::ExternalError(source) => return Self::from_box_error(source),
-                other => return other.into(),
-            },
-            Err(source) => source,
-        };
-
-        source = match source.downcast::<DataFusionError>() {
-            Ok(e) => match *e {
-                DataFusionError::ArrowError(source, _) => return (*source).into(),
-                DataFusionError::External(source) => return Self::from_box_error(source),
-                other => return other.into(),
-            },
-            Err(source) => source,
-        };
-
-        Self::External { source }
-    }
-}
-
 impl From<object_store::Error> for Error {
    fn from(source: object_store::Error) -> Self {
        Self::ObjectStore { source }
--- a/rust/lancedb/src/expr.rs
+++ b/rust/lancedb/src/expr.rs
@@ -1,131 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The LanceDB Authors
-
-//! Expression builder API for type-safe query construction
-//!
-//! This module provides a fluent API for building expressions that can be used
-//! in filters and projections. It wraps DataFusion's expression system.
-//!
-//! # Examples
-//!
-//! ```rust
-//! use std::ops::Mul;
-//! use lancedb::expr::{col, lit};
-//!
-//! let expr = col("age").gt(lit(18));
-//! let expr = col("age").gt(lit(18)).and(col("status").eq(lit("active")));
-//! let expr = col("price") * lit(1.1);
-//! ```
-
-mod sql;
-
-pub use sql::expr_to_sql_string;
-
-use std::sync::Arc;
-
-use arrow_schema::DataType;
-use datafusion_expr::{Expr, ScalarUDF, expr_fn::cast};
-use datafusion_functions::string::expr_fn as string_expr_fn;
-
-pub use datafusion_expr::{col, lit};
-
-pub use datafusion_expr::Expr as DfExpr;
-
-pub fn lower(expr: Expr) -> Expr {
-    string_expr_fn::lower(expr)
-}
-
-pub fn upper(expr: Expr) -> Expr {
-    string_expr_fn::upper(expr)
-}
-
-pub fn contains(expr: Expr, search: Expr) -> Expr {
-    string_expr_fn::contains(expr, search)
-}
-
-pub fn expr_cast(expr: Expr, data_type: DataType) -> Expr {
-    cast(expr, data_type)
-}
-
-lazy_static::lazy_static! {
-    static ref FUNC_REGISTRY: std::sync::RwLock<std::collections::HashMap<String, Arc<ScalarUDF>>> = {
-        let mut m = std::collections::HashMap::new();
-        m.insert("lower".to_string(), datafusion_functions::string::lower());
-        m.insert("upper".to_string(), datafusion_functions::string::upper());
-        m.insert("contains".to_string(), datafusion_functions::string::contains());
-        m.insert("btrim".to_string(), datafusion_functions::string::btrim());
-        m.insert("ltrim".to_string(), datafusion_functions::string::ltrim());
-        m.insert("rtrim".to_string(), datafusion_functions::string::rtrim());
-        m.insert("concat".to_string(), datafusion_functions::string::concat());
-        m.insert("octet_length".to_string(), datafusion_functions::string::octet_length());
-        std::sync::RwLock::new(m)
-    };
-}
-
-pub fn func(name: impl AsRef<str>, args: Vec<Expr>) -> crate::Result<Expr> {
-    let name = name.as_ref();
-    let registry = FUNC_REGISTRY
-        .read()
-        .map_err(|e| crate::Error::InvalidInput {
-            message: format!("lock poisoned: {}", e),
-        })?;
-    let udf = registry
-        .get(name)
-        .ok_or_else(|| crate::Error::InvalidInput {
-            message: format!("unknown function: {}", name),
-        })?;
-    Ok(Expr::ScalarFunction(
-        datafusion_expr::expr::ScalarFunction::new_udf(udf.clone(), args),
-    ))
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_col_lit_comparisons() {
-        let expr = col("age").gt(lit(18));
-        let sql = expr_to_sql_string(&expr).unwrap();
-        assert!(sql.contains("age") && sql.contains("18"));
-
-        let expr = col("name").eq(lit("Alice"));
-        let sql = expr_to_sql_string(&expr).unwrap();
-        assert!(sql.contains("name") && sql.contains("Alice"));
-    }
-
-    #[test]
-    fn test_compound_expression() {
-        let expr = col("age").gt(lit(18)).and(col("status").eq(lit("active")));
-        let sql = expr_to_sql_string(&expr).unwrap();
-        assert!(sql.contains("age") && sql.contains("status"));
-    }
-
-    #[test]
-    fn test_string_functions() {
-        let expr = lower(col("name"));
-        let sql = expr_to_sql_string(&expr).unwrap();
-        assert!(sql.to_lowercase().contains("lower"));
-
-        let expr = contains(col("text"), lit("search"));
-        let sql = expr_to_sql_string(&expr).unwrap();
-        assert!(sql.to_lowercase().contains("contains"));
-    }
-
-    #[test]
-    fn test_func() {
-        let expr = func("lower", vec![col("x")]).unwrap();
-        let sql = expr_to_sql_string(&expr).unwrap();
-        assert!(sql.to_lowercase().contains("lower"));
-
-        let result = func("unknown_func", vec![col("x")]);
-        assert!(result.is_err());
-    }
-
-    #[test]
-    fn test_arithmetic() {
-        let expr = col("price") * lit(1.1);
-        let sql = expr_to_sql_string(&expr).unwrap();
-        assert!(sql.contains("price"));
-    }
-}
--- a/rust/lancedb/src/expr/sql.rs
+++ b/rust/lancedb/src/expr/sql.rs
@@ -1,12 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The LanceDB Authors
-
-use datafusion_expr::Expr;
-use datafusion_sql::unparser;
-
-pub fn expr_to_sql_string(expr: &Expr) -> crate::Result<String> {
-    let ast = unparser::expr_to_sql(expr).map_err(|e| crate::Error::InvalidInput {
-        message: format!("failed to serialize expression to SQL: {}", e),
-    })?;
-    Ok(ast.to_string())
-}
--- a/rust/lancedb/src/index.rs
+++ b/rust/lancedb/src/index.rs
@@ -9,7 +9,7 @@ use std::time::Duration;
 use vector::IvfFlatIndexBuilder;

 use crate::index::vector::IvfRqIndexBuilder;
-use crate::{DistanceType, Error, Result, table::BaseTable};
+use crate::{table::BaseTable, DistanceType, Error, Result};

 use self::{
    scalar::{BTreeIndexBuilder, BitmapIndexBuilder, LabelListIndexBuilder},
--- a/rust/lancedb/src/index/scalar.rs
+++ b/rust/lancedb/src/index/scalar.rs
@@ -27,7 +27,7 @@
 ///
 /// The btree index does not currently have any parameters though parameters such as the
 /// block size may be added in the future.
-#[derive(Default, Debug, Clone, serde::Serialize)]
+#[derive(Default, Debug, Clone)]
 pub struct BTreeIndexBuilder {}

 impl BTreeIndexBuilder {}
@@ -39,7 +39,7 @@ impl BTreeIndexBuilder {}
 /// This index works best for low-cardinality (i.e., less than 1000 unique values) columns,
 /// where the number of unique values is small.
 /// The bitmap stores a list of row ids where the value is present.
-#[derive(Debug, Clone, Default, serde::Serialize)]
+#[derive(Debug, Clone, Default)]
 pub struct BitmapIndexBuilder {}

 /// Builder for LabelList index.
@@ -48,10 +48,10 @@ pub struct BitmapIndexBuilder {}
 /// support queries with `array_contains_all` and `array_contains_any`
 /// using an underlying bitmap index.
 ///
-#[derive(Debug, Clone, Default, serde::Serialize)]
+#[derive(Debug, Clone, Default)]
 pub struct LabelListIndexBuilder {}

+pub use lance_index::scalar::inverted::query::*;
 pub use lance_index::scalar::FullTextSearchQuery;
 pub use lance_index::scalar::InvertedIndexParams as FtsIndexBuilder;
 pub use lance_index::scalar::InvertedIndexParams;
-pub use lance_index::scalar::inverted::query::*;
--- a/rust/lancedb/src/index/vector.rs
+++ b/rust/lancedb/src/index/vector.rs
@@ -7,7 +7,6 @@
 //! Vector indices are only supported on fixed-size-list (tensor) columns of floating point
 //! values
 use lance::table::format::{IndexMetadata, Manifest};
-use serde::Serialize;

 use crate::DistanceType;

@@ -182,17 +181,14 @@ macro_rules! impl_hnsw_params_setter {
 /// The partitioning process is called IVF and the `num_partitions` parameter controls how many groups to create.
 ///
 /// Note that training an IVF Flat index on a large dataset is a slow operation and currently is also a memory intensive operation.
-#[derive(Debug, Clone, Serialize)]
+#[derive(Debug, Clone)]
 pub struct IvfFlatIndexBuilder {
-    #[serde(rename = "metric_type")]
    pub(crate) distance_type: DistanceType,

    // IVF
-    #[serde(skip_serializing_if = "Option::is_none")]
    pub(crate) num_partitions: Option<u32>,
    pub(crate) sample_rate: u32,
    pub(crate) max_iterations: u32,
-    #[serde(skip_serializing_if = "Option::is_none")]
    pub(crate) target_partition_size: Option<u32>,
 }

@@ -217,17 +213,14 @@ impl IvfFlatIndexBuilder {
 ///
 /// This index compresses vectors using scalar quantization and groups them into IVF partitions.
 /// It offers a balance between search performance and storage footprint.
-#[derive(Debug, Clone, Serialize)]
+#[derive(Debug, Clone)]
 pub struct IvfSqIndexBuilder {
-    #[serde(rename = "metric_type")]
    pub(crate) distance_type: DistanceType,

    // IVF
-    #[serde(skip_serializing_if = "Option::is_none")]
    pub(crate) num_partitions: Option<u32>,
    pub(crate) sample_rate: u32,
    pub(crate) max_iterations: u32,
-    #[serde(skip_serializing_if = "Option::is_none")]
    pub(crate) target_partition_size: Option<u32>,
 }

@@ -268,23 +261,18 @@ impl IvfSqIndexBuilder {
 ///
 /// Note that training an IVF PQ index on a large dataset is a slow operation and
 /// currently is also a memory intensive operation.
-#[derive(Debug, Clone, Serialize)]
+#[derive(Debug, Clone)]
 pub struct IvfPqIndexBuilder {
-    #[serde(rename = "metric_type")]
    pub(crate) distance_type: DistanceType,

    // IVF
-    #[serde(skip_serializing_if = "Option::is_none")]
    pub(crate) num_partitions: Option<u32>,
    pub(crate) sample_rate: u32,
    pub(crate) max_iterations: u32,
-    #[serde(skip_serializing_if = "Option::is_none")]
    pub(crate) target_partition_size: Option<u32>,

    // PQ
-    #[serde(skip_serializing_if = "Option::is_none")]
    pub(crate) num_sub_vectors: Option<u32>,
-    #[serde(skip_serializing_if = "Option::is_none")]
    pub(crate) num_bits: Option<u32>,
 }

@@ -335,18 +323,14 @@ pub(crate) fn suggested_num_sub_vectors(dim: u32) -> u32 {
 ///
 /// Note that training an IVF RQ index on a large dataset is a slow operation and
 /// currently is also a memory intensive operation.
-#[derive(Debug, Clone, Serialize)]
+#[derive(Debug, Clone)]
 pub struct IvfRqIndexBuilder {
    // IVF
-    #[serde(rename = "metric_type")]
    pub(crate) distance_type: DistanceType,
-    #[serde(skip_serializing_if = "Option::is_none")]
    pub(crate) num_partitions: Option<u32>,
-    #[serde(skip_serializing_if = "Option::is_none")]
    pub(crate) num_bits: Option<u32>,
    pub(crate) sample_rate: u32,
    pub(crate) max_iterations: u32,
-    #[serde(skip_serializing_if = "Option::is_none")]
    pub(crate) target_partition_size: Option<u32>,
 }

@@ -381,16 +365,13 @@ impl IvfRqIndexBuilder {
 /// quickly find the closest vectors to a query vector.
 ///
 /// The PQ (product quantizer) is used to compress the vectors as the same as IVF PQ.
-#[derive(Debug, Clone, Serialize)]
+#[derive(Debug, Clone)]
 pub struct IvfHnswPqIndexBuilder {
    // IVF
-    #[serde(rename = "metric_type")]
    pub(crate) distance_type: DistanceType,
-    #[serde(skip_serializing_if = "Option::is_none")]
    pub(crate) num_partitions: Option<u32>,
    pub(crate) sample_rate: u32,
    pub(crate) max_iterations: u32,
-    #[serde(skip_serializing_if = "Option::is_none")]
    pub(crate) target_partition_size: Option<u32>,

    // HNSW
@@ -398,9 +379,7 @@ pub struct IvfHnswPqIndexBuilder {
    pub(crate) ef_construction: u32,

    // PQ
-    #[serde(skip_serializing_if = "Option::is_none")]
    pub(crate) num_sub_vectors: Option<u32>,
-    #[serde(skip_serializing_if = "Option::is_none")]
    pub(crate) num_bits: Option<u32>,
 }

@@ -436,16 +415,13 @@ impl IvfHnswPqIndexBuilder {
 ///
 /// The SQ (scalar quantizer) is used to compress the vectors,
 /// each vector is mapped to a 8-bit integer vector, 4x compression ratio for float32 vector.
-#[derive(Debug, Clone, Serialize)]
+#[derive(Debug, Clone)]
 pub struct IvfHnswSqIndexBuilder {
    // IVF
-    #[serde(rename = "metric_type")]
    pub(crate) distance_type: DistanceType,
-    #[serde(skip_serializing_if = "Option::is_none")]
    pub(crate) num_partitions: Option<u32>,
    pub(crate) sample_rate: u32,
    pub(crate) max_iterations: u32,
-    #[serde(skip_serializing_if = "Option::is_none")]
    pub(crate) target_partition_size: Option<u32>,

    // HNSW
--- a/rust/lancedb/src/index/waiter.rs
+++ b/rust/lancedb/src/index/waiter.rs
@@ -1,9 +1,9 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors

-use crate::Error;
 use crate::error::Result;
 use crate::table::BaseTable;
+use crate::Error;
 use log::debug;
 use std::time::{Duration, Instant};
 use tokio::time::sleep;
--- a/rust/lancedb/src/io/object_store.rs
+++ b/rust/lancedb/src/io/object_store.rs
@@ -5,11 +5,11 @@

 use std::{fmt::Formatter, sync::Arc};

-use futures::{TryFutureExt, stream::BoxStream};
+use futures::{stream::BoxStream, TryFutureExt};
 use lance::io::WrappingObjectStore;
 use object_store::{
-    Error, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore,
-    PutMultipartOptions, PutOptions, PutPayload, PutResult, Result, UploadPart, path::Path,
+    path::Path, Error, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore,
+    PutMultipartOptions, PutOptions, PutPayload, PutResult, Result, UploadPart,
 };

 use async_trait::async_trait;
--- a/rust/lancedb/src/io/object_store/io_tracking.rs
+++ b/rust/lancedb/src/io/object_store/io_tracking.rs
@@ -10,9 +10,8 @@ use bytes::Bytes;
 use futures::stream::BoxStream;
 use lance::io::WrappingObjectStore;
 use object_store::{
-    GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore,
+    path::Path, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore,
    PutMultipartOptions, PutOptions, PutPayload, PutResult, Result as OSResult, UploadPart,
-    path::Path,
 };

 #[derive(Debug, Default)]
--- a/rust/lancedb/src/lib.rs
+++ b/rust/lancedb/src/lib.rs
@@ -169,7 +169,6 @@ pub mod database;
 pub mod dataloader;
 pub mod embeddings;
 pub mod error;
-pub mod expr;
 pub mod index;
 pub mod io;
 pub mod ipc;
--- a/rust/lancedb/src/query.rs
+++ b/rust/lancedb/src/query.rs
@@ -5,26 +5,26 @@ use std::sync::Arc;
 use std::{future::Future, time::Duration};

 use arrow::compute::concat_batches;
-use arrow_array::{Array, Float16Array, Float32Array, Float64Array, make_array};
+use arrow_array::{make_array, Array, Float16Array, Float32Array, Float64Array};
 use arrow_schema::{DataType, SchemaRef};
 use datafusion_expr::Expr;
 use datafusion_physical_plan::ExecutionPlan;
-use futures::{FutureExt, TryFutureExt, TryStreamExt, stream, try_join};
+use futures::{stream, try_join, FutureExt, TryFutureExt, TryStreamExt};
 use half::f16;
-use lance::dataset::{ROW_ID, scanner::DatasetRecordBatchStream};
+use lance::dataset::{scanner::DatasetRecordBatchStream, ROW_ID};
 use lance_arrow::RecordBatchExt;
 use lance_datafusion::exec::execute_plan;
-use lance_index::scalar::FullTextSearchQuery;
 use lance_index::scalar::inverted::SCORE_COL;
+use lance_index::scalar::FullTextSearchQuery;
 use lance_index::vector::DIST_COL;
 use lance_io::stream::RecordBatchStreamAdapter;

-use crate::DistanceType;
 use crate::error::{Error, Result};
 use crate::rerankers::rrf::RRFReranker;
-use crate::rerankers::{NormalizeMethod, Reranker, check_reranker_result};
+use crate::rerankers::{check_reranker_result, NormalizeMethod, Reranker};
 use crate::table::BaseTable;
 use crate::utils::TimeoutStream;
+use crate::DistanceType;
 use crate::{arrow::SendableRecordBatchStream, table::AnyQuery};

 mod hybrid;
@@ -161,11 +161,10 @@ impl IntoQueryVector for &dyn Array {
        if data_type != self.data_type() {
            Err(Error::InvalidInput {
                message: format!(
-                    "failed to create query vector, the input data type was {:?} but the expected data type was {:?}",
-                    self.data_type(),
-                    data_type
-                ),
-            })
+                "failed to create query vector, the input data type was {:?} but the expected data type was {:?}",
+                self.data_type(),
+                data_type
+            )})
        } else {
            let data = self.to_data();
            Ok(make_array(data))
@@ -187,7 +186,7 @@ impl IntoQueryVector for &[f16] {
            DataType::Float32 => {
                let arr: Vec<f32> = self.iter().map(|x| f32::from(*x)).collect();
                Ok(Arc::new(Float32Array::from(arr)))
-            }
+            },
            DataType::Float64 => {
                let arr: Vec<f64> = self.iter().map(|x| f64::from(*x)).collect();
                Ok(Arc::new(Float64Array::from(arr)))
@@ -195,7 +194,8 @@ impl IntoQueryVector for &[f16] {
            _ => Err(Error::InvalidInput {
                message: format!(
                    "failed to create query vector, the input data type was &[f16] but the embedding model \"{}\" expected data type {:?}",
-                    embedding_model_label, data_type
+                    embedding_model_label,
+                    data_type
                ),
            }),
        }
@@ -216,7 +216,7 @@ impl IntoQueryVector for &[f32] {
            DataType::Float32 => {
                let arr: Vec<f32> = self.to_vec();
                Ok(Arc::new(Float32Array::from(arr)))
-            }
+            },
            DataType::Float64 => {
                let arr: Vec<f64> = self.iter().map(|x| *x as f64).collect();
                Ok(Arc::new(Float64Array::from(arr)))
@@ -224,7 +224,8 @@ impl IntoQueryVector for &[f32] {
            _ => Err(Error::InvalidInput {
                message: format!(
                    "failed to create query vector, the input data type was &[f32] but the embedding model \"{}\" expected data type {:?}",
-                    embedding_model_label, data_type
+                    embedding_model_label,
+                    data_type
                ),
            }),
        }
@@ -238,25 +239,26 @@ impl IntoQueryVector for &[f64] {
        embedding_model_label: &str,
    ) -> Result<Arc<dyn Array>> {
        match data_type {
-            DataType::Float16 => {
-                let arr: Vec<f16> = self.iter().map(|x| f16::from_f64(*x)).collect();
-                Ok(Arc::new(Float16Array::from(arr)))
+                DataType::Float16 => {
+                    let arr: Vec<f16> = self.iter().map(|x| f16::from_f64(*x)).collect();
+                    Ok(Arc::new(Float16Array::from(arr)))
+                }
+                DataType::Float32 => {
+                    let arr: Vec<f32> = self.iter().map(|x| *x as f32).collect();
+                    Ok(Arc::new(Float32Array::from(arr)))
+                },
+                DataType::Float64 => {
+                    let arr: Vec<f64> = self.to_vec();
+                    Ok(Arc::new(Float64Array::from(arr)))
+                }
+                _ => Err(Error::InvalidInput {
+                    message: format!(
+                        "failed to create query vector, the input data type was &[f64] but the embedding model \"{}\" expected data type {:?}",
+                        embedding_model_label,
+                        data_type
+                    ),
+                }),
            }
-            DataType::Float32 => {
-                let arr: Vec<f32> = self.iter().map(|x| *x as f32).collect();
-                Ok(Arc::new(Float32Array::from(arr)))
-            }
-            DataType::Float64 => {
-                let arr: Vec<f64> = self.to_vec();
-                Ok(Arc::new(Float64Array::from(arr)))
-            }
-            _ => Err(Error::InvalidInput {
-                message: format!(
-                    "failed to create query vector, the input data type was &[f64] but the embedding model \"{}\" expected data type {:?}",
-                    embedding_model_label, data_type
-                ),
-            }),
-        }
    }
 }

@@ -357,28 +359,6 @@ pub trait QueryBase {
    /// on the filter column(s).
    fn only_if(self, filter: impl AsRef<str>) -> Self;

-    /// Only return rows which match the filter, using an expression builder.
-    ///
-    /// Use [`crate::expr`] for building type-safe expressions:
-    ///
-    /// ```
-    /// use lancedb::expr::{col, lit};
-    /// use lancedb::query::{QueryBase, ExecutableQuery};
-    ///
-    /// # use lancedb::Table;
-    /// # async fn query(table: &Table) -> Result<(), Box<dyn std::error::Error>> {
-    /// let results = table.query()
-    ///     .only_if_expr(col("age").gt(lit(18)).and(col("status").eq(lit("active"))))
-    ///     .execute()
-    ///     .await?;
-    /// # Ok(())
-    /// # }
-    /// ```
-    ///
-    /// Note: Expression filters are not supported for remote/server-side queries.
-    /// Use [`QueryBase::only_if`] with SQL strings for remote tables.
-    fn only_if_expr(self, filter: datafusion_expr::Expr) -> Self;
-
    /// Perform a full text search on the table.
    ///
    /// The results will be returned in order of BM25 scores.
@@ -488,11 +468,6 @@ impl<T: HasQuery> QueryBase for T {
        self
    }

-    fn only_if_expr(mut self, filter: datafusion_expr::Expr) -> Self {
-        self.mut_query().filter = Some(QueryFilter::Datafusion(filter));
-        self
-    }
-
    fn full_text_search(mut self, query: FullTextSearchQuery) -> Self {
        if self.mut_query().limit.is_none() {
            self.mut_query().limit = Some(DEFAULT_TOP_K);
@@ -1009,13 +984,13 @@ impl VectorQuery {
                message: "minimum_nprobes must be greater than 0".to_string(),
            });
        }
-        if let Some(maximum_nprobes) = self.request.maximum_nprobes
-            && minimum_nprobes > maximum_nprobes
-        {
-            return Err(Error::InvalidInput {
-                message: "minimum_nprobes must be less than or equal to maximum_nprobes"
-                    .to_string(),
-            });
+        if let Some(maximum_nprobes) = self.request.maximum_nprobes {
+            if minimum_nprobes > maximum_nprobes {
+                return Err(Error::InvalidInput {
+                    message: "minimum_nprobes must be less than or equal to maximum_nprobes"
+                        .to_string(),
+                });
+            }
        }
        self.request.minimum_nprobes = minimum_nprobes;
        Ok(self)
@@ -1405,8 +1380,8 @@ mod tests {
    use super::*;
    use arrow::{array::downcast_array, compute::concat_batches, datatypes::Int32Type};
    use arrow_array::{
-        FixedSizeListArray, Float32Array, Int32Array, RecordBatch, StringArray, cast::AsArray,
-        types::Float32Type,
+        cast::AsArray, types::Float32Type, FixedSizeListArray, Float32Array, Int32Array,
+        RecordBatch, StringArray,
    };
    use arrow_schema::{DataType, Field as ArrowField, Schema as ArrowSchema};
    use futures::{StreamExt, TryStreamExt};
@@ -1414,7 +1389,7 @@ mod tests {
    use rand::seq::IndexedRandom;
    use tempfile::tempdir;

-    use crate::{Table, connect, database::CreateTableMode, index::Index};
+    use crate::{connect, database::CreateTableMode, index::Index, Table};

    #[tokio::test]
    async fn test_setters_getters() {
@@ -1752,13 +1727,11 @@ mod tests {
            .limit(1)
            .execute()
            .await;
-        assert!(
-            error_result
-                .err()
-                .unwrap()
-                .to_string()
-                .contains("No vector column found to match with the query vector dimension: 3")
-        );
+        assert!(error_result
+            .err()
+            .unwrap()
+            .to_string()
+            .contains("No vector column found to match with the query vector dimension: 3"));
    }

    #[tokio::test]
@@ -2010,7 +1983,7 @@ mod tests {

        // Sample 1 - 3 tokens for each string value
        let tokens = ["a", "b", "c", "d", "e"];
-        use rand::{Rng, rng};
+        use rand::{rng, Rng};

        let mut rng = rng();
        let text: StringArray = (0..nrows)
--- a/rust/lancedb/src/query/hybrid.rs
+++ b/rust/lancedb/src/query/hybrid.rs
@@ -5,7 +5,7 @@ use arrow::compute::{
    kernels::numeric::{div, sub},
    max, min,
 };
-use arrow_array::{Float32Array, RecordBatch, cast::downcast_array};
+use arrow_array::{cast::downcast_array, Float32Array, RecordBatch};
 use arrow_schema::{DataType, Field, Schema, SortOptions};
 use lance::dataset::ROW_ID;
 use lance_index::{scalar::inverted::SCORE_COL, vector::DIST_COL};
@@ -253,10 +253,7 @@ mod test {
        let result = rank(batch.clone(), "bad_col", None);
        match result {
            Err(Error::InvalidInput { message }) => {
-                assert_eq!(
-                    "expected column bad_col not found in rank. found columns [\"name\", \"score\"]",
-                    message
-                );
+                assert_eq!("expected column bad_col not found in rank. found columns [\"name\", \"score\"]", message);
            }
            _ => {
                panic!("expected invalid input error, received {:?}", result)
--- a/rust/lancedb/src/remote/client.rs
+++ b/rust/lancedb/src/remote/client.rs
@@ -4,8 +4,8 @@
 use http::HeaderName;
 use log::debug;
 use reqwest::{
-    Body, Request, RequestBuilder, Response,
    header::{HeaderMap, HeaderValue},
+    Body, Request, RequestBuilder, Response,
 };
 use std::{collections::HashMap, future::Future, str::FromStr, sync::Arc, time::Duration};

@@ -446,23 +446,13 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
                })?,
            );
        }
-        // Map azure storage options to x-azure-* headers.
-        // The option key uses underscores (e.g. "azure_client_id") while the
-        // header uses hyphens (e.g. "x-azure-client-id").
-        let azure_opts: [(&str, &str); 3] = [
-            ("azure_storage_account_name", "x-azure-storage-account-name"),
-            ("azure_client_id", "x-azure-client-id"),
-            ("azure_tenant_id", "x-azure-tenant-id"),
-        ];
-        for (opt_key, header_name) in azure_opts {
-            if let Some(v) = options.0.get(opt_key) {
-                headers.insert(
-                    HeaderName::from_static(header_name),
-                    HeaderValue::from_str(v).map_err(|_| Error::InvalidInput {
-                        message: format!("non-ascii value '{}' for option '{}'", v, opt_key),
-                    })?,
-                );
-            }
+        if let Some(v) = options.0.get("azure_storage_account_name") {
+            headers.insert(
+                HeaderName::from_static("x-azure-storage-account-name"),
+                HeaderValue::from_str(v).map_err(|_| Error::InvalidInput {
+                    message: format!("non-ascii storage account name '{}' provided", db_name),
+                })?,
+            );
        }

        for (key, value) in &config.extra_headers {
@@ -660,13 +650,14 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
    pub fn extract_request_id(&self, request: &mut Request) -> String {
        // Set a request id.
        // TODO: allow the user to supply this, through middleware?
-        if let Some(request_id) = request.headers().get(REQUEST_ID_HEADER) {
+        let request_id = if let Some(request_id) = request.headers().get(REQUEST_ID_HEADER) {
            request_id.to_str().unwrap().to_string()
        } else {
            let request_id = uuid::Uuid::new_v4().to_string();
            self.set_request_id(request, &request_id);
            request_id
-        }
+        };
+        request_id
    }

    /// Set the request ID header
@@ -1085,34 +1076,4 @@ mod tests {
            _ => panic!("Expected Runtime error"),
        }
    }
-
-    #[test]
-    fn test_default_headers_azure_opts() {
-        let mut opts = HashMap::new();
-        opts.insert(
-            "azure_storage_account_name".to_string(),
-            "myaccount".to_string(),
-        );
-        opts.insert("azure_client_id".to_string(), "my-client-id".to_string());
-        opts.insert("azure_tenant_id".to_string(), "my-tenant-id".to_string());
-        let remote_opts = RemoteOptions::new(opts);
-
-        let headers = RestfulLanceDbClient::<Sender>::default_headers(
-            "test-key",
-            "us-east-1",
-            "testdb",
-            false,
-            &remote_opts,
-            None,
-            &ClientConfig::default(),
-        )
-        .unwrap();
-
-        assert_eq!(
-            headers.get("x-azure-storage-account-name").unwrap(),
-            "myaccount"
-        );
-        assert_eq!(headers.get("x-azure-client-id").unwrap(), "my-client-id");
-        assert_eq!(headers.get("x-azure-tenant-id").unwrap(), "my-tenant-id");
-    }
 }
--- a/rust/lancedb/src/remote/db.rs
+++ b/rust/lancedb/src/remote/db.rs
@@ -16,7 +16,6 @@ use lance_namespace::models::{
    ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
 };

-use crate::Error;
 use crate::database::{
    CloneTableRequest, CreateTableMode, CreateTableRequest, Database, DatabaseOptions,
    OpenTableRequest, ReadConsistency, TableNamesRequest,
@@ -24,11 +23,12 @@ use crate::database::{
 use crate::error::Result;
 use crate::remote::util::stream_as_body;
 use crate::table::BaseTable;
+use crate::Error;

-use super::ARROW_STREAM_CONTENT_TYPE;
 use super::client::{ClientConfig, HttpSend, RequestResultExt, RestfulLanceDbClient, Sender};
 use super::table::RemoteTable;
 use super::util::parse_server_version;
+use super::ARROW_STREAM_CONTENT_TYPE;

 // Request structure for the remote clone table API
 #[derive(serde::Serialize)]
@@ -249,9 +249,9 @@ impl RemoteDatabase {
 #[cfg(all(test, feature = "remote"))]
 mod test_utils {
    use super::*;
-    use crate::remote::ClientConfig;
    use crate::remote::client::test_utils::MockSender;
    use crate::remote::client::test_utils::{client_with_handler, client_with_handler_and_config};
+    use crate::remote::ClientConfig;

    impl RemoteDatabase<MockSender> {
        pub fn new_mock<F, T>(handler: F) -> Self
@@ -777,12 +777,7 @@ impl RemoteOptions {

 impl From<StorageOptions> for RemoteOptions {
    fn from(options: StorageOptions) -> Self {
-        let supported_opts = vec![
-            "account_name",
-            "azure_storage_account_name",
-            "azure_client_id",
-            "azure_tenant_id",
-        ];
+        let supported_opts = vec!["account_name", "azure_storage_account_name"];
        let mut filtered = HashMap::new();
        for opt in supported_opts {
            if let Some(v) = options.0.get(opt) {
@@ -804,9 +799,9 @@ mod tests {

    use crate::connection::ConnectBuilder;
    use crate::{
-        Connection, Error,
        database::CreateTableMode,
-        remote::{ARROW_STREAM_CONTENT_TYPE, ClientConfig, HeaderProvider, JSON_CONTENT_TYPE},
+        remote::{ClientConfig, HeaderProvider, ARROW_STREAM_CONTENT_TYPE, JSON_CONTENT_TYPE},
+        Connection, Error,
    };

    #[test]
--- a/rust/lancedb/src/remote/retry.rs
+++ b/rust/lancedb/src/remote/retry.rs
@@ -1,8 +1,8 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors

-use crate::Error;
 use crate::remote::RetryConfig;
+use crate::Error;
 use log::debug;
 use std::time::Duration;

--- a/rust/lancedb/src/remote/table.rs
+++ b/rust/lancedb/src/remote/table.rs
@@ -4,16 +4,16 @@
 pub mod insert;

 use self::insert::RemoteInsertExec;
-use crate::expr::expr_to_sql_string;

-use super::ARROW_STREAM_CONTENT_TYPE;
 use super::client::RequestResultExt;
 use super::client::{HttpSend, RestfulLanceDbClient, Sender};
 use super::db::ServerVersion;
+use super::ARROW_STREAM_CONTENT_TYPE;
+use crate::index::waiter::wait_for_index;
 use crate::index::Index;
 use crate::index::IndexStatistics;
-use crate::index::waiter::wait_for_index;
 use crate::query::{QueryFilter, QueryRequest, Select, VectorQueryRequest};
+use crate::table::query::create_multi_vector_plan;
 use crate::table::AddColumnsResult;
 use crate::table::AddResult;
 use crate::table::AlterColumnsResult;
@@ -22,20 +22,19 @@ use crate::table::DropColumnsResult;
 use crate::table::MergeResult;
 use crate::table::Tags;
 use crate::table::UpdateResult;
-use crate::table::query::create_multi_vector_plan;
 use crate::table::{AnyQuery, Filter, TableStatistics};
 use crate::utils::background_cache::BackgroundCache;
 use crate::utils::{supported_btree_data_type, supported_vector_data_type};
-use crate::{DistanceType, Error};
 use crate::{
    error::Result,
    index::{IndexBuilder, IndexConfig},
    query::QueryExecutionOptions,
    table::{
-        AddDataBuilder, BaseTable, OptimizeAction, OptimizeStats, TableDefinition, UpdateBuilder,
-        merge::MergeInsertBuilder,
+        merge::MergeInsertBuilder, AddDataBuilder, BaseTable, OptimizeAction, OptimizeStats,
+        TableDefinition, UpdateBuilder,
    },
 };
+use crate::{DistanceType, Error};
 use arrow_array::{RecordBatch, RecordBatchIterator, RecordBatchReader};
 use arrow_ipc::reader::FileReader;
 use arrow_schema::{DataType, SchemaRef};
@@ -50,7 +49,7 @@ use lance::arrow::json::{JsonDataType, JsonSchema};
 use lance::dataset::refs::TagContents;
 use lance::dataset::scanner::DatasetRecordBatchStream;
 use lance::dataset::{ColumnAlteration, NewColumnTransform, Version};
-use lance_datafusion::exec::{OneShotExec, execute_plan};
+use lance_datafusion::exec::{execute_plan, OneShotExec};
 use reqwest::{RequestBuilder, Response};
 use serde::{Deserialize, Serialize};
 use serde_json::Number;
@@ -202,6 +201,7 @@ impl<S: HttpSend + 'static> Tags for RemoteTags<'_, S> {
 }

 pub struct RemoteTable<S: HttpSend = Sender> {
+    #[allow(dead_code)]
    client: RestfulLanceDbClient<S>,
    name: String,
    namespace: Vec<String>,
@@ -447,17 +447,13 @@ impl<S: HttpSend> RemoteTable<S> {
        body["k"] = serde_json::Value::Number(serde_json::Number::from(limit));

        if let Some(filter) = &params.filter {
-            let filter_sql = match filter {
-                QueryFilter::Sql(sql) => sql.clone(),
-                QueryFilter::Datafusion(expr) => expr_to_sql_string(expr)?,
-                QueryFilter::Substrait(_) => {
-                    return Err(Error::NotSupported {
-                        message: "Substrait filters are not supported for remote queries"
-                            .to_string(),
-                    });
-                }
-            };
-            body["filter"] = serde_json::Value::String(filter_sql);
+            if let QueryFilter::Sql(filter) = filter {
+                body["filter"] = serde_json::Value::String(filter.clone());
+            } else {
+                return Err(Error::NotSupported {
+                    message: "querying a remote table with a non-sql filter".to_string(),
+                });
+            }
        }

        match &params.select {
@@ -612,8 +608,8 @@ impl<S: HttpSend> RemoteTable<S> {
                message: format!(
                    "Cannot mutate table reference fixed at version {}. Call checkout_latest() to get a mutable table reference.",
                    version
-                ),
-            }),
+                )
+            })
        }
    }

@@ -697,10 +693,10 @@ impl<S: HttpSend> RemoteTable<S> {
            Error::Retry { status_code, .. } => *status_code,
            _ => None,
        };
-        if let Some(status_code) = status_code
-            && Self::should_invalidate_cache_for_status(status_code)
-        {
-            self.invalidate_schema_cache();
+        if let Some(status_code) = status_code {
+            if Self::should_invalidate_cache_for_status(status_code) {
+                self.invalidate_schema_cache();
+            }
        }
    }
 }
@@ -783,9 +779,9 @@ impl<S: HttpSend> std::fmt::Display for RemoteTable<S> {
 #[cfg(all(test, feature = "remote"))]
 mod test_utils {
    use super::*;
-    use crate::remote::ClientConfig;
    use crate::remote::client::test_utils::client_with_handler;
-    use crate::remote::client::test_utils::{MockSender, client_with_handler_and_config};
+    use crate::remote::client::test_utils::{client_with_handler_and_config, MockSender};
+    use crate::remote::ClientConfig;

    impl RemoteTable<MockSender> {
        pub fn new_mock<F, T>(name: String, handler: F, version: Option<semver::Version>) -> Self
@@ -945,12 +941,12 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
        let version = self.current_version().await;

        if let Some(filter) = filter {
-            let filter_sql = match filter {
-                Filter::Sql(sql) => sql.clone(),
-                Filter::Datafusion(expr) => expr_to_sql_string(&expr)?,
+            let Filter::Sql(filter) = filter else {
+                return Err(Error::NotSupported {
+                    message: "querying a remote table with a datafusion filter".to_string(),
+                });
            };
-            request =
-                request.json(&serde_json::json!({ "predicate": filter_sql, "version": version }));
+            request = request.json(&serde_json::json!({ "predicate": filter, "version": version }));
        } else {
            let body = serde_json::json!({ "version": version });
            request = request.json(&body);
@@ -1227,10 +1223,7 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
        let body = response.text().await.err_to_http(request_id.clone())?;
        if body.trim().is_empty() {
            // Backward compatible with old servers
-            return Ok(DeleteResult {
-                num_deleted_rows: 0,
-                version: 0,
-            });
+            return Ok(DeleteResult { version: 0 });
        }
        let delete_response: DeleteResult =
            serde_json::from_str(&body).map_err(|e| Error::Http {
@@ -1251,13 +1244,13 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
            0 => {
                return Err(Error::InvalidInput {
                    message: "No columns specified".into(),
-                });
+                })
            }
            1 => index.columns.pop().unwrap(),
            _ => {
                return Err(Error::NotSupported {
                    message: "Indices over multiple columns not yet supported".into(),
-                });
+                })
            }
        };
        let mut body = serde_json::json!({
@@ -1276,24 +1269,73 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
            );
        }

-        fn to_json(params: &impl serde::Serialize) -> crate::Result<serde_json::Value> {
-            serde_json::to_value(params).map_err(|e| Error::InvalidInput {
-                message: format!("failed to serialize index params {:?}", e),
-            })
-        }
-
-        // Map each Index variant to its wire type name and serializable params.
-        // Auto is special-cased since it needs schema inspection.
-        let (index_type_str, params) = match &index.index {
-            Index::IvfFlat(p) => ("IVF_FLAT", Some(to_json(p)?)),
-            Index::IvfPq(p) => ("IVF_PQ", Some(to_json(p)?)),
-            Index::IvfSq(p) => ("IVF_SQ", Some(to_json(p)?)),
-            Index::IvfHnswSq(p) => ("IVF_HNSW_SQ", Some(to_json(p)?)),
-            Index::IvfRq(p) => ("IVF_RQ", Some(to_json(p)?)),
-            Index::BTree(p) => ("BTREE", Some(to_json(p)?)),
-            Index::Bitmap(p) => ("BITMAP", Some(to_json(p)?)),
-            Index::LabelList(p) => ("LABEL_LIST", Some(to_json(p)?)),
-            Index::FTS(p) => ("FTS", Some(to_json(p)?)),
+        match index.index {
+            // TODO: Should we pass the actual index parameters? SaaS does not
+            // yet support them.
+            Index::IvfFlat(index) => {
+                body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_FLAT".to_string());
+                body[METRIC_TYPE_KEY] =
+                    serde_json::Value::String(index.distance_type.to_string().to_lowercase());
+                if let Some(num_partitions) = index.num_partitions {
+                    body["num_partitions"] = serde_json::Value::Number(num_partitions.into());
+                }
+            }
+            Index::IvfPq(index) => {
+                body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_PQ".to_string());
+                body[METRIC_TYPE_KEY] =
+                    serde_json::Value::String(index.distance_type.to_string().to_lowercase());
+                if let Some(num_partitions) = index.num_partitions {
+                    body["num_partitions"] = serde_json::Value::Number(num_partitions.into());
+                }
+                if let Some(num_bits) = index.num_bits {
+                    body["num_bits"] = serde_json::Value::Number(num_bits.into());
+                }
+            }
+            Index::IvfSq(index) => {
+                body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_SQ".to_string());
+                body[METRIC_TYPE_KEY] =
+                    serde_json::Value::String(index.distance_type.to_string().to_lowercase());
+                if let Some(num_partitions) = index.num_partitions {
+                    body["num_partitions"] = serde_json::Value::Number(num_partitions.into());
+                }
+            }
+            Index::IvfHnswSq(index) => {
+                body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_HNSW_SQ".to_string());
+                body[METRIC_TYPE_KEY] =
+                    serde_json::Value::String(index.distance_type.to_string().to_lowercase());
+                if let Some(num_partitions) = index.num_partitions {
+                    body["num_partitions"] = serde_json::Value::Number(num_partitions.into());
+                }
+            }
+            Index::IvfRq(index) => {
+                body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_RQ".to_string());
+                body[METRIC_TYPE_KEY] =
+                    serde_json::Value::String(index.distance_type.to_string().to_lowercase());
+                if let Some(num_partitions) = index.num_partitions {
+                    body["num_partitions"] = serde_json::Value::Number(num_partitions.into());
+                }
+                if let Some(num_bits) = index.num_bits {
+                    body["num_bits"] = serde_json::Value::Number(num_bits.into());
+                }
+            }
+            Index::BTree(_) => {
+                body[INDEX_TYPE_KEY] = serde_json::Value::String("BTREE".to_string());
+            }
+            Index::Bitmap(_) => {
+                body[INDEX_TYPE_KEY] = serde_json::Value::String("BITMAP".to_string());
+            }
+            Index::LabelList(_) => {
+                body[INDEX_TYPE_KEY] = serde_json::Value::String("LABEL_LIST".to_string());
+            }
+            Index::FTS(fts) => {
+                body[INDEX_TYPE_KEY] = serde_json::Value::String("FTS".to_string());
+                let params = serde_json::to_value(&fts).map_err(|e| Error::InvalidInput {
+                    message: format!("failed to serialize FTS index params {:?}", e),
+                })?;
+                for (key, value) in params.as_object().unwrap() {
+                    body[key] = value.clone();
+                }
+            }
            Index::Auto => {
                let schema = self.schema().await?;
                let field = schema
@@ -1302,11 +1344,11 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
                        message: format!("Column {} not found in schema", column),
                    })?;
                if supported_vector_data_type(field.data_type()) {
+                    body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_PQ".to_string());
                    body[METRIC_TYPE_KEY] =
                        serde_json::Value::String(DistanceType::L2.to_string().to_lowercase());
-                    ("IVF_PQ", None)
                } else if supported_btree_data_type(field.data_type()) {
-                    ("BTREE", None)
+                    body[INDEX_TYPE_KEY] = serde_json::Value::String("BTREE".to_string());
                } else {
                    return Err(Error::NotSupported {
                        message: format!(
@@ -1320,17 +1362,10 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
            _ => {
                return Err(Error::NotSupported {
                    message: "Index type not supported".into(),
-                });
+                })
            }
        };

-        body[INDEX_TYPE_KEY] = index_type_str.into();
-        if let Some(params) = params {
-            for (key, value) in params.as_object().expect("params should be a JSON object") {
-                body[key] = value.clone();
-            }
-        }
-
        let request = request.json(&body);

        let (request_id, response) = self.send(request, true).await?;
@@ -1771,8 +1806,8 @@ impl TryFrom<MergeInsertBuilder> for MergeInsertRequest {

 #[cfg(test)]
 mod tests {
-    use std::sync::Arc;
    use std::sync::atomic::{AtomicUsize, Ordering};
+    use std::sync::Arc;
    use std::time::Duration;
    use std::{collections::HashMap, pin::Pin};

@@ -1781,27 +1816,25 @@ mod tests {
    use crate::table::AddDataMode;

    use arrow::{array::AsArray, compute::concat_batches, datatypes::Int32Type};
-    use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator, record_batch};
+    use arrow_array::{record_batch, Int32Array, RecordBatch, RecordBatchIterator};
    use arrow_schema::{DataType, Field, Schema};
    use chrono::{DateTime, Utc};
-    use futures::{StreamExt, TryFutureExt, future::BoxFuture};
+    use futures::{future::BoxFuture, StreamExt, TryFutureExt};
    use lance_index::scalar::inverted::query::MatchQuery;
    use lance_index::scalar::{FullTextSearchQuery, InvertedIndexParams};
    use reqwest::Body;
    use rstest::rstest;
    use serde_json::json;

-    use crate::index::vector::{
-        IvfFlatIndexBuilder, IvfHnswSqIndexBuilder, IvfRqIndexBuilder, IvfSqIndexBuilder,
-    };
-    use crate::remote::JSON_CONTENT_TYPE;
+    use crate::index::vector::{IvfFlatIndexBuilder, IvfHnswSqIndexBuilder};
    use crate::remote::db::DEFAULT_SERVER_VERSION;
+    use crate::remote::JSON_CONTENT_TYPE;
    use crate::utils::background_cache::clock;
    use crate::{
-        DistanceType, Error, Table,
-        index::{Index, IndexStatistics, IndexType, vector::IvfPqIndexBuilder},
+        index::{vector::IvfPqIndexBuilder, Index, IndexStatistics, IndexType},
        query::{ExecutableQuery, QueryBase},
        remote::ARROW_FILE_CONTENT_TYPE,
+        DistanceType, Error, Table,
    };

    #[tokio::test]
@@ -2030,13 +2063,11 @@ mod tests {
                    .unwrap(),
                "/v1/table/my_table/insert/" => {
                    assert_eq!(request.method(), "POST");
-                    assert!(
-                        request
-                            .url()
-                            .query_pairs()
-                            .filter(|(k, _)| k == "mode")
-                            .all(|(_, v)| v == "append")
-                    );
+                    assert!(request
+                        .url()
+                        .query_pairs()
+                        .filter(|(k, _)| k == "mode")
+                        .all(|(_, v)| v == "append"));
                    assert_eq!(
                        request.headers().get("Content-Type").unwrap(),
                        ARROW_STREAM_CONTENT_TYPE
@@ -2957,8 +2988,6 @@ mod tests {
                "IVF_FLAT",
                json!({
                    "metric_type": "hamming",
-                    "sample_rate": 256,
-                    "max_iterations": 50,
                }),
                Index::IvfFlat(IvfFlatIndexBuilder::default().distance_type(DistanceType::Hamming)),
            ),
@@ -2967,8 +2996,6 @@ mod tests {
                json!({
                    "metric_type": "hamming",
                    "num_partitions": 128,
-                    "sample_rate": 256,
-                    "max_iterations": 50,
                }),
                Index::IvfFlat(
                    IvfFlatIndexBuilder::default()
@@ -2980,8 +3007,6 @@ mod tests {
                "IVF_PQ",
                json!({
                    "metric_type": "l2",
-                    "sample_rate": 256,
-                    "max_iterations": 50,
                }),
                Index::IvfPq(Default::default()),
            ),
@@ -2991,8 +3016,6 @@ mod tests {
                    "metric_type": "cosine",
                    "num_partitions": 128,
                    "num_bits": 4,
-                    "sample_rate": 256,
-                    "max_iterations": 50,
                }),
                Index::IvfPq(
                    IvfPqIndexBuilder::default()
@@ -3001,29 +3024,10 @@ mod tests {
                        .num_bits(4),
                ),
            ),
-            (
-                "IVF_PQ",
-                json!({
-                    "metric_type": "l2",
-                    "num_sub_vectors": 16,
-                    "sample_rate": 512,
-                    "max_iterations": 100,
-                }),
-                Index::IvfPq(
-                    IvfPqIndexBuilder::default()
-                        .num_sub_vectors(16)
-                        .sample_rate(512)
-                        .max_iterations(100),
-                ),
-            ),
            (
                "IVF_HNSW_SQ",
                json!({
                    "metric_type": "l2",
-                    "sample_rate": 256,
-                    "max_iterations": 50,
-                    "m": 20,
-                    "ef_construction": 300,
                }),
                Index::IvfHnswSq(Default::default()),
            ),
@@ -3032,65 +3036,11 @@ mod tests {
                json!({
                    "metric_type": "l2",
                    "num_partitions": 128,
-                    "sample_rate": 256,
-                    "max_iterations": 50,
-                    "m": 40,
-                    "ef_construction": 500,
                }),
                Index::IvfHnswSq(
                    IvfHnswSqIndexBuilder::default()
                        .distance_type(DistanceType::L2)
-                        .num_partitions(128)
-                        .num_edges(40)
-                        .ef_construction(500),
-                ),
-            ),
-            (
-                "IVF_SQ",
-                json!({
-                    "metric_type": "l2",
-                    "sample_rate": 256,
-                    "max_iterations": 50,
-                }),
-                Index::IvfSq(Default::default()),
-            ),
-            (
-                "IVF_SQ",
-                json!({
-                    "metric_type": "cosine",
-                    "num_partitions": 64,
-                    "sample_rate": 256,
-                    "max_iterations": 50,
-                }),
-                Index::IvfSq(
-                    IvfSqIndexBuilder::default()
-                        .distance_type(DistanceType::Cosine)
-                        .num_partitions(64),
-                ),
-            ),
-            (
-                "IVF_RQ",
-                json!({
-                    "metric_type": "l2",
-                    "sample_rate": 256,
-                    "max_iterations": 50,
-                }),
-                Index::IvfRq(Default::default()),
-            ),
-            (
-                "IVF_RQ",
-                json!({
-                    "metric_type": "cosine",
-                    "num_partitions": 64,
-                    "num_bits": 8,
-                    "sample_rate": 256,
-                    "max_iterations": 50,
-                }),
-                Index::IvfRq(
-                    IvfRqIndexBuilder::default()
-                        .distance_type(DistanceType::Cosine)
-                        .num_partitions(64)
-                        .num_bits(8),
+                        .num_partitions(128),
                ),
            ),
            // HNSW_PQ isn't yet supported on SaaS
@@ -3594,7 +3544,7 @@ mod tests {
    }

    fn _make_table_with_indices(unindexed_rows: usize) -> Table {
-        Table::new_with_handler("my_table", move |request| {
+        let table = Table::new_with_handler("my_table", move |request| {
            assert_eq!(request.method(), "POST");

            let response_body = match request.url().path() {
@@ -3638,7 +3588,8 @@ mod tests {
            let body = serde_json::to_string(&response_body).unwrap();
            let status = if body == "null" { 404 } else { 200 };
            http::Response::builder().status(status).body(body).unwrap()
-        })
+        });
+        table
    }

    #[tokio::test]
@@ -3849,8 +3800,8 @@ mod tests {

    #[tokio::test]
    async fn test_uri_caching() {
-        use std::sync::Arc;
        use std::sync::atomic::{AtomicUsize, Ordering};
+        use std::sync::Arc;

        let call_count = Arc::new(AtomicUsize::new(0));
        let call_count_clone = call_count.clone();
@@ -4684,60 +4635,4 @@ mod tests {
        assert_eq!(result.version, 3);
        assert_eq!(attempt.load(Ordering::SeqCst), 3);
    }
-
-    #[tokio::test]
-    async fn test_query_with_datafusion_filter() {
-        use datafusion_expr::{col, lit};
-
-        let expected_data = RecordBatch::try_new(
-            Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),
-            vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
-        )
-        .unwrap();
-        let expected_data_ref = expected_data.clone();
-
-        let table = Table::new_with_handler("my_table", move |request| {
-            assert_eq!(request.method(), "POST");
-            assert_eq!(request.url().path(), "/v1/table/my_table/query/");
-
-            let body = request.body().unwrap().as_bytes().unwrap();
-            let body: serde_json::Value = serde_json::from_slice(body).unwrap();
-
-            // The Datafusion expression should be serialized to SQL
-            let filter = body.get("filter").expect("filter should be present");
-            let filter_str = filter.as_str().expect("filter should be a string");
-            // col("x") > lit(10) AND col("status") = lit("active")
-            assert!(
-                filter_str.contains("x") && filter_str.contains("10"),
-                "Filter should contain 'x' and '10', got: {}",
-                filter_str
-            );
-            assert!(
-                filter_str.contains("status") && filter_str.contains("active"),
-                "Filter should contain 'status' and 'active', got: {}",
-                filter_str
-            );
-
-            let response_body = write_ipc_file(&expected_data_ref);
-            http::Response::builder()
-                .status(200)
-                .header(CONTENT_TYPE, ARROW_FILE_CONTENT_TYPE)
-                .body(response_body)
-                .unwrap()
-        });
-
-        // Use only_if_expr with a Datafusion expression
-        let expr = col("x").gt(lit(10)).and(col("status").eq(lit("active")));
-        let data = table
-            .query()
-            .only_if_expr(expr)
-            .execute()
-            .await
-            .unwrap()
-            .collect::<Vec<_>>()
-            .await;
-
-        assert_eq!(data.len(), 1);
-        assert_eq!(data[0].as_ref().unwrap(), &expected_data);
-    }
 }
--- a/rust/lancedb/src/remote/table/insert.rs
+++ b/rust/lancedb/src/remote/table/insert.rs
@@ -16,12 +16,12 @@ use datafusion_physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan, Plan
 use futures::StreamExt;
 use http::header::CONTENT_TYPE;

-use crate::Error;
-use crate::remote::ARROW_STREAM_CONTENT_TYPE;
 use crate::remote::client::{HttpSend, RestfulLanceDbClient, Sender};
 use crate::remote::table::RemoteTable;
-use crate::table::AddResult;
+use crate::remote::ARROW_STREAM_CONTENT_TYPE;
 use crate::table::datafusion::insert::COUNT_SCHEMA;
+use crate::table::AddResult;
+use crate::Error;

 /// ExecutionPlan for inserting data into a remote LanceDB table.
 ///
@@ -309,12 +309,12 @@ mod tests {
    use arrow_schema::{DataType, Field, Schema as ArrowSchema};
    use datafusion::prelude::SessionContext;
    use datafusion_catalog::MemTable;
-    use std::sync::Arc;
    use std::sync::atomic::{AtomicUsize, Ordering};
+    use std::sync::Arc;

-    use crate::Table;
    use crate::remote::ARROW_STREAM_CONTENT_TYPE;
    use crate::table::datafusion::BaseTableAdapter;
+    use crate::Table;

    fn schema_json() -> &'static str {
        r#"{"fields": [{"name": "id", "type": {"type": "int32"}, "nullable": true}]}"#
--- a/rust/lancedb/src/remote/util.rs
+++ b/rust/lancedb/src/remote/util.rs
@@ -5,7 +5,7 @@ use arrow_ipc::CompressionType;
 use futures::{Stream, StreamExt};
 use reqwest::Response;

-use crate::{Result, arrow::SendableRecordBatchStream};
+use crate::{arrow::SendableRecordBatchStream, Result};

 use super::db::ServerVersion;

--- a/rust/lancedb/src/rerankers/rrf.rs
+++ b/rust/lancedb/src/rerankers/rrf.rs
@@ -14,7 +14,7 @@ use async_trait::async_trait;
 use lance::dataset::ROW_ID;

 use crate::error::{Error, Result};
-use crate::rerankers::{RELEVANCE_SCORE, Reranker};
+use crate::rerankers::{Reranker, RELEVANCE_SCORE};

 /// Reranks the results using Reciprocal Rank Fusion(RRF) algorithm based
 /// on the scores of vector and FTS search.
--- a/rust/lancedb/src/table.rs
+++ b/rust/lancedb/src/table.rs
@@ -6,31 +6,31 @@
 use arrow_array::{RecordBatch, RecordBatchReader};
 use arrow_schema::{DataType, Field, Schema, SchemaRef};
 use async_trait::async_trait;
-use datafusion_execution::TaskContext;
 use datafusion_expr::Expr;
-use datafusion_physical_plan::ExecutionPlan;
 use datafusion_physical_plan::display::DisplayableExecutionPlan;
+use datafusion_physical_plan::ExecutionPlan;
 use futures::StreamExt;
-use futures::stream::FuturesUnordered;
+use futures::TryStreamExt;
+use lance::dataset::builder::DatasetBuilder;
 pub use lance::dataset::ColumnAlteration;
 pub use lance::dataset::NewColumnTransform;
 pub use lance::dataset::ReadParams;
 pub use lance::dataset::Version;
 use lance::dataset::WriteMode;
-use lance::dataset::builder::DatasetBuilder;
 use lance::dataset::{InsertBuilder, WriteParams};
-use lance::index::vector::VectorIndexParams;
 use lance::index::vector::utils::infer_vector_dim;
+use lance::index::vector::VectorIndexParams;
 use lance::io::{ObjectStoreParams, WrappingObjectStore};
+use lance_datafusion::exec::execute_plan;
 use lance_datafusion::utils::StreamingWriteSource;
-use lance_index::DatasetIndexExt;
-use lance_index::IndexType;
 use lance_index::scalar::{BuiltinIndexType, ScalarIndexParams};
 use lance_index::vector::bq::RQBuildParams;
 use lance_index::vector::hnsw::builder::HnswBuildParams;
 use lance_index::vector::ivf::IvfBuildParams;
 use lance_index::vector::pq::PQBuildParams;
 use lance_index::vector::sq::builder::SQBuildParams;
+use lance_index::DatasetIndexExt;
+use lance_index::IndexType;
 use lance_io::object_store::{LanceNamespaceStorageOptionsProvider, StorageOptionsAccessor};
 pub use query::AnyQuery;

@@ -43,19 +43,19 @@ use std::format;
 use std::path::Path;
 use std::sync::Arc;

-use crate::data::scannable::{PeekedScannable, Scannable, estimate_write_partitions};
+use crate::data::scannable::Scannable;
 use crate::database::Database;
 use crate::embeddings::{EmbeddingDefinition, EmbeddingRegistry, MemoryRegistry};
 use crate::error::{Error, Result};
-use crate::index::IndexStatistics;
 use crate::index::vector::VectorIndex;
-use crate::index::{Index, IndexBuilder, vector::suggested_num_sub_vectors};
+use crate::index::IndexStatistics;
+use crate::index::{vector::suggested_num_sub_vectors, Index, IndexBuilder};
 use crate::index::{IndexConfig, IndexStatisticsImpl};
 use crate::query::{IntoQueryVector, Query, QueryExecutionOptions, TakeQuery, VectorQuery};
 use crate::table::datafusion::insert::InsertExec;
 use crate::utils::{
-    PatchReadParam, PatchWriteParam, supported_bitmap_data_type, supported_btree_data_type,
-    supported_fts_data_type, supported_label_list_data_type, supported_vector_data_type,
+    supported_bitmap_data_type, supported_btree_data_type, supported_fts_data_type,
+    supported_label_list_data_type, supported_vector_data_type, PatchReadParam, PatchWriteParam,
 };

 use self::dataset::DatasetConsistencyWrapper;
@@ -2113,7 +2113,7 @@ impl BaseTable for NativeTable {
        }
    }

-    async fn add(&self, mut add: AddDataBuilder) -> Result<AddResult> {
+    async fn add(&self, add: AddDataBuilder) -> Result<AddResult> {
        let table_def = self.table_definition().await?;

        self.dataset.ensure_mutable()?;
@@ -2122,22 +2122,6 @@ impl BaseTable for NativeTable {

        let table_schema = Schema::from(&ds.schema().clone());

-        // Peek at the first batch to estimate a good partition count for
-        // write parallelism.
-        let mut peeked = PeekedScannable::new(add.data);
-        let num_partitions = if let Some(first_batch) = peeked.peek().await {
-            let max_partitions = lance_core::utils::tokio::get_num_compute_intensive_cpus();
-            estimate_write_partitions(
-                first_batch.get_array_memory_size(),
-                first_batch.num_rows(),
-                peeked.num_rows(),
-                max_partitions,
-            )
-        } else {
-            1
-        };
-        add.data = Box::new(peeked);
-
        let output = add.into_plan(&table_schema, &table_def)?;

        let lance_params = output
@@ -2151,41 +2135,18 @@ impl BaseTable for NativeTable {
                ..Default::default()
            });

-        // Repartition for write parallelism if beneficial.
-        let plan = if num_partitions > 1 {
-            Arc::new(
-                datafusion_physical_plan::repartition::RepartitionExec::try_new(
-                    output.plan,
-                    datafusion_physical_plan::Partitioning::RoundRobinBatch(num_partitions),
-                )?,
-            ) as Arc<dyn ExecutionPlan>
-        } else {
-            output.plan
-        };
+        let plan = Arc::new(InsertExec::new(
+            ds_wrapper.clone(),
+            ds,
+            output.plan,
+            lance_params,
+        ));

-        let insert_exec = Arc::new(InsertExec::new(ds_wrapper.clone(), ds, plan, lance_params));
-
-        // Execute all partitions in parallel.
-        let task_ctx = Arc::new(TaskContext::default());
-        let handles = FuturesUnordered::new();
-        for partition in 0..num_partitions {
-            let exec = insert_exec.clone();
-            let ctx = task_ctx.clone();
-            handles.push(tokio::spawn(async move {
-                let mut stream = exec
-                    .execute(partition, ctx)
-                    .map_err(|e| -> Error { e.into() })?;
-                while let Some(batch) = stream.next().await {
-                    batch.map_err(|e| -> Error { e.into() })?;
-                }
-                Ok::<_, Error>(())
-            }));
-        }
-        for handle in handles {
-            handle.await.map_err(|e| Error::Runtime {
-                message: format!("Insert task panicked: {}", e),
-            })??;
-        }
+        let stream = execute_plan(plan, Default::default())?;
+        stream
+            .try_collect::<Vec<_>>()
+            .await
+            .map_err(crate::Error::from)?;

        let version = ds_wrapper.get().await?.manifest().version;
        Ok(AddResult { version })
@@ -2555,21 +2516,22 @@ pub struct FragmentSummaryStats {
 #[cfg(test)]
 #[allow(deprecated)]
 mod tests {
-    use std::sync::Arc;
    use std::sync::atomic::{AtomicBool, Ordering};
+    use std::sync::Arc;
    use std::time::Duration;

    use arrow_array::{
+        builder::{ListBuilder, StringBuilder},
        Array, BooleanArray, FixedSizeListArray, Int32Array, LargeStringArray, RecordBatch,
        RecordBatchIterator, RecordBatchReader, StringArray,
-        builder::{ListBuilder, StringBuilder},
    };
    use arrow_array::{BinaryArray, LargeBinaryArray};
    use arrow_data::ArrayDataBuilder;
    use arrow_schema::{DataType, Field, Schema};
    use futures::TryStreamExt;
-    use lance::Dataset;
    use lance::io::{ObjectStoreParams, WrappingObjectStore};
+    use lance::Dataset;
+    use rand::Rng;
    use tempfile::tempdir;

    use super::*;
@@ -2776,8 +2738,9 @@ mod tests {
            false,
        )]));

+        let mut rng = rand::thread_rng();
        let float_arr = Float32Array::from(
-            repeat_with(rand::random::<f32>)
+            repeat_with(|| rng.gen::<f32>())
                .take(512 * dimension as usize)
                .collect::<Vec<f32>>(),
        );
@@ -2882,8 +2845,8 @@ mod tests {
            .await
            .unwrap();

-        use lance::index::DatasetIndexInternalExt;
        use lance::index::vector::ivf::v2::IvfPq as LanceIvfPq;
+        use lance::index::DatasetIndexInternalExt;
        use lance_index::metrics::NoOpMetricsCollector;
        use lance_index::vector::VectorIndex as LanceVectorIndex;

@@ -2931,8 +2894,9 @@ mod tests {
            false,
        )]));

+        let mut rng = rand::thread_rng();
        let float_arr = Float32Array::from(
-            repeat_with(rand::random::<f32>)
+            repeat_with(|| rng.gen::<f32>())
                .take(512 * dimension as usize)
                .collect::<Vec<f32>>(),
        );
@@ -2990,8 +2954,9 @@ mod tests {
            false,
        )]));

+        let mut rng = rand::thread_rng();
        let float_arr = Float32Array::from(
-            repeat_with(rand::random::<f32>)
+            repeat_with(|| rng.gen::<f32>())
                .take(512 * dimension as usize)
                .collect::<Vec<f32>>(),
        );
@@ -3252,20 +3217,16 @@ mod tests {
            .unwrap();

        // Can not create btree or bitmap index on list column
-        assert!(
-            table
-                .create_index(&["tags"], Index::BTree(Default::default()))
-                .execute()
-                .await
-                .is_err()
-        );
-        assert!(
-            table
-                .create_index(&["tags"], Index::Bitmap(Default::default()))
-                .execute()
-                .await
-                .is_err()
-        );
+        assert!(table
+            .create_index(&["tags"], Index::BTree(Default::default()))
+            .execute()
+            .await
+            .is_err());
+        assert!(table
+            .create_index(&["tags"], Index::Bitmap(Default::default()))
+            .execute()
+            .await
+            .is_err());

        // Create bitmap index on the "category" column
        table
--- a/rust/lancedb/src/table/add_data.rs
+++ b/rust/lancedb/src/table/add_data.rs
@@ -7,8 +7,8 @@ use arrow_schema::{DataType, Fields, Schema};
 use lance::dataset::WriteMode;
 use serde::{Deserialize, Serialize};

-use crate::data::scannable::Scannable;
 use crate::data::scannable::scannable_with_embeddings;
+use crate::data::scannable::Scannable;
 use crate::embeddings::EmbeddingRegistry;
 use crate::table::datafusion::cast::cast_to_table_schema;
 use crate::table::datafusion::reject_nan::reject_nan_vectors;
@@ -155,9 +155,7 @@ impl AddDataBuilder {

 pub struct PreprocessingOutput {
    pub plan: Arc<dyn datafusion_physical_plan::ExecutionPlan>,
-    #[cfg_attr(not(feature = "remote"), allow(dead_code))]
    pub overwrite: bool,
-    #[cfg_attr(not(feature = "remote"), allow(dead_code))]
    pub rescannable: bool,
    pub write_options: WriteOptions,
    pub mode: AddDataMode,
@@ -204,14 +202,13 @@ mod tests {

    use arrow::datatypes::Float64Type;
    use arrow_array::{
-        FixedSizeListArray, Float32Array, Int32Array, LargeStringArray, ListArray, RecordBatch,
-        RecordBatchIterator, record_batch,
+        record_batch, FixedSizeListArray, Float32Array, Int32Array, LargeStringArray, ListArray,
+        RecordBatch, RecordBatchIterator,
    };
    use arrow_schema::{ArrowError, DataType, Field, Schema};
    use futures::TryStreamExt;
    use lance::dataset::{WriteMode, WriteParams};

-    use crate::Error;
    use crate::arrow::{SendableRecordBatchStream, SimpleRecordBatchStream};
    use crate::connect;
    use crate::data::scannable::Scannable;
@@ -221,8 +218,8 @@ mod tests {
    use crate::query::{ExecutableQuery, QueryBase, Select};
    use crate::table::add_data::NaNVectorBehavior;
    use crate::table::{ColumnDefinition, ColumnKind, Table, TableDefinition, WriteOptions};
-    use crate::test_utils::TestCustomError;
    use crate::test_utils::embeddings::MockEmbed;
+    use crate::Error;

    use super::AddDataMode;

@@ -286,20 +283,17 @@ mod tests {
        test_add_with_data(stream).await;
    }

-    fn assert_preserves_external_error(err: &Error) {
-        assert!(
-            matches!(err, Error::External { source } if source.downcast_ref::<TestCustomError>().is_some()),
-            "Expected Error::External, got: {err:?}"
-        );
-        // The original TestCustomError message should be preserved through the
-        // error chain, even if the error gets wrapped multiple times by
-        // lance's insert pipeline.
-        assert!(
-            err.to_string().contains("TestCustomError occurred"),
-            "Expected original error message to be preserved, got: {err}"
-        );
+    #[derive(Debug)]
+    struct MyError;
+
+    impl std::fmt::Display for MyError {
+        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+            write!(f, "MyError occurred")
+        }
    }

+    impl std::error::Error for MyError {}
+
    #[tokio::test]
    async fn test_add_preserves_reader_error() {
        let table = create_test_table().await;
@@ -307,7 +301,7 @@ mod tests {
        let schema = first_batch.schema();
        let iterator = vec![
            Ok(first_batch),
-            Err(ArrowError::ExternalError(Box::new(TestCustomError))),
+            Err(ArrowError::ExternalError(Box::new(MyError))),
        ];
        let reader: Box<dyn arrow_array::RecordBatchReader + Send> = Box::new(
            RecordBatchIterator::new(iterator.into_iter(), schema.clone()),
@@ -315,7 +309,7 @@ mod tests {

        let result = table.add(reader).execute().await;

-        assert_preserves_external_error(&result.unwrap_err());
+        assert!(result.is_err());
    }

    #[tokio::test]
@@ -326,7 +320,7 @@ mod tests {
        let iterator = vec![
            Ok(first_batch),
            Err(Error::External {
-                source: Box::new(TestCustomError),
+                source: Box::new(MyError),
            }),
        ];
        let stream = futures::stream::iter(iterator);
@@ -337,7 +331,7 @@ mod tests {

        let result = table.add(stream).execute().await;

-        assert_preserves_external_error(&result.unwrap_err());
+        assert!(result.is_err());
    }

    #[tokio::test]
--- a/rust/lancedb/src/table/datafusion.rs
+++ b/rust/lancedb/src/table/datafusion.rs
@@ -17,17 +17,17 @@ use async_trait::async_trait;
 use datafusion_catalog::{Session, TableProvider};
 use datafusion_common::{DataFusionError, Result as DataFusionResult, Statistics};
 use datafusion_execution::{SendableRecordBatchStream, TaskContext};
-use datafusion_expr::{Expr, TableProviderFilterPushDown, TableType, dml::InsertOp};
+use datafusion_expr::{dml::InsertOp, Expr, TableProviderFilterPushDown, TableType};
 use datafusion_physical_plan::{
-    DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, stream::RecordBatchStreamAdapter,
+    stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties,
 };
 use futures::{TryFutureExt, TryStreamExt};
 use lance::dataset::{WriteMode, WriteParams};

 use super::{AnyQuery, BaseTable};
 use crate::{
-    Result,
    query::{QueryExecutionOptions, QueryFilter, QueryRequest, Select},
+    Result,
 };
 use arrow_schema::{DataType, Field};
 use lance_index::scalar::FullTextSearchQuery;
@@ -268,7 +268,7 @@ impl TableProvider for BaseTableAdapter {
            InsertOp::Replace => {
                return Err(DataFusionError::NotImplemented(
                    "Replace mode is not supported for LanceDB tables".to_string(),
-                ));
+                ))
            }
        };

@@ -300,13 +300,13 @@ pub mod tests {
    use datafusion_catalog::TableProvider;
    use datafusion_common::stats::Precision;
    use datafusion_execution::SendableRecordBatchStream;
-    use datafusion_expr::{LogicalPlan, LogicalPlanBuilder, col, lit};
+    use datafusion_expr::{col, lit, LogicalPlan, LogicalPlanBuilder};
    use futures::{StreamExt, TryStreamExt};
    use tempfile::tempdir;

    use crate::{
        connect,
-        index::{Index, scalar::BTreeIndexBuilder},
+        index::{scalar::BTreeIndexBuilder, Index},
        table::datafusion::BaseTableAdapter,
    };

--- a/rust/lancedb/src/table/datafusion/cast.rs
+++ b/rust/lancedb/src/table/datafusion/cast.rs
@@ -5,10 +5,10 @@ use std::sync::Arc;

 use arrow_schema::{DataType, Field, FieldRef, Fields, Schema};
 use datafusion::functions::core::{get_field, named_struct};
-use datafusion_common::ScalarValue;
 use datafusion_common::config::ConfigOptions;
+use datafusion_common::ScalarValue;
+use datafusion_physical_expr::expressions::{cast, Literal};
 use datafusion_physical_expr::ScalarFunctionExpr;
-use datafusion_physical_expr::expressions::{Literal, cast};
 use datafusion_physical_plan::expressions::Column;
 use datafusion_physical_plan::projection::ProjectionExec;
 use datafusion_physical_plan::{ExecutionPlan, PhysicalExpr};
--- a/rust/lancedb/src/table/datafusion/insert.rs
+++ b/rust/lancedb/src/table/datafusion/insert.rs
@@ -16,9 +16,9 @@ use datafusion_physical_plan::stream::RecordBatchStreamAdapter;
 use datafusion_physical_plan::{
    DisplayAs, DisplayFormatType, ExecutionPlan, ExecutionPlanProperties, PlanProperties,
 };
-use lance::Dataset;
 use lance::dataset::transaction::{Operation, Transaction};
 use lance::dataset::{CommitBuilder, InsertBuilder, WriteParams};
+use lance::Dataset;
 use lance_table::format::Fragment;

 use crate::table::dataset::DatasetConsistencyWrapper;
@@ -195,13 +195,13 @@ impl ExecutionPlan for InsertExec {
                }
            };

-            if let Some(transactions) = to_commit
-                && let Some(merged_txn) = merge_transactions(transactions)
-            {
-                let new_dataset = CommitBuilder::new(dataset.clone())
-                    .execute(merged_txn)
-                    .await?;
-                ds_wrapper.update(new_dataset);
+            if let Some(transactions) = to_commit {
+                if let Some(merged_txn) = merge_transactions(transactions) {
+                    let new_dataset = CommitBuilder::new(dataset.clone())
+                        .execute(merged_txn)
+                        .await?;
+                    ds_wrapper.update(new_dataset);
+                }
            }

            Ok(RecordBatch::try_new(
@@ -222,7 +222,7 @@ mod tests {
    use std::vec;

    use super::*;
-    use arrow_array::{RecordBatchIterator, record_batch};
+    use arrow_array::{record_batch, RecordBatchIterator};
    use datafusion::prelude::SessionContext;
    use datafusion_catalog::MemTable;
    use tempfile::tempdir;
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Jack Ye	b2653e5524	fix pylance test version	2026-02-24 08:15:25 -08:00
lancedb automation	7324bcec84	chore: update lance dependency to v3.1.0-beta.1	2026-02-24 08:03:11 +00:00