rename gha task

fix lint
remove pydantic 1 support
2026-03-26 02:20:40 +00:00 · 2026-01-29 16:26:28 -08:00 · 2026-01-29 16:21:23 -08:00 · 2026-01-29 16:18:56 -08:00 · 2026-01-29 16:06:36 -08:00 · 2026-01-30 01:47:50 +08:00
57 changed files with 1368 additions and 916 deletions
--- a/.bumpversion.toml
+++ b/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.23.1"
+current_version = "0.24.1"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/.github/workflows/build_linux_wheel/action.yml
+++ b/.github/workflows/build_linux_wheel/action.yml
@@ -3,7 +3,7 @@ name: build-linux-wheel
 description: "Build a manylinux wheel for lance"
 inputs:
  python-minor-version:
-    description: "8, 9, 10, 11, 12"
+    description: "10, 11, 12, 13"
    required: true
  args:
    description: "--release"
--- a/.github/workflows/build_mac_wheel/action.yml
+++ b/.github/workflows/build_mac_wheel/action.yml
@@ -3,7 +3,7 @@ name: build_wheel
 description: "Build a lance wheel"
 inputs:
  python-minor-version:
-    description: "8, 9, 10, 11"
+    description: "10, 11, 12, 13"
    required: true
  args:
    description: "--release"
--- a/.github/workflows/build_windows_wheel/action.yml
+++ b/.github/workflows/build_windows_wheel/action.yml
@@ -3,7 +3,7 @@ name: build_wheel
 description: "Build a lance wheel"
 inputs:
  python-minor-version:
-    description: "8, 9, 10, 11"
+    description: "10, 11, 12, 13"
    required: true
  args:
    description: "--release"
--- a/.github/workflows/codex-update-lance-dependency.yml
+++ b/.github/workflows/codex-update-lance-dependency.yml
@@ -75,6 +75,13 @@ jobs:
          VERSION="${VERSION#v}"
          BRANCH_NAME="codex/update-lance-${VERSION//[^a-zA-Z0-9]/-}"

+          # Use "chore" for beta/rc versions, "feat" for stable releases
+          if [[ "${VERSION}" == *beta* ]] || [[ "${VERSION}" == *rc* ]]; then
+            COMMIT_TYPE="chore"
+          else
+            COMMIT_TYPE="feat"
+          fi
+
          cat <<EOF >/tmp/codex-prompt.txt
          You are running inside the lancedb repository on a GitHub Actions runner. Update the Lance dependency to version ${VERSION} and prepare a pull request for maintainers to review.

@@ -84,10 +91,10 @@ jobs:
          3. After clippy succeeds, run "cargo fmt --all" to format the workspace.
          4. Ensure the repository is clean except for intentional changes. Inspect "git status --short" and "git diff" to confirm the dependency update and any required fixes.
          5. Create and switch to a new branch named "${BRANCH_NAME}" (replace any duplicated hyphens if necessary).
-          6. Stage all relevant files with "git add -A". Commit using the message "chore: update lance dependency to v${VERSION}".
+          6. Stage all relevant files with "git add -A". Commit using the message "${COMMIT_TYPE}: update lance dependency to v${VERSION}".
          7. Push the branch to origin. If the branch already exists, force-push your changes.
          8. env "GH_TOKEN" is available, use "gh" tools for github related operations like creating pull request.
-          9. Create a pull request targeting "main" with title "chore: update lance dependency to v${VERSION}". In the body, summarize the dependency bump, clippy/fmt verification, and link the triggering tag (${TAG}).
+          9. Create a pull request targeting "main" with title "${COMMIT_TYPE}: update lance dependency to v${VERSION}". First, write the PR body to /tmp/pr-body.md using a heredoc (cat <<'EOF' > /tmp/pr-body.md). The body should summarize the dependency bump, clippy/fmt verification, and link the triggering tag (${TAG}). Then run "gh pr create --body-file /tmp/pr-body.md".
          10. After creating the PR, display the PR URL, "git status --short", and a concise summary of the commands run and their results.

          Constraints:
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -41,7 +41,7 @@ jobs:
          sudo apt install -y protobuf-compiler libssl-dev
          rustup update && rustup default
      - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
        with:
          python-version: "3.10"
          cache: "pip"
--- a/.github/workflows/pypi-publish.yml
+++ b/.github/workflows/pypi-publish.yml
@@ -44,12 +44,12 @@ jobs:
          fetch-depth: 0
          lfs: true
      - name: Set up Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v6
        with:
-          python-version: 3.8
+          python-version: "3.10"
      - uses: ./.github/workflows/build_linux_wheel
        with:
-          python-minor-version: 8
+          python-minor-version: 10
          args: "--release --strip ${{ matrix.config.extra_args }}"
          arm-build: ${{ matrix.config.platform == 'aarch64' }}
          manylinux: ${{ matrix.config.manylinux }}
@@ -74,12 +74,12 @@ jobs:
          fetch-depth: 0
          lfs: true
      - name: Set up Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v6
        with:
-          python-version: 3.12
+          python-version: "3.13"
      - uses: ./.github/workflows/build_mac_wheel
        with:
-          python-minor-version: 8
+          python-minor-version: 10
          args: "--release --strip --target ${{ matrix.config.target }} --features fp16kernels"
      - uses: ./.github/workflows/upload_wheel
        if: startsWith(github.ref, 'refs/tags/python-v')
@@ -95,12 +95,12 @@ jobs:
          fetch-depth: 0
          lfs: true
      - name: Set up Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v6
        with:
-          python-version: 3.12
+          python-version: "3.13"
      - uses: ./.github/workflows/build_windows_wheel
        with:
-          python-minor-version: 8
+          python-minor-version: 10
          args: "--release --strip"
          vcpkg_token: ${{ secrets.VCPKG_GITHUB_PACKAGES }}
      - uses: ./.github/workflows/upload_wheel
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -25,7 +25,7 @@ jobs:
  lint:
    name: "Lint"
    timeout-minutes: 30
-    runs-on: "ubuntu-22.04"
+    runs-on: "ubuntu-24.04"
    defaults:
      run:
        shell: bash
@@ -36,9 +36,9 @@ jobs:
          fetch-depth: 0
          lfs: true
      - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
        with:
-          python-version: "3.12"
+          python-version: "3.13"
      - name: Install ruff
        run: |
          pip install ruff==0.9.9
@@ -61,9 +61,9 @@ jobs:
          fetch-depth: 0
          lfs: true
      - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
        with:
-          python-version: "3.12"
+          python-version: "3.13"
      - name: Install protobuf compiler
        run: |
          sudo apt update
@@ -90,9 +90,9 @@ jobs:
          fetch-depth: 0
          lfs: true
      - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
        with:
-          python-version: "3.12"
+          python-version: "3.13"
          cache: "pip"
      - name: Install protobuf
        run: |
@@ -110,7 +110,7 @@ jobs:
    timeout-minutes: 30
    strategy:
      matrix:
-        python-minor-version: ["9", "12"]
+        python-minor-version: ["10", "13"]
    runs-on: "ubuntu-24.04"
    defaults:
      run:
@@ -126,7 +126,7 @@ jobs:
          sudo apt update
          sudo apt install -y protobuf-compiler
      - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
        with:
          python-version: 3.${{ matrix.python-minor-version }}
      - uses: ./.github/workflows/build_linux_wheel
@@ -156,9 +156,9 @@ jobs:
          fetch-depth: 0
          lfs: true
      - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
        with:
-          python-version: "3.12"
+          python-version: "3.13"
      - uses: ./.github/workflows/build_mac_wheel
        with:
          args: --profile ci
@@ -185,9 +185,9 @@ jobs:
          fetch-depth: 0
          lfs: true
      - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
        with:
-          python-version: "3.12"
+          python-version: "3.13"
      - uses: ./.github/workflows/build_windows_wheel
        with:
          args: --profile ci
@@ -195,7 +195,7 @@ jobs:
      # Make sure wheels are not included in the Rust cache
      - name: Delete wheels
        run: rm -rf target/wheels
-  pydantic1x:
+  min-deps:
    timeout-minutes: 30
    runs-on: "ubuntu-24.04"
    defaults:
@@ -212,12 +212,11 @@ jobs:
          sudo apt update
          sudo apt install -y protobuf-compiler
      - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
        with:
-          python-version: 3.9
+          python-version: "3.10"
      - name: Install lancedb
        run: |
-          pip install "pydantic<2"
          pip install pyarrow==16
          pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests]
          pip install tantivy
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -48,6 +48,8 @@ jobs:
        run: cargo fmt --all -- --check
      - name: Run clippy
        run: cargo clippy --profile ci --workspace --tests --all-features -- -D warnings
+      - name: Run clippy (without remote feature)
+        run: cargo clippy --profile ci --workspace --tests -- -D warnings

  build-no-lock:
    runs-on: ubuntu-24.04
@@ -181,7 +183,7 @@ jobs:
    runs-on: ubuntu-24.04
    strategy:
      matrix:
-        msrv: ["1.78.0"] # This should match up with rust-version in Cargo.toml
+        msrv: ["1.88.0"] # This should match up with rust-version in Cargo.toml
    env:
      # Need up-to-date compilers for kernels
      CC: clang-18
@@ -212,4 +214,6 @@ jobs:
          cargo update -p aws-sdk-sts --precise 1.51.0
          cargo update -p home --precise 0.5.9
      - name: cargo +${{ matrix.msrv }} check
+        env:
+          RUSTUP_TOOLCHAIN: ${{ matrix.msrv }}
        run: cargo check --profile ci --workspace --tests --benches --all-features
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,42 +12,42 @@ repository = "https://github.com/lancedb/lancedb"
 description = "Serverless, low-latency vector database for AI applications"
 keywords = ["lancedb", "lance", "database", "vector", "search"]
 categories = ["database-implementations"]
-rust-version = "1.78.0"
+rust-version = "1.88.0"

 [workspace.dependencies]
-lance = { "version" = "=2.0.0-beta.8", default-features = false, "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
-lance-core = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
-lance-datagen = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
-lance-file = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
-lance-io = { "version" = "=2.0.0-beta.8", default-features = false, "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
-lance-index = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
-lance-linalg = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
-lance-namespace = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
-lance-namespace-impls = { "version" = "=2.0.0-beta.8", default-features = false, "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
-lance-table = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
-lance-testing = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
-lance-datafusion = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
-lance-encoding = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
-lance-arrow = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
+lance = { "version" = "=1.0.4", default-features = false, "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
+lance-core = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
+lance-datagen = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
+lance-file = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
+lance-io = { "version" = "=1.0.4", default-features = false, "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
+lance-index = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
+lance-linalg = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
+lance-namespace = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
+lance-namespace-impls = { "version" = "=1.0.4", default-features = false, "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
+lance-table = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
+lance-testing = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
+lance-datafusion = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
+lance-encoding = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
+lance-arrow = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
 ahash = "0.8"
 # Note that this one does not include pyarrow
-arrow = { version = "57.2", optional = false }
-arrow-array = "57.2"
-arrow-data = "57.2"
-arrow-ipc = "57.2"
-arrow-ord = "57.2"
-arrow-schema = "57.2"
-arrow-select = "57.2"
-arrow-cast = "57.2"
+arrow = { version = "56.2", optional = false }
+arrow-array = "56.2"
+arrow-data = "56.2"
+arrow-ipc = "56.2"
+arrow-ord = "56.2"
+arrow-schema = "56.2"
+arrow-select = "56.2"
+arrow-cast = "56.2"
 async-trait = "0"
-datafusion = { version = "51.0", default-features = false }
-datafusion-catalog = "51.0"
-datafusion-common = { version = "51.0", default-features = false }
-datafusion-execution = "51.0"
-datafusion-expr = "51.0"
-datafusion-physical-plan = "51.0"
+datafusion = { version = "50.1", default-features = false }
+datafusion-catalog = "50.1"
+datafusion-common = { version = "50.1", default-features = false }
+datafusion-execution = "50.1"
+datafusion-expr = "50.1"
+datafusion-physical-plan = "50.1"
 env_logger = "0.11"
-half = { "version" = "2.7.1", default-features = false, features = [
+half = { "version" = "2.6.0", default-features = false, features = [
    "num-traits",
 ] }
 futures = "0"
@@ -59,7 +59,7 @@ rand = "0.9"
 snafu = "0.8"
 url = "2"
 num-traits = "0.2"
-regex = "1.12"
+regex = "1.10"
 lazy_static = "1"
 semver = "1.0.25"
 chrono = "0.4"
--- a/docs/src/java/java.md
+++ b/docs/src/java/java.md
@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
 <dependency>
    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-core</artifactId>
-    <version>0.23.1</version>
+    <version>0.24.1</version>
 </dependency>
 ```

--- a/java/lancedb-core/pom.xml
+++ b/java/lancedb-core/pom.xml
@@ -8,7 +8,7 @@
    <parent>
      <groupId>com.lancedb</groupId>
      <artifactId>lancedb-parent</artifactId>
-      <version>0.23.1-final.0</version>
+      <version>0.24.1-final.0</version>
      <relativePath>../pom.xml</relativePath>
    </parent>

--- a/java/pom.xml
+++ b/java/pom.xml
@@ -6,7 +6,7 @@

    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-parent</artifactId>
-    <version>0.23.1-final.0</version>
+    <version>0.24.1-final.0</version>
    <packaging>pom</packaging>
    <name>${project.artifactId}</name>
    <description>LanceDB Java SDK Parent POM</description>
--- a/nodejs/Cargo.toml
+++ b/nodejs/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "lancedb-nodejs"
 edition.workspace = true
-version = "0.23.1"
+version = "0.24.1"
 license.workspace = true
 description.workspace = true
 repository.workspace = true
--- a/nodejs/npm/darwin-arm64/package.json
+++ b/nodejs/npm/darwin-arm64/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-darwin-arm64",
-	"version": "0.23.1",
+	"version": "0.24.1",
 	"os": ["darwin"],
 	"cpu": ["arm64"],
 	"main": "lancedb.darwin-arm64.node",
--- a/nodejs/npm/darwin-x64/package.json
+++ b/nodejs/npm/darwin-x64/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-darwin-x64",
-	"version": "0.23.1",
+	"version": "0.24.1",
 	"os": ["darwin"],
 	"cpu": ["x64"],
 	"main": "lancedb.darwin-x64.node",
--- a/nodejs/npm/linux-arm64-gnu/package.json
+++ b/nodejs/npm/linux-arm64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-gnu",
-	"version": "0.23.1",
+	"version": "0.24.1",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-gnu.node",
--- a/nodejs/npm/linux-arm64-musl/package.json
+++ b/nodejs/npm/linux-arm64-musl/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-musl",
-	"version": "0.23.1",
+	"version": "0.24.1",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-musl.node",
--- a/nodejs/npm/linux-x64-gnu/package.json
+++ b/nodejs/npm/linux-x64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-gnu",
-	"version": "0.23.1",
+	"version": "0.24.1",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-gnu.node",
--- a/nodejs/npm/linux-x64-musl/package.json
+++ b/nodejs/npm/linux-x64-musl/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-musl",
-	"version": "0.23.1",
+	"version": "0.24.1",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-musl.node",
--- a/nodejs/npm/win32-arm64-msvc/package.json
+++ b/nodejs/npm/win32-arm64-msvc/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@lancedb/lancedb-win32-arm64-msvc",
-  "version": "0.23.1",
+  "version": "0.24.1",
  "os": [
    "win32"
  ],
--- a/nodejs/npm/win32-x64-msvc/package.json
+++ b/nodejs/npm/win32-x64-msvc/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-win32-x64-msvc",
-	"version": "0.23.1",
+	"version": "0.24.1",
 	"os": ["win32"],
 	"cpu": ["x64"],
 	"main": "lancedb.win32-x64-msvc.node",
--- a/nodejs/package-lock.json
+++ b/nodejs/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "@lancedb/lancedb",
-  "version": "0.23.1",
+  "version": "0.24.1",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "@lancedb/lancedb",
-      "version": "0.23.1",
+      "version": "0.24.1",
      "cpu": [
        "x64",
        "arm64"
--- a/nodejs/package.json
+++ b/nodejs/package.json
@@ -11,7 +11,7 @@
    "ann"
  ],
  "private": false,
-  "version": "0.23.1",
+  "version": "0.24.1",
  "main": "dist/index.js",
  "exports": {
    ".": "./dist/index.js",
--- a/python/.bumpversion.toml
+++ b/python/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.27.0-beta.0"
+current_version = "0.27.1"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/python/CONTRIBUTING.md
+++ b/python/CONTRIBUTING.md
@@ -16,7 +16,7 @@ The Python package is a wrapper around the Rust library, `lancedb`. We use

 To set up your development environment, you will need to install the following:

-1. Python 3.9 or later
+1. Python 3.10 or later
 2. Cargo (Rust's package manager). Use [rustup](https://rustup.rs/) to install.
 3. [protoc](https://grpc.io/docs/protoc-installation/) (Protocol Buffers compiler)

--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -1,28 +1,28 @@
 [package]
 name = "lancedb-python"
-version = "0.27.0-beta.0"
+version = "0.27.1"
 edition.workspace = true
 description = "Python bindings for LanceDB"
 license.workspace = true
 repository.workspace = true
 keywords.workspace = true
 categories.workspace = true
-rust-version = "1.75.0"
+rust-version = "1.88.0"

 [lib]
 name = "_lancedb"
 crate-type = ["cdylib"]

 [dependencies]
-arrow = { version = "57.2", features = ["pyarrow"] }
+arrow = { version = "56.2", features = ["pyarrow"] }
 async-trait = "0.1"
 lancedb = { path = "../rust/lancedb", default-features = false }
 lance-core.workspace = true
 lance-namespace.workspace = true
 lance-io.workspace = true
 env_logger.workspace = true
-pyo3 = { version = "0.26", features = ["extension-module", "abi3-py39"] }
-pyo3-async-runtimes = { version = "0.26", features = [
+pyo3 = { version = "0.25", features = ["extension-module", "abi3-py310"] }
+pyo3-async-runtimes = { version = "0.25", features = [
    "attributes",
    "tokio-runtime",
 ] }
@@ -32,9 +32,9 @@ snafu.workspace = true
 tokio = { version = "1.40", features = ["sync"] }

 [build-dependencies]
-pyo3-build-config = { version = "0.26", features = [
+pyo3-build-config = { version = "0.25", features = [
    "extension-module",
-    "abi3-py39",
+    "abi3-py310",
 ] }

 [features]
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -8,7 +8,7 @@ dependencies = [
    "overrides>=0.7; python_version<'3.12'",
    "packaging",
    "pyarrow>=16",
-    "pydantic>=1.10",
+    "pydantic>=2",
    "tqdm>=4.27.0",
    "lance-namespace>=0.3.2"
 ]
@@ -16,7 +16,7 @@ description = "lancedb"
 authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]
 license = { file = "LICENSE" }
 readme = "README.md"
-requires-python = ">=3.9"
+requires-python = ">=3.10"
 keywords = [
    "data-format",
    "data-science",
@@ -33,10 +33,10 @@ classifiers = [
    "Programming Language :: Python",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3 :: Only",
-    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
    "Topic :: Scientific/Engineering",
 ]

@@ -137,4 +137,4 @@ include = [
    "python/lancedb/_lancedb.pyi",
 ]
 exclude = ["python/tests/"]
-pythonVersion = "3.12"
+pythonVersion = "3.13"
--- a/python/python/lancedb/pydantic.py
+++ b/python/python/lancedb/pydantic.py
@@ -6,7 +6,6 @@
 from __future__ import annotations

 import inspect
-import sys
 import types
 from abc import ABC, abstractmethod
 from datetime import date, datetime
@@ -141,14 +140,6 @@ def Vector(
                raise TypeError("A list of numbers or numpy.ndarray is needed")
            return cls(v)

-        if PYDANTIC_VERSION.major < 2:
-
-            @classmethod
-            def __modify_schema__(cls, field_schema: Dict[str, Any]):
-                field_schema["items"] = {"type": "number"}
-                field_schema["maxItems"] = dim
-                field_schema["minItems"] = dim
-
    return FixedSizeList


@@ -226,26 +217,14 @@ def MultiVector(
        def __get_validators__(cls) -> Generator[Callable, None, None]:
            yield cls.validate

-        # For pydantic v1
         @classmethod
         def validate(cls, v):
             if not isinstance(v, (list, range)):
                 raise TypeError("A list of vectors is needed")
             for vec in v:
                 if not isinstance(vec, (list, range, np.ndarray)) or len(vec) != dim:
                     raise TypeError(f"Each vector must be a list of {dim} numbers")
             return cls(v)
-
-        if PYDANTIC_VERSION.major < 2:
-
-            @classmethod
-            def __modify_schema__(cls, field_schema: Dict[str, Any]):
-                field_schema["items"] = {
-                    "type": "array",
-                    "items": {"type": "number"},
-                    "minItems": dim,
-                    "maxItems": dim,
-                }

    return MultiVectorList

@@ -275,35 +254,31 @@ def _py_type_to_arrow_type(py_type: Type[Any], field: FieldInfo) -> pa.DataType:
        return pa.timestamp("us", tz=tz)
    elif getattr(py_type, "__origin__", None) in (list, tuple):
        child = py_type.__args__[0]
-        return pa.list_(_py_type_to_arrow_type(child, field))
+        return _pydantic_list_child_to_arrow(child, field)
    raise TypeError(
        f"Converting Pydantic type to Arrow Type: unsupported type {py_type}."
    )


-if PYDANTIC_VERSION.major < 2:
-
-    def _pydantic_model_to_fields(model: pydantic.BaseModel) -> List[pa.Field]:
-        return [
-            _pydantic_to_field(name, field) for name, field in model.__fields__.items()
-        ]
-
-else:
-
-    def _pydantic_model_to_fields(model: pydantic.BaseModel) -> List[pa.Field]:
-        return [
-            _pydantic_to_field(name, field)
-            for name, field in model.model_fields.items()
-        ]
+def _pydantic_model_to_fields(model: pydantic.BaseModel) -> List[pa.Field]:
+    return [
+        _pydantic_to_field(name, field) for name, field in model.model_fields.items()
+    ]


 def _pydantic_type_to_arrow_type(tp: Any, field: FieldInfo) -> pa.DataType:
+    def _safe_issubclass(candidate: Any, base: type) -> bool:
+        try:
+            return issubclass(candidate, base)
+        except TypeError:
+            return False
+
    if inspect.isclass(tp):
-        if issubclass(tp, pydantic.BaseModel):
+        if _safe_issubclass(tp, pydantic.BaseModel):
            # Struct
            fields = _pydantic_model_to_fields(tp)
            return pa.struct(fields)
-        if issubclass(tp, FixedSizeListMixin):
+        if _safe_issubclass(tp, FixedSizeListMixin):
            if getattr(tp, "is_multi_vector", lambda: False)():
                return pa.list_(pa.list_(tp.value_arrow_type(), tp.dim()))
            # For regular Vector
@@ -311,45 +286,67 @@ def _pydantic_type_to_arrow_type(tp: Any, field: FieldInfo) -> pa.DataType:
    return _py_type_to_arrow_type(tp, field)


+def _pydantic_list_child_to_arrow(child: Any, field: FieldInfo) -> pa.DataType:
+    unwrapped = _unwrap_optional_annotation(child)
+    if unwrapped is not None:
+        return pa.list_(
+            pa.field("item", _pydantic_type_to_arrow_type(unwrapped, field), True)
+        )
+    return pa.list_(_pydantic_type_to_arrow_type(child, field))
+
+
+def _unwrap_optional_annotation(annotation: Any) -> Any | None:
+    if isinstance(annotation, (_GenericAlias, GenericAlias)):
+        origin = annotation.__origin__
+        args = annotation.__args__
+        if origin == Union:
+            non_none = [arg for arg in args if arg is not type(None)]
+            if len(non_none) == 1 and len(non_none) != len(args):
+                return non_none[0]
+    elif isinstance(annotation, types.UnionType):
+        args = annotation.__args__
+        non_none = [arg for arg in args if arg is not type(None)]
+        if len(non_none) == 1 and len(non_none) != len(args):
+            return non_none[0]
+    return None
+
+
 def _pydantic_to_arrow_type(field: FieldInfo) -> pa.DataType:
    """Convert a Pydantic FieldInfo to Arrow DataType"""
+    unwrapped = _unwrap_optional_annotation(field.annotation)
+    if unwrapped is not None:
+        return _pydantic_type_to_arrow_type(unwrapped, field)
    if isinstance(field.annotation, (_GenericAlias, GenericAlias)):
        origin = field.annotation.__origin__
        args = field.annotation.__args__

        if origin is list:
            child = args[0]
-            return pa.list_(_py_type_to_arrow_type(child, field))
-        elif origin == Union:
-            if len(args) == 2 and args[1] is type(None):
-                return _pydantic_type_to_arrow_type(args[0], field)
-    elif sys.version_info >= (3, 10) and isinstance(field.annotation, types.UnionType):
-        args = field.annotation.__args__
-        if len(args) == 2:
-            for typ in args:
-                if typ is type(None):
-                    continue
-                return _py_type_to_arrow_type(typ, field)
+            return _pydantic_list_child_to_arrow(child, field)
    return _pydantic_type_to_arrow_type(field.annotation, field)


 def is_nullable(field: FieldInfo) -> bool:
    """Check if a Pydantic FieldInfo is nullable."""
+    if _unwrap_optional_annotation(field.annotation) is not None:
+        return True
    if isinstance(field.annotation, (_GenericAlias, GenericAlias)):
        origin = field.annotation.__origin__
        args = field.annotation.__args__
        if origin == Union:
-            if len(args) == 2 and args[1] is type(None):
+            if any(typ is type(None) for typ in args):
                return True
-    elif sys.version_info >= (3, 10) and isinstance(field.annotation, types.UnionType):
+    elif isinstance(field.annotation, types.UnionType):
        args = field.annotation.__args__
        for typ in args:
            if typ is type(None):
                return True
-    elif inspect.isclass(field.annotation) and issubclass(
-        field.annotation, FixedSizeListMixin
-    ):
-        return field.annotation.nullable()
+    elif inspect.isclass(field.annotation):
+        try:
+            if issubclass(field.annotation, FixedSizeListMixin):
+                return field.annotation.nullable()
+        except TypeError:
+            return False
    return False


@@ -446,8 +443,6 @@ class LanceModel(pydantic.BaseModel):

    @classmethod
    def safe_get_fields(cls):
-        if PYDANTIC_VERSION.major < 2:
-            return cls.__fields__
        return cls.model_fields

    @classmethod
@@ -490,18 +485,8 @@ def get_extras(field_info: FieldInfo, key: str) -> Any:
    return (field_info.field_info.extra or {}).get("json_schema_extra", {}).get(key)


-if PYDANTIC_VERSION.major < 2:
-
-    def model_to_dict(model: pydantic.BaseModel) -> Dict[str, Any]:
-        """
-        Convert a Pydantic model to a dictionary.
-        """
-        return model.dict()
-
-else:
-
-    def model_to_dict(model: pydantic.BaseModel) -> Dict[str, Any]:
-        """
-        Convert a Pydantic model to a dictionary.
-        """
-        return model.model_dump()
+def model_to_dict(model: pydantic.BaseModel) -> Dict[str, Any]:
+    """
+    Convert a Pydantic model to a dictionary.
+    """
+    return model.model_dump()
--- a/python/python/lancedb/query.py
+++ b/python/python/lancedb/query.py
@@ -961,27 +961,22 @@ class LanceQueryBuilder(ABC):
        >>> query = [100, 100]
        >>> plan = table.search(query).analyze_plan()
        >>> print(plan)  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
-        AnalyzeExec verbose=true, elapsed=..., metrics=...
-          TracedExec, elapsed=..., metrics=...
-            ProjectionExec: elapsed=..., expr=[...],
-            metrics=[output_rows=..., elapsed_compute=..., output_bytes=...]
-              GlobalLimitExec: elapsed=..., skip=0, fetch=10,
-              metrics=[output_rows=..., elapsed_compute=..., output_bytes=...]
-                FilterExec: elapsed=..., _distance@2 IS NOT NULL, metrics=[...]
-                  SortExec: elapsed=..., TopK(fetch=10), expr=[...],
+        AnalyzeExec verbose=true, metrics=[], cumulative_cpu=...
+          TracedExec, metrics=[], cumulative_cpu=...
+            ProjectionExec: expr=[...], metrics=[...], cumulative_cpu=...
+              GlobalLimitExec: skip=0, fetch=10, metrics=[...], cumulative_cpu=...
+                FilterExec: _distance@2 IS NOT NULL,
+                metrics=[output_rows=..., elapsed_compute=...], cumulative_cpu=...
+                  SortExec: TopK(fetch=10), expr=[...],
                  preserve_partitioning=[...],
-                  metrics=[output_rows=..., elapsed_compute=...,
-                  output_bytes=..., row_replacements=...]
-                    KNNVectorDistance: elapsed=..., metric=l2,
-                    metrics=[output_rows=..., elapsed_compute=...,
-                    output_bytes=..., output_batches=...]
-                      LanceRead: elapsed=..., uri=..., projection=[vector],
-                      num_fragments=..., range_before=None, range_after=None,
-                      row_id=true, row_addr=false,
-                      full_filter=--, refine_filter=--,
-                      metrics=[output_rows=..., elapsed_compute=..., output_bytes=...,
-                      fragments_scanned=..., ranges_scanned=1, rows_scanned=1,
-                      bytes_read=..., iops=..., requests=..., task_wait_time=...]
+                  metrics=[output_rows=..., elapsed_compute=..., row_replacements=...],
+                  cumulative_cpu=...
+                    KNNVectorDistance: metric=l2,
+                    metrics=[output_rows=..., elapsed_compute=..., output_batches=...],
+                    cumulative_cpu=...
+                      LanceRead: uri=..., projection=[vector], ...
+                      metrics=[output_rows=..., elapsed_compute=...,
+                      bytes_read=..., iops=..., requests=...], cumulative_cpu=...

        Returns
        -------
--- a/python/python/tests/conftest.py
+++ b/python/python/tests/conftest.py
@@ -2,12 +2,27 @@
 # SPDX-FileCopyrightText: Copyright The LanceDB Authors

 from datetime import timedelta
+
 from lancedb.db import AsyncConnection, DBConnection
 import lancedb
 import pytest
 import pytest_asyncio


+def pandas_string_type():
+    """Return the PyArrow string type that pandas uses for string columns.
+
+    pandas 3.0+ uses large_string for string columns, pandas 2.x uses string.
+    """
+    import pandas as pd
+    import pyarrow as pa
+
+    version = tuple(int(x) for x in pd.__version__.split(".")[:2])
+    if version >= (3, 0):
+        return pa.large_utf8()
+    return pa.utf8()
+
+
 # Use an in-memory database for most tests.
@pytest.fixture
 def mem_db() -> DBConnection:
--- a/python/python/tests/test_db.py
+++ b/python/python/tests/test_db.py
@@ -268,6 +268,8 @@ async def test_create_table_from_iterator_async(mem_db_async: lancedb.AsyncConne


 def test_create_exist_ok(tmp_db: lancedb.DBConnection):
+    from conftest import pandas_string_type
+
    data = pd.DataFrame(
        {
            "vector": [[3.1, 4.1], [5.9, 26.5]],
@@ -286,10 +288,11 @@ def test_create_exist_ok(tmp_db: lancedb.DBConnection):
    assert tbl.schema == tbl2.schema
    assert len(tbl) == len(tbl2)

+    # pandas 3.0+ uses large_string, pandas 2.x uses string
    schema = pa.schema(
        [
            pa.field("vector", pa.list_(pa.float32(), list_size=2)),
-            pa.field("item", pa.utf8()),
+            pa.field("item", pandas_string_type()),
            pa.field("price", pa.float64()),
        ]
    )
@@ -299,7 +302,7 @@ def test_create_exist_ok(tmp_db: lancedb.DBConnection):
    bad_schema = pa.schema(
        [
            pa.field("vector", pa.list_(pa.float32(), list_size=2)),
-            pa.field("item", pa.utf8()),
+            pa.field("item", pandas_string_type()),
            pa.field("price", pa.float64()),
            pa.field("extra", pa.float32()),
        ]
@@ -365,6 +368,8 @@ async def test_create_mode_async(tmp_db_async: lancedb.AsyncConnection):

@pytest.mark.asyncio
 async def test_create_exist_ok_async(tmp_db_async: lancedb.AsyncConnection):
+    from conftest import pandas_string_type
+
    data = pd.DataFrame(
        {
            "vector": [[3.1, 4.1], [5.9, 26.5]],
@@ -382,10 +387,11 @@ async def test_create_exist_ok_async(tmp_db_async: lancedb.AsyncConnection):
    assert tbl.name == tbl2.name
    assert await tbl.schema() == await tbl2.schema()

+    # pandas 3.0+ uses large_string, pandas 2.x uses string
    schema = pa.schema(
        [
            pa.field("vector", pa.list_(pa.float32(), list_size=2)),
-            pa.field("item", pa.utf8()),
+            pa.field("item", pandas_string_type()),
            pa.field("price", pa.float64()),
        ]
    )
@@ -595,6 +601,8 @@ def test_open_table_sync(tmp_db: lancedb.DBConnection):

@pytest.mark.asyncio
 async def test_open_table(tmp_path):
+    from conftest import pandas_string_type
+
    db = await lancedb.connect_async(tmp_path)
    data = pd.DataFrame(
        {
@@ -614,10 +622,11 @@ async def test_open_table(tmp_path):
        )
        is not None
    )
+    # pandas 3.0+ uses large_string, pandas 2.x uses string
    assert await tbl.schema() == pa.schema(
        {
            "vector": pa.list_(pa.float32(), list_size=2),
-            "item": pa.utf8(),
+            "item": pandas_string_type(),
            "price": pa.float64(),
        }
    )
--- a/python/python/tests/test_namespace_integration.py
+++ b/python/python/tests/test_namespace_integration.py
@@ -26,6 +26,8 @@ import pytest
 from lance_namespace import (
    CreateEmptyTableRequest,
    CreateEmptyTableResponse,
+    DeclareTableRequest,
+    DeclareTableResponse,
    DescribeTableRequest,
    DescribeTableResponse,
    LanceNamespace,
@@ -160,6 +162,19 @@ class TrackingNamespace(LanceNamespace):

        return modified

+    def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse:
+        """Track declare_table calls and inject rotating credentials."""
+        with self.lock:
+            self.create_call_count += 1
+            count = self.create_call_count
+
+        response = self.inner.declare_table(request)
+        response.storage_options = self._modify_storage_options(
+            response.storage_options, count
+        )
+
+        return response
+
    def create_empty_table(
        self, request: CreateEmptyTableRequest
    ) -> CreateEmptyTableResponse:
--- a/python/python/tests/test_pydantic.py
+++ b/python/python/tests/test_pydantic.py
@@ -2,7 +2,6 @@
 # SPDX-FileCopyrightText: Copyright The LanceDB Authors

 import json
-import sys
 from datetime import date, datetime
 from typing import List, Optional, Tuple

@@ -20,10 +19,6 @@ from pydantic import BaseModel
 from pydantic import Field


-@pytest.mark.skipif(
-    sys.version_info < (3, 9),
-    reason="using native type alias requires python3.9 or higher",
-)
 def test_pydantic_to_arrow():
    class StructModel(pydantic.BaseModel):
        a: str
@@ -83,10 +78,6 @@ def test_pydantic_to_arrow():
    assert schema == expect_schema


-@pytest.mark.skipif(
-    sys.version_info < (3, 10),
-    reason="using | type syntax requires python3.10 or higher",
-)
 def test_optional_types_py310():
    class TestModel(pydantic.BaseModel):
        a: str | None
@@ -105,10 +96,233 @@ def test_optional_types_py310():
    assert schema == expect_schema


-@pytest.mark.skipif(
-    sys.version_info > (3, 8),
-    reason="using native type alias requires python3.9 or higher",
-)
+def test_optional_structs():
+    class SplitInfo(pydantic.BaseModel):
+        start_frame: int
+        end_frame: int
+
+    class TestModel(pydantic.BaseModel):
+        id: str
+        split: SplitInfo | None = None
+
+    schema = pydantic_to_schema(TestModel)
+
+    expect_schema = pa.schema(
+        [
+            pa.field("id", pa.utf8(), False),
+            pa.field(
+                "split",
+                pa.struct(
+                    [
+                        pa.field("start_frame", pa.int64(), False),
+                        pa.field("end_frame", pa.int64(), False),
+                    ]
+                ),
+                True,
+            ),
+        ]
+    )
+    assert schema == expect_schema
+
+
+def test_optional_struct_list_py310():
+    class SplitInfo(pydantic.BaseModel):
+        start_frame: int
+        end_frame: int
+
+    class TestModel(pydantic.BaseModel):
+        id: str
+        splits: list[SplitInfo] | None = None
+
+    schema = pydantic_to_schema(TestModel)
+
+    expect_schema = pa.schema(
+        [
+            pa.field("id", pa.utf8(), False),
+            pa.field(
+                "splits",
+                pa.list_(
+                    pa.struct(
+                        [
+                            pa.field("start_frame", pa.int64(), False),
+                            pa.field("end_frame", pa.int64(), False),
+                        ]
+                    )
+                ),
+                True,
+            ),
+        ]
+    )
+    assert schema == expect_schema
+
+
+def test_nested_struct_list():
+    class SplitInfo(pydantic.BaseModel):
+        start_frame: int
+        end_frame: int
+
+    class TestModel(pydantic.BaseModel):
+        id: str
+        splits: list[SplitInfo]
+
+    schema = pydantic_to_schema(TestModel)
+
+    expect_schema = pa.schema(
+        [
+            pa.field("id", pa.utf8(), False),
+            pa.field(
+                "splits",
+                pa.list_(
+                    pa.struct(
+                        [
+                            pa.field("start_frame", pa.int64(), False),
+                            pa.field("end_frame", pa.int64(), False),
+                        ]
+                    )
+                ),
+                False,
+            ),
+        ]
+    )
+    assert schema == expect_schema
+
+
+def test_nested_struct_list_optional():
+    class SplitInfo(pydantic.BaseModel):
+        start_frame: int
+        end_frame: int
+
+    class TestModel(pydantic.BaseModel):
+        id: str
+        splits: Optional[list[SplitInfo]] = None
+
+    schema = pydantic_to_schema(TestModel)
+
+    expect_schema = pa.schema(
+        [
+            pa.field("id", pa.utf8(), False),
+            pa.field(
+                "splits",
+                pa.list_(
+                    pa.struct(
+                        [
+                            pa.field("start_frame", pa.int64(), False),
+                            pa.field("end_frame", pa.int64(), False),
+                        ]
+                    )
+                ),
+                True,
+            ),
+        ]
+    )
+    assert schema == expect_schema
+
+
+def test_nested_struct_list_optional_items():
+    class SplitInfo(pydantic.BaseModel):
+        start_frame: int
+        end_frame: int
+
+    class TestModel(pydantic.BaseModel):
+        id: str
+        splits: list[Optional[SplitInfo]]
+
+    schema = pydantic_to_schema(TestModel)
+
+    expect_schema = pa.schema(
+        [
+            pa.field("id", pa.utf8(), False),
+            pa.field(
+                "splits",
+                pa.list_(
+                    pa.field(
+                        "item",
+                        pa.struct(
+                            [
+                                pa.field("start_frame", pa.int64(), False),
+                                pa.field("end_frame", pa.int64(), False),
+                            ]
+                        ),
+                        True,
+                    )
+                ),
+                False,
+            ),
+        ]
+    )
+    assert schema == expect_schema
+
+
+def test_nested_struct_list_optional_container_and_items():
+    class SplitInfo(pydantic.BaseModel):
+        start_frame: int
+        end_frame: int
+
+    class TestModel(pydantic.BaseModel):
+        id: str
+        splits: Optional[list[Optional[SplitInfo]]] = None
+
+    schema = pydantic_to_schema(TestModel)
+
+    expect_schema = pa.schema(
+        [
+            pa.field("id", pa.utf8(), False),
+            pa.field(
+                "splits",
+                pa.list_(
+                    pa.field(
+                        "item",
+                        pa.struct(
+                            [
+                                pa.field("start_frame", pa.int64(), False),
+                                pa.field("end_frame", pa.int64(), False),
+                            ]
+                        ),
+                        True,
+                    )
+                ),
+                True,
+            ),
+        ]
+    )
+    assert schema == expect_schema
+
+
+def test_nested_struct_list_optional_items_pep604():
+    class SplitInfo(pydantic.BaseModel):
+        start_frame: int
+        end_frame: int
+
+    class TestModel(pydantic.BaseModel):
+        id: str
+        splits: list[SplitInfo | None]
+
+    schema = pydantic_to_schema(TestModel)
+
+    expect_schema = pa.schema(
+        [
+            pa.field("id", pa.utf8(), False),
+            pa.field(
+                "splits",
+                pa.list_(
+                    pa.field(
+                        "item",
+                        pa.struct(
+                            [
+                                pa.field("start_frame", pa.int64(), False),
+                                pa.field("end_frame", pa.int64(), False),
+                            ]
+                        ),
+                        True,
+                    )
+                ),
+                False,
+            ),
+        ]
+    )
+    assert schema == expect_schema
+
+
 def test_pydantic_to_arrow_py38():
    class StructModel(pydantic.BaseModel):
        a: str
--- a/python/python/tests/test_util.py
+++ b/python/python/tests/test_util.py
@@ -528,12 +528,19 @@ def test_sanitize_data(
        else:
            expected_schema = schema
    else:
+        from conftest import pandas_string_type
+
+        # polars uses large_string, pandas 3.0+ uses large_string, others use string
+        if isinstance(data, pl.DataFrame):
+            text_type = pa.large_utf8()
+        elif isinstance(data, pd.DataFrame):
+            text_type = pandas_string_type()
+        else:
+            text_type = pa.string()
        expected_schema = pa.schema(
            {
                "id": pa.int64(),
-                "text": pa.large_utf8()
-                if isinstance(data, pl.DataFrame)
-                else pa.string(),
+                "text": text_type,
                "vector": pa.list_(pa.float32(), 10),
            }
        )
--- a/python/src/arrow.rs
+++ b/python/src/arrow.rs
@@ -10,7 +10,8 @@ use arrow::{
 use futures::stream::StreamExt;
 use lancedb::arrow::SendableRecordBatchStream;
 use pyo3::{
-    exceptions::PyStopAsyncIteration, pyclass, pymethods, Bound, Py, PyAny, PyRef, PyResult, Python,
+    exceptions::PyStopAsyncIteration, pyclass, pymethods, Bound, PyAny, PyObject, PyRef, PyResult,
+    Python,
 };
 use pyo3_async_runtimes::tokio::future_into_py;

@@ -35,11 +36,8 @@ impl RecordBatchStream {
 #[pymethods]
 impl RecordBatchStream {
    #[getter]
-    pub fn schema(&self, py: Python) -> PyResult<Py<PyAny>> {
-        (*self.schema)
-            .clone()
-            .into_pyarrow(py)
-            .map(|obj| obj.unbind())
+    pub fn schema(&self, py: Python) -> PyResult<PyObject> {
+        (*self.schema).clone().into_pyarrow(py)
    }

    pub fn __aiter__(self_: PyRef<'_, Self>) -> PyRef<'_, Self> {
@@ -55,12 +53,7 @@ impl RecordBatchStream {
                .next()
                .await
                .ok_or_else(|| PyStopAsyncIteration::new_err(""))?;
-            #[allow(deprecated)]
-            let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
-                let bound = inner_next.infer_error()?.to_pyarrow(py)?;
-                Ok(bound.unbind())
-            })?;
-            Ok(py_obj)
+            Python::with_gil(|py| inner_next.infer_error()?.to_pyarrow(py))
        })
    }
 }
--- a/python/src/connection.rs
+++ b/python/src/connection.rs
@@ -12,7 +12,7 @@ use pyo3::{
    exceptions::{PyRuntimeError, PyValueError},
    pyclass, pyfunction, pymethods,
    types::{PyDict, PyDictMethods},
-    Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
+    Bound, FromPyObject, Py, PyAny, PyObject, PyRef, PyResult, Python,
 };
 use pyo3_async_runtimes::tokio::future_into_py;

@@ -114,7 +114,7 @@ impl Connection {
        data: Bound<'_, PyAny>,
        namespace: Vec<String>,
        storage_options: Option<HashMap<String, String>>,
-        storage_options_provider: Option<Py<PyAny>>,
+        storage_options_provider: Option<PyObject>,
        location: Option<String>,
    ) -> PyResult<Bound<'a, PyAny>> {
        let inner = self_.get_inner()?.clone();
@@ -152,7 +152,7 @@ impl Connection {
        schema: Bound<'_, PyAny>,
        namespace: Vec<String>,
        storage_options: Option<HashMap<String, String>>,
-        storage_options_provider: Option<Py<PyAny>>,
+        storage_options_provider: Option<PyObject>,
        location: Option<String>,
    ) -> PyResult<Bound<'a, PyAny>> {
        let inner = self_.get_inner()?.clone();
@@ -187,7 +187,7 @@ impl Connection {
        name: String,
        namespace: Vec<String>,
        storage_options: Option<HashMap<String, String>>,
-        storage_options_provider: Option<Py<PyAny>>,
+        storage_options_provider: Option<PyObject>,
        index_cache_size: Option<u32>,
        location: Option<String>,
    ) -> PyResult<Bound<'_, PyAny>> {
@@ -307,7 +307,6 @@ impl Connection {
                ..Default::default()
            };
            let response = inner.list_namespaces(request).await.infer_error()?;
-            #[allow(deprecated)]
            Python::with_gil(|py| -> PyResult<Py<PyDict>> {
                let dict = PyDict::new(py);
                dict.set_item("namespaces", response.namespaces)?;
@@ -328,7 +327,8 @@ impl Connection {
        let py = self_.py();
        future_into_py(py, async move {
            use lance_namespace::models::CreateNamespaceRequest;
-            let mode_enum = mode.and_then(|m| match m.to_lowercase().as_str() {
+            // Mode is now a string field
+            let mode_str = mode.and_then(|m| match m.to_lowercase().as_str() {
                "create" => Some("Create".to_string()),
                "exist_ok" => Some("ExistOk".to_string()),
                "overwrite" => Some("Overwrite".to_string()),
@@ -340,12 +340,11 @@ impl Connection {
                } else {
                    Some(namespace)
                },
-                mode: mode_enum,
+                mode: mode_str,
                properties,
                ..Default::default()
            };
            let response = inner.create_namespace(request).await.infer_error()?;
-            #[allow(deprecated)]
            Python::with_gil(|py| -> PyResult<Py<PyDict>> {
                let dict = PyDict::new(py);
                dict.set_item("properties", response.properties)?;
@@ -365,12 +364,13 @@ impl Connection {
        let py = self_.py();
        future_into_py(py, async move {
            use lance_namespace::models::DropNamespaceRequest;
-            let mode_enum = mode.and_then(|m| match m.to_uppercase().as_str() {
+            // Mode and Behavior are now string fields
+            let mode_str = mode.and_then(|m| match m.to_uppercase().as_str() {
                "SKIP" => Some("Skip".to_string()),
                "FAIL" => Some("Fail".to_string()),
                _ => None,
            });
-            let behavior_enum = behavior.and_then(|b| match b.to_uppercase().as_str() {
+            let behavior_str = behavior.and_then(|b| match b.to_uppercase().as_str() {
                "RESTRICT" => Some("Restrict".to_string()),
                "CASCADE" => Some("Cascade".to_string()),
                _ => None,
@@ -381,12 +381,11 @@ impl Connection {
                } else {
                    Some(namespace)
                },
-                mode: mode_enum,
-                behavior: behavior_enum,
+                mode: mode_str,
+                behavior: behavior_str,
                ..Default::default()
            };
            let response = inner.drop_namespace(request).await.infer_error()?;
-            #[allow(deprecated)]
            Python::with_gil(|py| -> PyResult<Py<PyDict>> {
                let dict = PyDict::new(py);
                dict.set_item("properties", response.properties)?;
@@ -414,7 +413,6 @@ impl Connection {
                ..Default::default()
            };
            let response = inner.describe_namespace(request).await.infer_error()?;
-            #[allow(deprecated)]
            Python::with_gil(|py| -> PyResult<Py<PyDict>> {
                let dict = PyDict::new(py);
                dict.set_item("properties", response.properties)?;
@@ -445,7 +443,6 @@ impl Connection {
                ..Default::default()
            };
            let response = inner.list_tables(request).await.infer_error()?;
-            #[allow(deprecated)]
            Python::with_gil(|py| -> PyResult<Py<PyDict>> {
                let dict = PyDict::new(py);
                dict.set_item("tables", response.tables)?;
--- a/python/src/error.rs
+++ b/python/src/error.rs
@@ -40,34 +40,31 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
                    request_id,
                    source,
                    status_code,
-                } => {
-                    #[allow(deprecated)]
-                    Python::with_gil(|py| {
-                        let message = err.to_string();
-                        let http_err_cls = py
-                            .import(intern!(py, "lancedb.remote.errors"))?
-                            .getattr(intern!(py, "HttpError"))?;
-                        let err = http_err_cls.call1((
-                            message,
+                } => Python::with_gil(|py| {
+                    let message = err.to_string();
+                    let http_err_cls = py
+                        .import(intern!(py, "lancedb.remote.errors"))?
+                        .getattr(intern!(py, "HttpError"))?;
+                    let err = http_err_cls.call1((
+                        message,
+                        request_id,
+                        status_code.map(|s| s.as_u16()),
+                    ))?;
+
+                    if let Some(cause) = source.source() {
+                        // The HTTP error already includes the first cause. But
+                        // we can add the rest of the chain if there is any more.
+                        let cause_err = http_from_rust_error(
+                            py,
+                            cause,
                            request_id,
                            status_code.map(|s| s.as_u16()),
-                        ))?;
+                        )?;
+                        err.setattr(intern!(py, "__cause__"), cause_err)?;
+                    }

-                        if let Some(cause) = source.source() {
-                            // The HTTP error already includes the first cause. But
-                            // we can add the rest of the chain if there is any more.
-                            let cause_err = http_from_rust_error(
-                                py,
-                                cause,
-                                request_id,
-                                status_code.map(|s| s.as_u16()),
-                            )?;
-                            err.setattr(intern!(py, "__cause__"), cause_err)?;
-                        }
-
-                        Err(PyErr::from_value(err))
-                    })
-                }
+                    Err(PyErr::from_value(err))
+                }),
                LanceError::Retry {
                    request_id,
                    request_failures,
@@ -78,37 +75,33 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
                    max_read_failures,
                    source,
                    status_code,
-                } =>
-                {
-                    #[allow(deprecated)]
-                    Python::with_gil(|py| {
-                        let cause_err = http_from_rust_error(
-                            py,
-                            source.as_ref(),
-                            request_id,
-                            status_code.map(|s| s.as_u16()),
-                        )?;
+                } => Python::with_gil(|py| {
+                    let cause_err = http_from_rust_error(
+                        py,
+                        source.as_ref(),
+                        request_id,
+                        status_code.map(|s| s.as_u16()),
+                    )?;

-                        let message = err.to_string();
-                        let retry_error_cls = py
-                            .import(intern!(py, "lancedb.remote.errors"))?
-                            .getattr("RetryError")?;
-                        let err = retry_error_cls.call1((
-                            message,
-                            request_id,
-                            *request_failures,
-                            *connect_failures,
-                            *read_failures,
-                            *max_request_failures,
-                            *max_connect_failures,
-                            *max_read_failures,
-                            status_code.map(|s| s.as_u16()),
-                        ))?;
+                    let message = err.to_string();
+                    let retry_error_cls = py
+                        .import(intern!(py, "lancedb.remote.errors"))?
+                        .getattr("RetryError")?;
+                    let err = retry_error_cls.call1((
+                        message,
+                        request_id,
+                        *request_failures,
+                        *connect_failures,
+                        *read_failures,
+                        *max_request_failures,
+                        *max_connect_failures,
+                        *max_read_failures,
+                        status_code.map(|s| s.as_u16()),
+                    ))?;

-                        err.setattr(intern!(py, "__cause__"), cause_err)?;
-                        Err(PyErr::from_value(err))
-                    })
-                }
+                    err.setattr(intern!(py, "__cause__"), cause_err)?;
+                    Err(PyErr::from_value(err))
+                }),
                _ => self.runtime_error(),
            },
        }
--- a/python/src/header.rs
+++ b/python/src/header.rs
@@ -12,7 +12,6 @@ pub struct PyHeaderProvider {

 impl Clone for PyHeaderProvider {
    fn clone(&self) -> Self {
-        #[allow(deprecated)]
        Python::with_gil(|py| Self {
            provider: self.provider.clone_ref(py),
        })
@@ -26,7 +25,6 @@ impl PyHeaderProvider {

    /// Get headers from the Python provider (internal implementation)
    fn get_headers_internal(&self) -> Result<HashMap<String, String>, String> {
-        #[allow(deprecated)]
        Python::with_gil(|py| {
            // Call the get_headers method
            let result = self.provider.call_method0(py, "get_headers");
--- a/python/src/permutation.rs
+++ b/python/src/permutation.rs
@@ -19,7 +19,7 @@ use pyo3::{
    exceptions::PyRuntimeError,
    pyclass, pymethods,
    types::{PyAnyMethods, PyDict, PyDictMethods, PyType},
-    Bound, Py, PyAny, PyRef, PyRefMut, PyResult, Python,
+    Bound, PyAny, PyRef, PyRefMut, PyResult, Python,
 };
 use pyo3_async_runtimes::tokio::future_into_py;

@@ -281,12 +281,7 @@ impl PyPermutationReader {
        let reader = slf.reader.clone();
        future_into_py(slf.py(), async move {
            let schema = reader.output_schema(selection).await.infer_error()?;
-            #[allow(deprecated)]
-            let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
-                let bound = schema.to_pyarrow(py)?;
-                Ok(bound.unbind())
-            })?;
-            Ok(py_obj)
+            Python::with_gil(|py| schema.to_pyarrow(py))
        })
    }

--- a/python/src/query.rs
+++ b/python/src/query.rs
@@ -29,7 +29,6 @@ use pyo3::types::PyList;
 use pyo3::types::{PyDict, PyString};
 use pyo3::Bound;
 use pyo3::IntoPyObject;
-use pyo3::Py;
 use pyo3::PyAny;
 use pyo3::PyRef;
 use pyo3::PyResult;
@@ -454,12 +453,7 @@ impl Query {
        let inner = self_.inner.clone();
        future_into_py(self_.py(), async move {
            let schema = inner.output_schema().await.infer_error()?;
-            #[allow(deprecated)]
-            let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
-                let bound = schema.to_pyarrow(py)?;
-                Ok(bound.unbind())
-            })?;
-            Ok(py_obj)
+            Python::with_gil(|py| schema.to_pyarrow(py))
        })
    }

@@ -538,12 +532,7 @@ impl TakeQuery {
        let inner = self_.inner.clone();
        future_into_py(self_.py(), async move {
            let schema = inner.output_schema().await.infer_error()?;
-            #[allow(deprecated)]
-            let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
-                let bound = schema.to_pyarrow(py)?;
-                Ok(bound.unbind())
-            })?;
-            Ok(py_obj)
+            Python::with_gil(|py| schema.to_pyarrow(py))
        })
    }

@@ -638,12 +627,7 @@ impl FTSQuery {
        let inner = self_.inner.clone();
        future_into_py(self_.py(), async move {
            let schema = inner.output_schema().await.infer_error()?;
-            #[allow(deprecated)]
-            let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
-                let bound = schema.to_pyarrow(py)?;
-                Ok(bound.unbind())
-            })?;
-            Ok(py_obj)
+            Python::with_gil(|py| schema.to_pyarrow(py))
        })
    }

@@ -822,12 +806,7 @@ impl VectorQuery {
        let inner = self_.inner.clone();
        future_into_py(self_.py(), async move {
            let schema = inner.output_schema().await.infer_error()?;
-            #[allow(deprecated)]
-            let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
-                let bound = schema.to_pyarrow(py)?;
-                Ok(bound.unbind())
-            })?;
-            Ok(py_obj)
+            Python::with_gil(|py| schema.to_pyarrow(py))
        })
    }

--- a/python/src/storage_options.rs
+++ b/python/src/storage_options.rs
@@ -17,12 +17,11 @@ use pyo3::types::PyDict;
 /// Internal wrapper around a Python object implementing StorageOptionsProvider
 pub struct PyStorageOptionsProvider {
    /// The Python object implementing fetch_storage_options()
-    inner: Py<PyAny>,
+    inner: PyObject,
 }

 impl Clone for PyStorageOptionsProvider {
    fn clone(&self) -> Self {
-        #[allow(deprecated)]
        Python::with_gil(|py| Self {
            inner: self.inner.clone_ref(py),
        })
@@ -30,8 +29,7 @@ impl Clone for PyStorageOptionsProvider {
 }

 impl PyStorageOptionsProvider {
-    pub fn new(obj: Py<PyAny>) -> PyResult<Self> {
-        #[allow(deprecated)]
+    pub fn new(obj: PyObject) -> PyResult<Self> {
        Python::with_gil(|py| {
            // Verify the object has a fetch_storage_options method
            if !obj.bind(py).hasattr("fetch_storage_options")? {
@@ -39,9 +37,7 @@ impl PyStorageOptionsProvider {
                    "StorageOptionsProvider must implement fetch_storage_options() method",
                ));
            }
-            Ok(Self {
-                inner: obj.clone_ref(py),
-            })
+            Ok(Self { inner: obj })
        })
    }
 }
@@ -64,7 +60,6 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
        let py_provider = self.py_provider.clone();

        tokio::task::spawn_blocking(move || {
-            #[allow(deprecated)]
            Python::with_gil(|py| {
                // Call the Python fetch_storage_options method
                let result = py_provider
@@ -124,7 +119,6 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
    }

    fn provider_id(&self) -> String {
-        #[allow(deprecated)]
        Python::with_gil(|py| {
            // Call provider_id() method on the Python object
            let obj = self.py_provider.inner.bind(py);
@@ -149,7 +143,7 @@ impl std::fmt::Debug for PyStorageOptionsProviderWrapper {
 /// This is the main entry point for converting Python StorageOptionsProvider objects
 /// to Rust trait objects that can be used by the Lance ecosystem.
 pub fn py_object_to_storage_options_provider(
-    py_obj: Py<PyAny>,
+    py_obj: PyObject,
 ) -> PyResult<Arc<dyn StorageOptionsProvider>> {
    let py_provider = PyStorageOptionsProvider::new(py_obj)?;
    Ok(Arc::new(PyStorageOptionsProviderWrapper::new(py_provider)))
--- a/python/src/table.rs
+++ b/python/src/table.rs
@@ -21,7 +21,7 @@ use pyo3::{
    exceptions::{PyKeyError, PyRuntimeError, PyValueError},
    pyclass, pymethods,
    types::{IntoPyDict, PyAnyMethods, PyDict, PyDictMethods},
-    Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
+    Bound, FromPyObject, PyAny, PyRef, PyResult, Python,
 };
 use pyo3_async_runtimes::tokio::future_into_py;

@@ -287,12 +287,7 @@ impl Table {
        let inner = self_.inner_ref()?.clone();
        future_into_py(self_.py(), async move {
            let schema = inner.schema().await.infer_error()?;
-            #[allow(deprecated)]
-            let py_obj: Py<PyAny> = Python::with_gil(|py| -> PyResult<Py<PyAny>> {
-                let bound = schema.to_pyarrow(py)?;
-                Ok(bound.unbind())
-            })?;
-            Ok(py_obj)
+            Python::with_gil(|py| schema.to_pyarrow(py))
        })
    }

@@ -442,7 +437,6 @@ impl Table {
        future_into_py(self_.py(), async move {
            let stats = inner.index_stats(&index_name).await.infer_error()?;
            if let Some(stats) = stats {
-                #[allow(deprecated)]
                Python::with_gil(|py| {
                    let dict = PyDict::new(py);
                    dict.set_item("num_indexed_rows", stats.num_indexed_rows)?;
@@ -473,7 +467,6 @@ impl Table {
        let inner = self_.inner_ref()?.clone();
        future_into_py(self_.py(), async move {
            let stats = inner.stats().await.infer_error()?;
-            #[allow(deprecated)]
            Python::with_gil(|py| {
                let dict = PyDict::new(py);
                dict.set_item("total_bytes", stats.total_bytes)?;
@@ -528,7 +521,6 @@ impl Table {
        let inner = self_.inner_ref()?.clone();
        future_into_py(self_.py(), async move {
            let versions = inner.list_versions().await.infer_error()?;
-            #[allow(deprecated)]
            let versions_as_dict = Python::with_gil(|py| {
                versions
                    .iter()
@@ -880,7 +872,6 @@ impl Tags {
            let tags = inner.tags().await.infer_error()?;
            let res = tags.list().await.infer_error()?;

-            #[allow(deprecated)]
            Python::with_gil(|py| {
                let py_dict = PyDict::new(py);
                for (key, contents) in res {
--- a/rust/lancedb/Cargo.toml
+++ b/rust/lancedb/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb"
-version = "0.23.1"
+version = "0.24.1"
 edition.workspace = true
 description = "LanceDB: A serverless, low-latency vector database for AI applications"
 license.workspace = true
--- a/rust/lancedb/src/connection.rs
+++ b/rust/lancedb/src/connection.rs
@@ -36,10 +36,42 @@ use crate::remote::{
 };
 use crate::table::{TableDefinition, WriteOptions};
 use crate::Table;
+use lance::io::ObjectStoreParams;
 pub use lance_encoding::version::LanceFileVersion;
 #[cfg(feature = "remote")]
 use lance_io::object_store::StorageOptions;
-use lance_io::object_store::StorageOptionsProvider;
+use lance_io::object_store::{StorageOptionsAccessor, StorageOptionsProvider};
+
+/// Merges `pairs` into the static storage options, keeping any provider already attached.
+fn merge_storage_options(
+    store_params: &mut ObjectStoreParams,
+    pairs: impl IntoIterator<Item = (String, String)>,
+) {
+    let mut merged = store_params.storage_options().cloned().unwrap_or_default();
+    merged.extend(pairs);
+    let existing_provider = store_params
+        .storage_options_accessor
+        .as_ref()
+        .and_then(|accessor| accessor.provider().cloned());
+    let accessor = match existing_provider {
+        Some(provider) => StorageOptionsAccessor::with_initial_and_provider(merged, provider),
+        None => StorageOptionsAccessor::with_static_options(merged),
+    };
+    store_params.storage_options_accessor = Some(Arc::new(accessor));
+}
+
+/// Attaches `provider` to `store_params`, passing along any options already present.
+fn set_storage_options_provider(
+    store_params: &mut ObjectStoreParams,
+    provider: Arc<dyn StorageOptionsProvider>,
+) {
+    let accessor = if let Some(initial) = store_params.storage_options().cloned() {
+        StorageOptionsAccessor::with_initial_and_provider(initial, provider)
+    } else {
+        StorageOptionsAccessor::with_provider(provider)
+    };
+    store_params.storage_options_accessor = Some(Arc::new(accessor));
+}

 /// A builder for configuring a [`Connection::table_names`] operation
 pub struct TableNamesBuilder {
@@ -246,16 +278,14 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
    ///
    /// See available options at <https://lancedb.com/docs/storage/>
    pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
-        let store_options = self
+        let store_params = self
            .request
            .write_options
            .lance_write_params
            .get_or_insert(Default::default())
            .store_params
-            .get_or_insert(Default::default())
-            .storage_options
            .get_or_insert(Default::default());
-        store_options.insert(key.into(), value.into());
+        merge_storage_options(store_params, [(key.into(), value.into())]);
        self
    }

@@ -269,19 +299,17 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
        mut self,
        pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
    ) -> Self {
-        let store_options = self
+        let store_params = self
            .request
            .write_options
            .lance_write_params
            .get_or_insert(Default::default())
            .store_params
-            .get_or_insert(Default::default())
-            .storage_options
            .get_or_insert(Default::default());
-
-        for (key, value) in pairs {
-            store_options.insert(key.into(), value.into());
-        }
+        let updates = pairs
+            .into_iter()
+            .map(|(key, value)| (key.into(), value.into()));
+        merge_storage_options(store_params, updates);
        self
    }

@@ -318,23 +346,21 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
    /// This has no effect in LanceDB Cloud.
    #[deprecated(since = "0.15.1", note = "Use `database_options` instead")]
    pub fn enable_v2_manifest_paths(mut self, use_v2_manifest_paths: bool) -> Self {
-        let storage_options = self
+        let store_params = self
            .request
            .write_options
            .lance_write_params
            .get_or_insert_with(Default::default)
            .store_params
-            .get_or_insert_with(Default::default)
-            .storage_options
            .get_or_insert_with(Default::default);
-
-        storage_options.insert(
-            OPT_NEW_TABLE_V2_MANIFEST_PATHS.to_string(),
-            if use_v2_manifest_paths {
-                "true".to_string()
-            } else {
-                "false".to_string()
-            },
+        let value = if use_v2_manifest_paths {
+            "true".to_string()
+        } else {
+            "false".to_string()
+        };
+        merge_storage_options(
+            store_params,
+            [(OPT_NEW_TABLE_V2_MANIFEST_PATHS.to_string(), value)],
        );
        self
    }
@@ -344,19 +370,19 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
    /// The default is `LanceFileVersion::Stable`.
    #[deprecated(since = "0.15.1", note = "Use `database_options` instead")]
    pub fn data_storage_version(mut self, data_storage_version: LanceFileVersion) -> Self {
-        let storage_options = self
+        let store_params = self
            .request
            .write_options
            .lance_write_params
            .get_or_insert_with(Default::default)
            .store_params
-            .get_or_insert_with(Default::default)
-            .storage_options
            .get_or_insert_with(Default::default);
-
-        storage_options.insert(
-            OPT_NEW_TABLE_STORAGE_VERSION.to_string(),
-            data_storage_version.to_string(),
+        merge_storage_options(
+            store_params,
+            [(
+                OPT_NEW_TABLE_STORAGE_VERSION.to_string(),
+                data_storage_version.to_string(),
+            )],
        );
        self
    }
@@ -381,13 +407,14 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
    /// This allows tables to automatically refresh cloud storage credentials
    /// when they expire, enabling long-running operations on remote storage.
    pub fn storage_options_provider(mut self, provider: Arc<dyn StorageOptionsProvider>) -> Self {
-        self.request
+        let store_params = self
+            .request
            .write_options
            .lance_write_params
            .get_or_insert(Default::default())
            .store_params
-            .get_or_insert(Default::default())
-            .storage_options_provider = Some(provider);
+            .get_or_insert(Default::default());
+        set_storage_options_provider(store_params, provider);
        self
    }
 }
@@ -450,15 +477,13 @@ impl OpenTableBuilder {
    ///
    /// See available options at <https://lancedb.com/docs/storage/>
    pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
-        let storage_options = self
+        let store_params = self
            .request
            .lance_read_params
            .get_or_insert(Default::default())
            .store_options
-            .get_or_insert(Default::default())
-            .storage_options
            .get_or_insert(Default::default());
-        storage_options.insert(key.into(), value.into());
+        merge_storage_options(store_params, [(key.into(), value.into())]);
        self
    }

@@ -472,18 +497,16 @@ impl OpenTableBuilder {
        mut self,
        pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
    ) -> Self {
-        let storage_options = self
+        let store_params = self
            .request
            .lance_read_params
            .get_or_insert(Default::default())
            .store_options
-            .get_or_insert(Default::default())
-            .storage_options
            .get_or_insert(Default::default());
-
-        for (key, value) in pairs {
-            storage_options.insert(key.into(), value.into());
-        }
+        let updates = pairs
+            .into_iter()
+            .map(|(key, value)| (key.into(), value.into()));
+        merge_storage_options(store_params, updates);
        self
    }

@@ -507,12 +530,13 @@ impl OpenTableBuilder {
    /// This allows tables to automatically refresh cloud storage credentials
    /// when they expire, enabling long-running operations on remote storage.
    pub fn storage_options_provider(mut self, provider: Arc<dyn StorageOptionsProvider>) -> Self {
-        self.request
+        let store_params = self
+            .request
            .lance_read_params
            .get_or_insert(Default::default())
            .store_options
-            .get_or_insert(Default::default())
-            .storage_options_provider = Some(provider);
+            .get_or_insert(Default::default());
+        set_storage_options_provider(store_params, provider);
        self
    }

@@ -868,6 +892,10 @@ pub struct ConnectBuilder {
    embedding_registry: Option<Arc<dyn EmbeddingRegistry>>,
 }

+#[cfg(feature = "remote")]
+const ENV_VARS_TO_STORAGE_OPTS: [(&str, &str); 1] =
+    [("AZURE_STORAGE_ACCOUNT_NAME", "azure_storage_account_name")];
+
 impl ConnectBuilder {
    /// Create a new [`ConnectOptions`] with the given database URI.
    pub fn new(uri: &str) -> Self {
@@ -1051,11 +1079,27 @@ impl ConnectBuilder {
        self
    }

+    /// Fills `options` from environment variables, never overriding explicit values.
+    /// Pairs are `(env var name, option key)`; unset or non-Unicode variables are skipped.
+    #[cfg(feature = "remote")]
+    fn apply_env_defaults(
+        env_var_to_remote_storage_option: &[(&str, &str)],
+        options: &mut HashMap<String, String>,
+    ) {
+        for (env_key, opt_key) in env_var_to_remote_storage_option {
+            if let Ok(env_value) = std::env::var(env_key) {
+                options.entry((*opt_key).to_string()).or_insert(env_value);
+            }
+        }
+    }
+
    #[cfg(feature = "remote")]
    fn execute_remote(self) -> Result<Connection> {
        use crate::remote::db::RemoteDatabaseOptions;

-        let options = RemoteDatabaseOptions::parse_from_map(&self.request.options)?;
+        let mut merged_options = self.request.options.clone();
+        Self::apply_env_defaults(&ENV_VARS_TO_STORAGE_OPTS, &mut merged_options);
+        let options = RemoteDatabaseOptions::parse_from_map(&merged_options)?;

        let region = options.region.ok_or_else(|| Error::InvalidInput {
            message: "A region is required when connecting to LanceDb Cloud".to_string(),
@@ -1277,8 +1321,6 @@ mod test_utils {

 #[cfg(test)]
 mod tests {
-    use std::fs::create_dir_all;
-
    use crate::database::listing::{ListingDatabaseOptions, NewTableConfig};
    use crate::query::QueryBase;
    use crate::query::{ExecutableQuery, QueryExecutionOptions};
@@ -1302,6 +1344,23 @@ mod tests {
        assert_eq!(tc.connection.uri(), tc.uri);
    }

+    /// Env values only fill missing options; the variable is removed again after the last read.
+    #[cfg(feature = "remote")]
+    #[test]
+    fn test_apply_env_defaults() {
+        let env_key = "TEST_APPLY_ENV_DEFAULTS_ENVIRONMENT_VARIABLE_ENV_KEY";
+        let env_val = "TEST_APPLY_ENV_DEFAULTS_ENVIRONMENT_VARIABLE_ENV_VAL";
+        let opts_key = "test_apply_env_defaults_environment_variable_opts_key";
+        std::env::set_var(env_key, env_val);
+        let mut options = HashMap::new();
+        ConnectBuilder::apply_env_defaults(&[(env_key, opts_key)], &mut options);
+        assert_eq!(Some(&env_val.to_string()), options.get(opts_key));
+        options.insert(opts_key.to_string(), "EXPLICIT-VALUE".to_string());
+        ConnectBuilder::apply_env_defaults(&[(env_key, opts_key)], &mut options);
+        std::env::remove_var(env_key);
+        assert_eq!(Some(&"EXPLICIT-VALUE".to_string()), options.get(opts_key));
+    }
+
    #[cfg(not(windows))]
    #[tokio::test]
    async fn test_connect_relative() {
@@ -1526,18 +1585,27 @@ mod tests {

    #[tokio::test]
    async fn drop_table() {
-        let tmp_dir = tempdir().unwrap();
+        let tc = new_test_connection().await.unwrap();
+        let db = tc.connection;

-        let uri = tmp_dir.path().to_str().unwrap();
-        let db = connect(uri).execute().await.unwrap();
+        if tc.is_remote {
+            // All the typical endpoints such as s3:///, file-object-store:///, etc. treat drop_table
+            // as idempotent.
+            assert!(db.drop_table("invalid_table", &[]).await.is_ok());
+        } else {
+            // The behavior of drop_table when using a file:/// endpoint differs from all other
+            // object providers, in that it returns an error when deleting a non-existent table.
+            assert!(matches!(
+                db.drop_table("invalid_table", &[]).await,
+                Err(crate::Error::TableNotFound { .. }),
+            ));
+        }

-        // drop non-exist table
-        assert!(matches!(
-            db.drop_table("invalid_table", &[]).await,
-            Err(crate::Error::TableNotFound { .. }),
-        ));
-
-        create_dir_all(tmp_dir.path().join("table1.lance")).unwrap();
+        let schema = Arc::new(Schema::new(vec![Field::new("x", DataType::Int32, false)]));
+        db.create_empty_table("table1", schema.clone())
+            .execute()
+            .await
+            .unwrap();
        db.drop_table("table1", &[]).await.unwrap();

        let tables = db.table_names().execute().await.unwrap();
--- a/rust/lancedb/src/database/listing.rs
+++ b/rust/lancedb/src/database/listing.rs
@@ -12,7 +12,7 @@ use lance::dataset::{builder::DatasetBuilder, ReadParams, WriteMode};
 use lance::io::{ObjectStore, ObjectStoreParams, WrappingObjectStore};
 use lance_datafusion::utils::StreamingWriteSource;
 use lance_encoding::version::LanceFileVersion;
-use lance_io::object_store::StorageOptionsProvider;
+use lance_io::object_store::{StorageOptionsAccessor, StorageOptionsProvider};
 use lance_table::io::commit::commit_handler_from_url;
 use object_store::local::LocalFileSystem;
 use snafu::ResultExt;
@@ -356,7 +356,13 @@ impl ListingDatabase {
                    .clone()
                    .unwrap_or_else(|| Arc::new(lance::session::Session::default()));
                let os_params = ObjectStoreParams {
-                    storage_options: Some(options.storage_options.clone()),
+                    storage_options_accessor: if options.storage_options.is_empty() {
+                        None
+                    } else {
+                        Some(Arc::new(StorageOptionsAccessor::with_static_options(
+                            options.storage_options.clone(),
+                        )))
+                    },
                    ..Default::default()
                };
                let (object_store, base_path) = ObjectStore::from_uri_and_params(
@@ -492,7 +498,13 @@ impl ListingDatabase {

    async fn drop_tables(&self, names: Vec<String>) -> Result<()> {
        let object_store_params = ObjectStoreParams {
-            storage_options: Some(self.storage_options.clone()),
+            storage_options_accessor: if self.storage_options.is_empty() {
+                None
+            } else {
+                Some(Arc::new(StorageOptionsAccessor::with_static_options(
+                    self.storage_options.clone(),
+                )))
+            },
            ..Default::default()
        };
        let mut uri = self.uri.clone();
@@ -541,7 +553,7 @@ impl ListingDatabase {
            .lance_write_params
            .as_ref()
            .and_then(|p| p.store_params.as_ref())
-            .and_then(|sp| sp.storage_options.as_ref());
+            .and_then(|sp| sp.storage_options());

        let storage_version_override = storage_options
            .and_then(|opts| opts.get(OPT_NEW_TABLE_STORAGE_VERSION))
@@ -592,21 +604,26 @@ impl ListingDatabase {
        // will cause a new connection to be created, and that connection will
        // be dropped from the cache when python GCs the table object, which
        // confounds reuse across tables.
-        if !self.storage_options.is_empty() {
-            let storage_options = write_params
+        if !self.storage_options.is_empty() || self.storage_options_provider.is_some() {
+            let store_params = write_params
                .store_params
-                .get_or_insert_with(Default::default)
-                .storage_options
                .get_or_insert_with(Default::default);
-            self.inherit_storage_options(storage_options);
-        }
-
-        // Set storage options provider if available
-        if self.storage_options_provider.is_some() {
-            write_params
-                .store_params
-                .get_or_insert_with(Default::default)
-                .storage_options_provider = self.storage_options_provider.clone();
+            let mut storage_options = store_params.storage_options().cloned().unwrap_or_default();
+            if !self.storage_options.is_empty() {
+                self.inherit_storage_options(&mut storage_options);
+            }
+            // Preserve request-level provider if no connection-level provider exists
+            let request_provider = store_params
+                .storage_options_accessor
+                .as_ref()
+                .and_then(|a| a.provider().cloned());
+            let provider = self.storage_options_provider.clone().or(request_provider);
+            let accessor = if let Some(provider) = provider {
+                StorageOptionsAccessor::with_initial_and_provider(storage_options, provider)
+            } else {
+                StorageOptionsAccessor::with_static_options(storage_options)
+            };
+            store_params.storage_options_accessor = Some(Arc::new(accessor));
        }

        write_params.data_storage_version = self
@@ -892,7 +903,13 @@ impl Database for ListingDatabase {
        validate_table_name(&request.target_table_name)?;

        let storage_params = ObjectStoreParams {
-            storage_options: Some(self.storage_options.clone()),
+            storage_options_accessor: if self.storage_options.is_empty() {
+                None
+            } else {
+                Some(Arc::new(StorageOptionsAccessor::with_static_options(
+                    self.storage_options.clone(),
+                )))
+            },
            ..Default::default()
        };
        let read_params = ReadParams {
@@ -956,25 +973,28 @@ impl Database for ListingDatabase {
        // will cause a new connection to be created, and that connection will
        // be dropped from the cache when python GCs the table object, which
        // confounds reuse across tables.
-        if !self.storage_options.is_empty() {
-            let storage_options = request
+        if !self.storage_options.is_empty() || self.storage_options_provider.is_some() {
+            let store_params = request
                .lance_read_params
                .get_or_insert_with(Default::default)
                .store_options
-                .get_or_insert_with(Default::default)
-                .storage_options
                .get_or_insert_with(Default::default);
-            self.inherit_storage_options(storage_options);
-        }
-
-        // Set storage options provider if available
-        if self.storage_options_provider.is_some() {
-            request
-                .lance_read_params
-                .get_or_insert_with(Default::default)
-                .store_options
-                .get_or_insert_with(Default::default)
-                .storage_options_provider = self.storage_options_provider.clone();
+            let mut storage_options = store_params.storage_options().cloned().unwrap_or_default();
+            if !self.storage_options.is_empty() {
+                self.inherit_storage_options(&mut storage_options);
+            }
+            // Preserve request-level provider if no connection-level provider exists
+            let request_provider = store_params
+                .storage_options_accessor
+                .as_ref()
+                .and_then(|a| a.provider().cloned());
+            let provider = self.storage_options_provider.clone().or(request_provider);
+            let accessor = if let Some(provider) = provider {
+                StorageOptionsAccessor::with_initial_and_provider(storage_options, provider)
+            } else {
+                StorageOptionsAccessor::with_static_options(storage_options)
+            };
+            store_params.storage_options_accessor = Some(Arc::new(accessor));
        }

        // Some ReadParams are exposed in the OpenTableBuilder, but we also
@@ -1881,7 +1901,9 @@ mod tests {
        let write_options = WriteOptions {
            lance_write_params: Some(lance::dataset::WriteParams {
                store_params: Some(lance::io::ObjectStoreParams {
-                    storage_options: Some(storage_options),
+                    storage_options_accessor: Some(Arc::new(
+                        StorageOptionsAccessor::with_static_options(storage_options),
+                    )),
                    ..Default::default()
                }),
                ..Default::default()
@@ -1955,7 +1977,9 @@ mod tests {
        let write_options = WriteOptions {
            lance_write_params: Some(lance::dataset::WriteParams {
                store_params: Some(lance::io::ObjectStoreParams {
-                    storage_options: Some(storage_options),
+                    storage_options_accessor: Some(Arc::new(
+                        StorageOptionsAccessor::with_static_options(storage_options),
+                    )),
                    ..Default::default()
                }),
                ..Default::default()
--- a/rust/lancedb/src/database/namespace.rs
+++ b/rust/lancedb/src/database/namespace.rs
@@ -9,14 +9,15 @@ use std::sync::Arc;
 use async_trait::async_trait;
 use lance_namespace::{
    models::{
-        CreateNamespaceRequest, CreateNamespaceResponse, DeclareTableRequest,
-        DescribeNamespaceRequest, DescribeNamespaceResponse, DescribeTableRequest,
-        DropNamespaceRequest, DropNamespaceResponse, DropTableRequest, ListNamespacesRequest,
-        ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
+        CreateEmptyTableRequest, CreateNamespaceRequest, CreateNamespaceResponse,
+        DeclareTableRequest, DescribeNamespaceRequest, DescribeNamespaceResponse,
+        DescribeTableRequest, DropNamespaceRequest, DropNamespaceResponse, DropTableRequest,
+        ListNamespacesRequest, ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
    },
    LanceNamespace,
 };
 use lance_namespace_impls::ConnectBuilder;
+use log::warn;

 use crate::database::ReadConsistency;
 use crate::error::{Error, Result};
@@ -154,7 +155,6 @@ impl Database for LanceNamespaceDatabase {
        table_id.push(request.name.clone());
        let describe_request = DescribeTableRequest {
            id: Some(table_id.clone()),
-            version: None,
            ..Default::default()
        };

@@ -205,26 +205,53 @@ impl Database for LanceNamespaceDatabase {
        let mut table_id = request.namespace.clone();
        table_id.push(request.name.clone());

-        let create_empty_request = DeclareTableRequest {
+        // Try declare_table first, falling back to create_empty_table for backwards
+        // compatibility with older namespace clients that don't support declare_table
+        let declare_request = DeclareTableRequest {
            id: Some(table_id.clone()),
-            location: None,
-            vend_credentials: None,
            ..Default::default()
        };

-        let create_empty_response = self
-            .namespace
-            .declare_table(create_empty_request)
-            .await
-            .map_err(|e| Error::Runtime {
-                message: format!("Failed to declare table: {}", e),
-            })?;
+        let location = match self.namespace.declare_table(declare_request).await {
+            Ok(response) => response.location.ok_or_else(|| Error::Runtime {
+                message: "Table location is missing from declare_table response".to_string(),
+            })?,
+            Err(e) => {
+                // Check if the error is "not supported" and try create_empty_table as fallback
+                let err_str = e.to_string().to_lowercase();
+                if err_str.contains("not supported") || err_str.contains("not implemented") {
+                    warn!(
+                        "declare_table is not supported by the namespace client, \
+                        falling back to deprecated create_empty_table. \
+                        create_empty_table is deprecated and will be removed in Lance 3.0.0. \
+                        Please upgrade your namespace client to support declare_table."
+                    );
+                    #[allow(deprecated)]
+                    let create_empty_request = CreateEmptyTableRequest {
+                        id: Some(table_id.clone()),
+                        ..Default::default()
+                    };

-        let location = create_empty_response
-            .location
-            .ok_or_else(|| Error::Runtime {
-                message: "Table location is missing from create_empty_table response".to_string(),
-            })?;
+                    #[allow(deprecated)]
+                    let create_response = self
+                        .namespace
+                        .create_empty_table(create_empty_request)
+                        .await
+                        .map_err(|e| Error::Runtime {
+                            message: format!("Failed to create empty table: {}", e),
+                        })?;
+
+                    create_response.location.ok_or_else(|| Error::Runtime {
+                        message: "Table location is missing from create_empty_table response"
+                            .to_string(),
+                    })?
+                } else {
+                    return Err(Error::Runtime {
+                        message: format!("Failed to declare table: {}", e),
+                    });
+                }
+            }
+        };

        let native_table = NativeTable::create_from_namespace(
            self.namespace.clone(),
@@ -439,8 +466,6 @@ mod tests {
        // Create a child namespace first
        conn.create_namespace(CreateNamespaceRequest {
            id: Some(vec!["test_ns".into()]),
-            mode: None,
-            properties: None,
            ..Default::default()
        })
        .await
@@ -501,8 +526,6 @@ mod tests {
        // Create a child namespace first
        conn.create_namespace(CreateNamespaceRequest {
            id: Some(vec!["test_ns".into()]),
-            mode: None,
-            properties: None,
            ..Default::default()
        })
        .await
@@ -566,8 +589,6 @@ mod tests {
        // Create a child namespace first
        conn.create_namespace(CreateNamespaceRequest {
            id: Some(vec!["test_ns".into()]),
-            mode: None,
-            properties: None,
            ..Default::default()
        })
        .await
@@ -651,8 +672,6 @@ mod tests {
        // Create a child namespace first
        conn.create_namespace(CreateNamespaceRequest {
            id: Some(vec!["test_ns".into()]),
-            mode: None,
-            properties: None,
            ..Default::default()
        })
        .await
@@ -708,8 +727,6 @@ mod tests {
        // Create a child namespace first
        conn.create_namespace(CreateNamespaceRequest {
            id: Some(vec!["test_ns".into()]),
-            mode: None,
-            properties: None,
            ..Default::default()
        })
        .await
@@ -790,8 +807,6 @@ mod tests {
        // Create a child namespace first
        conn.create_namespace(CreateNamespaceRequest {
            id: Some(vec!["test_ns".into()]),
-            mode: None,
-            properties: None,
            ..Default::default()
        })
        .await
@@ -825,8 +840,6 @@ mod tests {
        // Create a child namespace first
        conn.create_namespace(CreateNamespaceRequest {
            id: Some(vec!["test_ns".into()]),
-            mode: None,
-            properties: None,
            ..Default::default()
        })
        .await
--- a/rust/lancedb/src/dataloader/permutation/builder.rs
+++ b/rust/lancedb/src/dataloader/permutation/builder.rs
@@ -19,7 +19,7 @@ use crate::{
        split::{SplitStrategy, Splitter, SPLIT_ID_COLUMN},
        util::{rename_column, TemporaryDirectory},
    },
-    query::{ExecutableQuery, QueryBase},
+    query::{ExecutableQuery, QueryBase, Select},
    Error, Result, Table,
 };

@@ -27,6 +27,8 @@ pub const SRC_ROW_ID_COL: &str = "row_id";

 pub const SPLIT_NAMES_CONFIG_KEY: &str = "split_names";

+pub const DEFAULT_MEMORY_LIMIT: usize = 100 * 1024 * 1024;
+
 /// Where to store the permutation table
 #[derive(Debug, Clone, Default)]
 enum PermutationDestination {
@@ -167,10 +169,20 @@ impl PermutationBuilder {
        &self,
        data: SendableRecordBatchStream,
    ) -> Result<SendableRecordBatchStream> {
+        let memory_limit = std::env::var("LANCEDB_PERM_BUILDER_MEMORY_LIMIT")
+            .unwrap_or_else(|_| DEFAULT_MEMORY_LIMIT.to_string())
+            .parse::<usize>()
+            .unwrap_or_else(|_| {
+                log::error!(
+                    "Failed to parse LANCEDB_PERM_BUILDER_MEMORY_LIMIT, using default: {}",
+                    DEFAULT_MEMORY_LIMIT
+                );
+                DEFAULT_MEMORY_LIMIT
+            });
        let ctx = SessionContext::new_with_config_rt(
            SessionConfig::default(),
            RuntimeEnvBuilder::new()
-                .with_memory_limit(100 * 1024 * 1024, 1.0)
+                .with_memory_limit(memory_limit, 1.0)
                .with_disk_manager_builder(
                    DiskManagerBuilder::default()
                        .with_mode(self.config.temp_dir.to_disk_manager_mode()),
@@ -232,7 +244,7 @@ impl PermutationBuilder {
    /// Builds the permutation table and stores it in the given database.
    pub async fn build(self) -> Result<Table> {
        // First pass, apply filter and load row ids
-        let mut rows = self.base_table.query().with_row_id();
+        let mut rows = self.base_table.query().select(Select::columns(&[ROW_ID]));

        if let Some(filter) = &self.config.filter {
            rows = rows.only_if(filter);
@@ -321,6 +333,47 @@ mod tests {

    use super::*;

+    #[tokio::test]
+    async fn test_permutation_table_only_stores_row_id_and_split_id() {
+        let temp_dir = tempfile::tempdir().unwrap();
+
+        let db = connect(temp_dir.path().to_str().unwrap())
+            .execute()
+            .await
+            .unwrap();
+
+        let initial_data = lance_datagen::gen_batch()
+            .col("col_a", lance_datagen::array::step::<Int32Type>())
+            .col("col_b", lance_datagen::array::step::<Int32Type>())
+            .into_ldb_stream(RowCount::from(100), BatchCount::from(10));
+        let data_table = db
+            .create_table_streaming("base_tbl", initial_data)
+            .execute()
+            .await
+            .unwrap();
+
+        let permutation_table = PermutationBuilder::new(data_table.clone())
+            .with_split_strategy(
+                SplitStrategy::Sequential {
+                    sizes: SplitSizes::Percentages(vec![0.5, 0.5]),
+                },
+                None,
+            )
+            .with_filter("col_a > 57".to_string())
+            .build()
+            .await
+            .unwrap();
+
+        let schema = permutation_table.schema().await.unwrap();
+        let field_names: Vec<&str> = schema.fields().iter().map(|f| f.name().as_str()).collect();
+        assert_eq!(
+            field_names,
+            vec!["row_id", "split_id"],
+            "Permutation table should only contain row_id and split_id columns, but found: {:?}",
+            field_names,
+        );
+    }
+
    #[tokio::test]
    async fn test_permutation_builder() {
        let temp_dir = tempfile::tempdir().unwrap();
@@ -352,8 +405,6 @@ mod tests {
            .await
            .unwrap();

-        println!("permutation_table: {:?}", permutation_table);
-
        // Potentially brittle seed-dependent values below
        assert_eq!(permutation_table.count_rows(None).await.unwrap(), 330);
        assert_eq!(
--- a/rust/lancedb/src/dataloader/permutation/shuffle.rs
+++ b/rust/lancedb/src/dataloader/permutation/shuffle.rs
@@ -171,7 +171,7 @@ impl Shuffler {
            // This is kind of an annoying limitation but if we allow runt clumps from batches then
            // clumps will get unaligned and we will mess up the clumps when we do the in-memory
            // shuffle step.  If this is a problem we can probably figure out a better way to do this.
-            if !is_last && batch.num_rows() as u64 % clump_size != 0 {
+            if !is_last && !(batch.num_rows() as u64).is_multiple_of(clump_size) {
                return Err(Error::Runtime {
                    message: format!(
                        "Expected batch size ({}) to be divisible by clump size ({})",
--- a/rust/lancedb/src/dataloader/permutation/split.rs
+++ b/rust/lancedb/src/dataloader/permutation/split.rs
@@ -1,12 +1,9 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors

-use std::{
-    iter,
-    sync::{
-        atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering},
-        Arc,
-    },
+use std::sync::{
+    atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering},
+    Arc,
 };

 use arrow_array::{Array, BooleanArray, RecordBatch, UInt64Array};
@@ -158,7 +155,7 @@ impl Splitter {
                remaining_in_split
            };

-            split_ids.extend(iter::repeat(split_id as u64).take(rows_to_add as usize));
+            split_ids.extend(std::iter::repeat_n(split_id as u64, rows_to_add as usize));
            if done {
                // Quit early if we've run out of splits
                break;
@@ -662,7 +659,7 @@ mod tests {
        assert_eq!(split_batch.num_rows(), total_split_sizes as usize);
        let mut expected = Vec::with_capacity(total_split_sizes as usize);
        for (i, size) in expected_split_sizes.iter().enumerate() {
-            expected.extend(iter::repeat(i as u64).take(*size as usize));
+            expected.extend(std::iter::repeat_n(i as u64, *size as usize));
        }
        let expected = Arc::new(UInt64Array::from(expected)) as Arc<dyn Array>;

--- a/rust/lancedb/src/index/vector.rs
+++ b/rust/lancedb/src/index/vector.rs
@@ -297,10 +297,10 @@ impl IvfPqIndexBuilder {
 }

 pub(crate) fn suggested_num_sub_vectors(dim: u32) -> u32 {
-    if dim % 16 == 0 {
+    if dim.is_multiple_of(16) {
        // Should be more aggressive than this default.
        dim / 16
-    } else if dim % 8 == 0 {
+    } else if dim.is_multiple_of(8) {
        dim / 8
    } else {
        log::warn!(
--- a/rust/lancedb/src/lib.rs
+++ b/rust/lancedb/src/lib.rs
@@ -51,24 +51,19 @@
 //! - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud object store
 //! - `db://dbname` - Lance Cloud
 //!
-//! You can also use [`ConnectOptions`] to configure the connection to the database.
+//! You can also use [`ConnectBuilder`] to configure the connection to the database.
 //!
 //! ```rust
-//! # #[cfg(feature = "aws")]
-//! # {
-//! use object_store::aws::AwsCredential;
 //! # tokio::runtime::Runtime::new().unwrap().block_on(async {
 //! let db = lancedb::connect("data/sample-lancedb")
-//!     .aws_creds(AwsCredential {
-//!         key_id: "some_key".to_string(),
-//!         secret_key: "some_secret".to_string(),
-//!         token: None,
-//!     })
+//!     .storage_options([
+//!         ("aws_access_key_id", "some_key"),
+//!         ("aws_secret_access_key", "some_secret"),
+//!     ])
 //!     .execute()
 //!     .await
 //!     .unwrap();
 //! # });
-//! # }
 //! ```
 //!
 //! LanceDB uses [arrow-rs](https://github.com/apache/arrow-rs) to define schema, data types and array itself.
--- a/rust/lancedb/src/remote/db.rs
+++ b/rust/lancedb/src/remote/db.rs
@@ -1718,8 +1718,6 @@ mod tests {
            let namespace = vec!["test_ns".to_string()];
            conn.create_namespace(CreateNamespaceRequest {
                id: Some(namespace.clone()),
-                mode: None,
-                properties: None,
                ..Default::default()
            })
            .await
@@ -1745,8 +1743,6 @@ mod tests {
            let list_response = conn
                .list_tables(ListTablesRequest {
                    id: Some(namespace.clone()),
-                    page_token: None,
-                    limit: None,
                    ..Default::default()
                })
                .await
@@ -1758,8 +1754,6 @@ mod tests {
            let list_response = namespace_client
                .list_tables(ListTablesRequest {
                    id: Some(namespace.clone()),
-                    page_token: None,
-                    limit: None,
                    ..Default::default()
                })
                .await
@@ -1800,8 +1794,6 @@ mod tests {
            let namespace = vec!["multi_table_ns".to_string()];
            conn.create_namespace(CreateNamespaceRequest {
                id: Some(namespace.clone()),
-                mode: None,
-                properties: None,
                ..Default::default()
            })
            .await
@@ -1827,8 +1819,6 @@ mod tests {
            let list_response = conn
                .list_tables(ListTablesRequest {
                    id: Some(namespace.clone()),
-                    page_token: None,
-                    limit: None,
                    ..Default::default()
                })
                .await
--- a/rust/lancedb/src/table.rs
+++ b/rust/lancedb/src/table.rs
@@ -40,7 +40,7 @@ use lance_index::vector::pq::PQBuildParams;
 use lance_index::vector::sq::builder::SQBuildParams;
 use lance_index::DatasetIndexExt;
 use lance_index::IndexType;
-use lance_io::object_store::LanceNamespaceStorageOptionsProvider;
+use lance_io::object_store::{LanceNamespaceStorageOptionsProvider, StorageOptionsAccessor};
 use lance_namespace::models::{
    QueryTableRequest as NsQueryTableRequest, QueryTableRequestColumns,
    QueryTableRequestFullTextQuery, QueryTableRequestVector, StringFtsQuery,
@@ -1425,9 +1425,7 @@ impl Table {
            })
            .collect::<Vec<_>>();

-        let unioned = UnionExec::try_new(projected_plans).map_err(|e| Error::Runtime {
-            message: format!("Failed to build union plan: {e}"),
-        })?;
+        let unioned = Arc::new(UnionExec::new(projected_plans));
        // We require 1 partition in the final output
        let repartitioned = RepartitionExec::try_new(
            unioned,
@@ -1668,18 +1666,14 @@ impl NativeTable {

        // Use DatasetBuilder::from_namespace which automatically fetches location
        // and storage options from the namespace
-        let builder = DatasetBuilder::from_namespace(
-            namespace_client.clone(),
-            table_id,
-            false, // Don't ignore namespace storage options
-        )
-        .await
-        .map_err(|e| match e {
-            lance::Error::Namespace { source, .. } => Error::Runtime {
-                message: format!("Failed to get table info from namespace: {:?}", source),
-            },
-            source => Error::Lance { source },
-        })?;
+        let builder = DatasetBuilder::from_namespace(namespace_client.clone(), table_id)
+            .await
+            .map_err(|e| match e {
+                lance::Error::Namespace { source, .. } => Error::Runtime {
+                    message: format!("Failed to get table info from namespace: {:?}", source),
+                },
+                source => Error::Lance { source },
+            })?;

        let dataset = builder
            .with_read_params(params)
@@ -1883,7 +1877,13 @@ impl NativeTable {
        let store_params = params
            .store_params
            .get_or_insert_with(ObjectStoreParams::default);
-        store_params.storage_options_provider = Some(storage_options_provider);
+        let accessor = match store_params.storage_options().cloned() {
+            Some(options) => {
+                StorageOptionsAccessor::with_initial_and_provider(options, storage_options_provider)
+            }
+            None => StorageOptionsAccessor::with_provider(storage_options_provider),
+        };
+        store_params.storage_options_accessor = Some(Arc::new(accessor));

        // Patch the params if we have a write store wrapper
        let params = match write_store_wrapper.clone() {
@@ -2059,7 +2059,7 @@ impl NativeTable {
            return provided;
        }
        let suggested = suggested_num_sub_vectors(dim);
-        if num_bits.is_some_and(|num_bits| num_bits == 4) && suggested % 2 != 0 {
+        if num_bits.is_some_and(|num_bits| num_bits == 4) && !suggested.is_multiple_of(2) {
            // num_sub_vectors must be even when 4 bits are used
            suggested + 1
        } else {
@@ -2349,7 +2349,7 @@ impl NativeTable {
                };

                // Convert select to columns list
-                let columns: Option<Box<QueryTableRequestColumns>> = match &vq.base.select {
+                let columns = match &vq.base.select {
                    Select::All => None,
                    Select::Columns(cols) => Some(Box::new(QueryTableRequestColumns {
                        column_names: Some(cols.clone()),
@@ -2407,7 +2407,6 @@ impl NativeTable {
                    with_row_id: Some(vq.base.with_row_id),
                    bypass_vector_index: Some(!vq.use_index),
                    full_text_query,
-                    version: None,
                    ..Default::default()
                })
            }
@@ -2426,7 +2425,7 @@ impl NativeTable {
                    .map(|f| self.filter_to_sql(f))
                    .transpose()?;

-                let columns: Option<Box<QueryTableRequestColumns>> = match &q.select {
+                let columns = match &q.select {
                    Select::All => None,
                    Select::Columns(cols) => Some(Box::new(QueryTableRequestColumns {
                        column_names: Some(cols.clone()),
@@ -2470,18 +2469,10 @@ impl NativeTable {
                    columns,
                    prefilter: Some(q.prefilter),
                    offset: q.offset.map(|o| o as i32),
-                    ef: None,
-                    refine_factor: None,
-                    distance_type: None,
-                    nprobes: None,
                    vector_column: None, // No vector column for plain queries
                    with_row_id: Some(q.with_row_id),
                    bypass_vector_index: Some(true), // No vector index for plain queries
                    full_text_query,
-                    version: None,
-                    fast_search: None,
-                    lower_bound: None,
-                    upper_bound: None,
                    ..Default::default()
                })
            }
@@ -3244,7 +3235,7 @@ impl BaseTable for NativeTable {
            .get()
            .await
            .ok()
-            .and_then(|dataset| dataset.storage_options().cloned())
+            .and_then(|dataset| dataset.initial_storage_options().cloned())
    }

    async fn index_stats(&self, index_name: &str) -> Result<Option<IndexStatistics>> {
@@ -3409,7 +3400,6 @@ pub struct FragmentSummaryStats {
 #[cfg(test)]
 #[allow(deprecated)]
 mod tests {
-    use std::iter;
    use std::sync::atomic::{AtomicBool, Ordering};
    use std::sync::Arc;
    use std::time::Duration;
@@ -4026,7 +4016,7 @@ mod tests {
                schema.clone(),
                vec![
                    Arc::new(Int32Array::from_iter_values(offset..(offset + 10))),
-                    Arc::new(Int32Array::from_iter_values(iter::repeat(age).take(10))),
+                    Arc::new(Int32Array::from_iter_values(std::iter::repeat_n(age, 10))),
                ],
            )],
            schema,
@@ -5154,15 +5144,16 @@ mod tests {
        let any_query = AnyQuery::VectorQuery(vq);
        let ns_request = table.convert_to_namespace_query(&any_query).unwrap();

-        let column_names = ns_request
-            .columns
-            .as_ref()
-            .and_then(|cols| cols.column_names.clone());
-
        assert_eq!(ns_request.k, 10);
        assert_eq!(ns_request.offset, Some(5));
        assert_eq!(ns_request.filter, Some("id > 0".to_string()));
-        assert_eq!(column_names, Some(vec!["id".to_string()]));
+        assert_eq!(
+            ns_request
+                .columns
+                .as_ref()
+                .and_then(|c| c.column_names.as_ref()),
+            Some(&vec!["id".to_string()])
+        );
        assert_eq!(ns_request.vector_column, Some("vector".to_string()));
        assert_eq!(ns_request.distance_type, Some("l2".to_string()));
        assert!(ns_request.vector.single_vector.is_some());
@@ -5199,16 +5190,17 @@ mod tests {
        let any_query = AnyQuery::Query(q);
        let ns_request = table.convert_to_namespace_query(&any_query).unwrap();

-        let column_names = ns_request
-            .columns
-            .as_ref()
-            .and_then(|cols| cols.column_names.clone());
-
        // Plain queries should pass an empty vector
        assert_eq!(ns_request.k, 20);
        assert_eq!(ns_request.offset, Some(5));
        assert_eq!(ns_request.filter, Some("id > 5".to_string()));
-        assert_eq!(column_names, Some(vec!["id".to_string()]));
+        assert_eq!(
+            ns_request
+                .columns
+                .as_ref()
+                .and_then(|c| c.column_names.as_ref()),
+            Some(&vec!["id".to_string()])
+        );
        assert_eq!(ns_request.with_row_id, Some(true));
        assert_eq!(ns_request.bypass_vector_index, Some(true));
        assert!(ns_request.vector_column.is_none()); // No vector column for plain queries
--- a/rust/lancedb/src/table/dataset.rs
+++ b/rust/lancedb/src/table/dataset.rs
@@ -100,8 +100,7 @@ impl DatasetRef {
                let should_checkout = match &target_ref {
                    refs::Ref::Version(_, Some(target_ver)) => version != target_ver,
                    refs::Ref::Version(_, None) => true, // No specific version, always checkout
-                    refs::Ref::VersionNumber(target_ver) => version != target_ver,
-                    refs::Ref::Tag(_) => true, // Always checkout for tags
+                    refs::Ref::Tag(_) => true,           // Always checkout for tags
                };

                if should_checkout {
--- a/rust/lancedb/tests/embedding_registry_test.rs
+++ b/rust/lancedb/tests/embedding_registry_test.rs
@@ -4,7 +4,6 @@
 use std::{
    borrow::Cow,
    collections::{HashMap, HashSet},
-    iter::repeat,
    sync::Arc,
 };

@@ -268,9 +267,10 @@ fn create_some_records() -> Result<impl IntoArrow> {
            schema.clone(),
            vec![
                Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)),
-                Arc::new(StringArray::from_iter(
-                    repeat(Some("hello world".to_string())).take(TOTAL),
-                )),
+                Arc::new(StringArray::from_iter(std::iter::repeat_n(
+                    Some("hello world".to_string()),
+                    TOTAL,
+                ))),
            ],
        )
        .unwrap()]
Author	SHA1	Message	Date
Lei Xu	b57faf9835	rename gha task	2026-01-29 16:26:28 -08:00
Lei Xu	4800f31479	fixlint	2026-01-29 16:21:23 -08:00
Lei Xu	ec464ad01e	remove pydantic 1 support	2026-01-29 16:18:56 -08:00
Weston Pace	9be28448f5	fix: don't store all columns in the permutation table (#2957 ) The permutation table was always intended to be a small table of row id pointers (and split id). However, it was accidentally doing a full materialization of the base table 🤦 This PR changes the permutation builder to only store row id and split id.	2026-01-29 16:06:36 -08:00
Lei Xu	357197bacc	chore!: change support python version from 3.10 to 3.13 (#2955 ) Python 3.9 has been EOL since Oct 2025, and the last two pyarrow builds were against Python 3.10-3.13. * This PR is contributed by codex-gpt5.2	2026-01-30 01:47:50 +08:00
Lei Xu	ad51e2dd1f	fix: support pydantic list of structs or optional struct (#2953 ) Closes #2950 This code is generated by codex-gpt5.2	2026-01-28 21:08:18 -08:00
Weston Pace	e9e904783c	feat: allow the permutation builder memory limit to be configured by env var (#2946 ) Running into issues with DF sorting again. This will at least allow the memory limit to be set large to bypass problems.	2026-01-28 09:02:59 +05:30
Lance Release	8500b16eca	Bump version: 0.24.1-beta.0 → 0.24.1	2026-01-26 23:39:18 +00:00
Lance Release	57e7282342	Bump version: 0.24.0 → 0.24.1-beta.0	2026-01-26 23:38:50 +00:00
Lance Release	cc5f8070d7	Bump version: 0.27.1-beta.0 → 0.27.1	2026-01-26 23:38:24 +00:00
Lance Release	dc0fb01f6b	Bump version: 0.27.0 → 0.27.1-beta.0	2026-01-26 23:38:23 +00:00
LanceDB Robot	94b7781551	feat: update lance dependency to v1.0.4 (#2944 ) ## Summary - bump Lance dependencies to v1.0.4 - run `cargo clippy --workspace --tests --all-features -- -D warnings` - run `cargo fmt --all` ## Testing - `cargo clippy --workspace --tests --all-features -- -D warnings` ## Reference - https://github.com/lance-format/lance/releases/tag/v1.0.4	2026-01-26 15:37:28 -08:00
Jack Ye	7bf020b3d5	chore: fix clippy when remote flag is not set (#2943 ) Also add a step in CI to ensure this does not happen in the future	2026-01-26 13:59:31 -08:00
LanceDB Robot	12a98479dc	chore: update lance dependency to v1.0.4-rc.1 (#2942 ) ## Summary - bump Lance dependencies to v1.0.4-rc.1 - verified `cargo clippy --workspace --tests --all-features -- -D warnings` - ran `cargo fmt --all` ## References - https://github.com/lance-format/lance/releases/tag/v1.0.4-rc.1	2026-01-26 12:17:22 -08:00
Jack Ye	e4552e577a	chore(revert): revert update lance dependency to v2.0.0-rc.1 (#2936 ) (#2941 ) This reverts commit `bd84bba14d`, so that we can bump version to 1.0.4-rc.1	2026-01-26 11:13:59 -08:00
Will Jones	f979a902ad	ci(rust): fix MSRV check (#2940 ) Realized our MSRV check was inert because `rust-toolchain.toml` was overriding the Rust version. We set the `RUSTUP_TOOLCHAIN` environment variable, which overrides that. Also needed to update to MSRV 1.88 (due to dependencies like Lance and DataFusion) and fix some clippy warnings.	2026-01-23 15:57:09 -08:00
Colin Patrick McCabe	5a7a8da567	feat: check AZURE_STORAGE_ACCOUNT_NAME in remote conns (#2918 ) Unlike in Amazon S3, in Azure bucket names are not globally unique. Instead, the combination of (storage_account_name, bucket_name) is unique. Therefore, when using Azure blob store, we always need a way to configure the storage account name. One way is to use the storage_options hash map and set azure_storage_account_name. Another way is to set an environment variable, AZURE_STORAGE_ACCOUNT_NAME. Prior to this PR, the second way (environment variable) did not work with remote connections. This is because the existing code that checks for these environment variables happens inside the Azure object store implementation itself, which does not run locally when using remote connections. This PR addresses that situation by adding a check of the environment variable. This functions as a default if the relevant storage option is not set in the storage_options hash map.	2026-01-22 13:36:05 -08:00
Jack Ye	0db8176445	test: fix failing remote doctest reference to aws feature (#2935 ) Closes https://github.com/lancedb/lancedb/issues/2933	2026-01-22 13:17:03 -08:00
LanceDB Robot	bd84bba14d	chore: update lance dependency to v2.0.0-rc.1 (#2936 ) ## Summary - bump Lance dependencies to v2.0.0-rc.1 (git tag) - align Arrow/DataFusion/PyO3 versions for the new Lance release - update Python bindings for PyO3 0.26 (attach API + Py<PyAny>) ## Verification - `cargo clippy --workspace --tests --all-features -- -D warnings` - `cargo fmt --all` ## Reference - https://github.com/lance-format/lance/releases/tag/v2.0.0-rc.1 --------- Co-authored-by: Jack Ye <yezhaoqin@gmail.com> Co-authored-by: Will Jones <willjones127@gmail.com> Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com> Co-authored-by: BubbleCal <bubble_cal@outlook.com>	2026-01-22 13:14:38 -08:00
Lance Release	ac07f8068c	Bump version: 0.24.0-beta.1 → 0.24.0	2026-01-22 01:10:15 +00:00
Lance Release	bba362d372	Bump version: 0.24.0-beta.0 → 0.24.0-beta.1	2026-01-22 01:09:53 +00:00
Lance Release	042bc22468	Bump version: 0.27.0-beta.1 → 0.27.0	2026-01-22 01:09:32 +00:00
Lance Release	68569906c6	Bump version: 0.27.0-beta.0 → 0.27.0-beta.1	2026-01-22 01:09:31 +00:00
LanceDB Robot	c71c1fc822	feat: update lance dependency to v1.0.3 (#2932 ) ## Summary - bump Lance dependency to v1.0.3 - refresh Cargo metadata and lockfile ## Verification - cargo clippy --workspace --tests --all-features -- -D warnings - cargo fmt --all ## Release - https://github.com/lance-format/lance/releases/tag/v1.0.3	2026-01-21 17:08:24 -08:00
Jack Ye	4a6a0c856e	ci: fix codex version bump title and summary (#2931 ) 1. use feat for releases, chore for prereleases 2. do not have literal `\n` in summary	2026-01-21 15:45:28 -08:00
Jack Ye	f124c9d8d2	test: string type conversion in pandas 3.0+ (#2928 ) Pandas 3.0+ string now converts to Arrow large_utf8. This PR mainly makes sure our test accounts for the difference across the pandas versions when constructing schema.	2026-01-21 13:40:48 -08:00
Jack Ye	4e65748abf	chore: update lance dependency to v1.0.3-rc.1 (#2927 ) Supersedes https://github.com/lancedb/lancedb/pull/2925 We accidentally upgraded lance to 2.0.0-beta.8. This PR reverts that first and then bumps to 1.0.3-rc.1 --------- Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>	2026-01-21 11:52:07 -08:00
Colin Patrick McCabe	e897f3edab	test: assert remote behavior of drop_table (#2926 ) Add support for testing remote connections in drop_table in `rust/lancedb/src/connection.rs`.	2026-01-21 08:42:40 -08:00
Lance Release	790ba7115b	Bump version: 0.23.1 → 0.24.0-beta.0	2026-01-21 12:21:53 +00:00