From f979a902ad8fe383a5aba84cd04e09df28cdc8e1 Mon Sep 17 00:00:00 2001
From: Will Jones <willjones127@gmail.com>
Date: Fri, 23 Jan 2026 15:57:09 -0800
Subject: [PATCH] ci(rust): fix MSRV check (#2940)

Our MSRV check was inert because `rust-toolchain.toml` was overriding
the Rust version used by CI. The check step now sets the
`RUSTUP_TOOLCHAIN` environment variable, which takes precedence over
`rust-toolchain.toml`.

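Also bumped the MSRV to 1.88 (required by dependencies like Lance and
DataFusion) and fixed the clippy warnings that surfaced: `x % n == 0`
checks now use `is_multiple_of`, and `iter::repeat(..).take(n)` now uses
`std::iter::repeat_n`.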
---
 .github/workflows/rust.yml                         |  4 +++-
 Cargo.toml                                         |  2 +-
 python/Cargo.toml                                  |  2 +-
 rust/lancedb/src/dataloader/permutation/shuffle.rs |  2 +-
 rust/lancedb/src/dataloader/permutation/split.rs   | 13 +++++--------
 rust/lancedb/src/index/vector.rs                   |  4 ++--
 rust/lancedb/src/table.rs                          |  5 ++---
 rust/lancedb/tests/embedding_registry_test.rs      |  8 ++++----
 8 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 22f3bc6b2..8fab085d6 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -181,7 +181,7 @@ jobs:
     runs-on: ubuntu-24.04
     strategy:
       matrix:
-        msrv: ["1.78.0"] # This should match up with rust-version in Cargo.toml
+        msrv: ["1.88.0"] # This should match up with rust-version in Cargo.toml
     env:
       # Need up-to-date compilers for kernels
       CC: clang-18
@@ -212,4 +212,6 @@ jobs:
           cargo update -p aws-sdk-sts --precise 1.51.0
           cargo update -p home --precise 0.5.9
       - name: cargo +${{ matrix.msrv }} check
+        env:
+          RUSTUP_TOOLCHAIN: ${{ matrix.msrv }}
         run: cargo check --profile ci --workspace --tests --benches --all-features
diff --git a/Cargo.toml b/Cargo.toml
index d14e26a96..77e62b4e2 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,7 +12,7 @@ repository = "https://github.com/lancedb/lancedb"
 description = "Serverless, low-latency vector database for AI applications"
 keywords = ["lancedb", "lance", "database", "vector", "search"]
 categories = ["database-implementations"]
-rust-version = "1.78.0"
+rust-version = "1.88.0"
 
 [workspace.dependencies]
 lance = { "version" = "=2.0.0-rc.1", default-features = false, "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
diff --git a/python/Cargo.toml b/python/Cargo.toml
index c1c68df6c..23d86fa74 100644
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -7,7 +7,7 @@ license.workspace = true
 repository.workspace = true
 keywords.workspace = true
 categories.workspace = true
-rust-version = "1.75.0"
+rust-version = "1.88.0"
 
 [lib]
 name = "_lancedb"
diff --git a/rust/lancedb/src/dataloader/permutation/shuffle.rs b/rust/lancedb/src/dataloader/permutation/shuffle.rs
index 0d1ec43e1..53f7e1af5 100644
--- a/rust/lancedb/src/dataloader/permutation/shuffle.rs
+++ b/rust/lancedb/src/dataloader/permutation/shuffle.rs
@@ -171,7 +171,7 @@ impl Shuffler {
             // This is kind of an annoying limitation but if we allow runt clumps from batches then
             // clumps will get unaligned and we will mess up the clumps when we do the in-memory
             // shuffle step.  If this is a problem we can probably figure out a better way to do this.
-            if !is_last && batch.num_rows() as u64 % clump_size != 0 {
+            if !is_last && !(batch.num_rows() as u64).is_multiple_of(clump_size) {
                 return Err(Error::Runtime {
                     message: format!(
                         "Expected batch size ({}) to be divisible by clump size ({})",
diff --git a/rust/lancedb/src/dataloader/permutation/split.rs b/rust/lancedb/src/dataloader/permutation/split.rs
index e7dc52830..2bcf1c1e0 100644
--- a/rust/lancedb/src/dataloader/permutation/split.rs
+++ b/rust/lancedb/src/dataloader/permutation/split.rs
@@ -1,12 +1,9 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors
 
-use std::{
-    iter,
-    sync::{
-        atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering},
-        Arc,
-    },
+use std::sync::{
+    atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering},
+    Arc,
 };
 
 use arrow_array::{Array, BooleanArray, RecordBatch, UInt64Array};
@@ -158,7 +155,7 @@ impl Splitter {
                 remaining_in_split
             };
 
-            split_ids.extend(iter::repeat(split_id as u64).take(rows_to_add as usize));
+            split_ids.extend(std::iter::repeat_n(split_id as u64, rows_to_add as usize));
             if done {
                 // Quit early if we've run out of splits
                 break;
@@ -662,7 +659,7 @@ mod tests {
         assert_eq!(split_batch.num_rows(), total_split_sizes as usize);
         let mut expected = Vec::with_capacity(total_split_sizes as usize);
         for (i, size) in expected_split_sizes.iter().enumerate() {
-            expected.extend(iter::repeat(i as u64).take(*size as usize));
+            expected.extend(std::iter::repeat_n(i as u64, *size as usize));
         }
         let expected = Arc::new(UInt64Array::from(expected)) as Arc<dyn Array>;
 
diff --git a/rust/lancedb/src/index/vector.rs b/rust/lancedb/src/index/vector.rs
index 6268b36cd..65990cf03 100644
--- a/rust/lancedb/src/index/vector.rs
+++ b/rust/lancedb/src/index/vector.rs
@@ -297,10 +297,10 @@ impl IvfPqIndexBuilder {
 }
 
 pub(crate) fn suggested_num_sub_vectors(dim: u32) -> u32 {
-    if dim % 16 == 0 {
+    if dim.is_multiple_of(16) {
         // Should be more aggressive than this default.
         dim / 16
-    } else if dim % 8 == 0 {
+    } else if dim.is_multiple_of(8) {
         dim / 8
     } else {
         log::warn!(
diff --git a/rust/lancedb/src/table.rs b/rust/lancedb/src/table.rs
index f840f3e5a..e3743a4e7 100644
--- a/rust/lancedb/src/table.rs
+++ b/rust/lancedb/src/table.rs
@@ -2061,7 +2061,7 @@ impl NativeTable {
             return provided;
         }
         let suggested = suggested_num_sub_vectors(dim);
-        if num_bits.is_some_and(|num_bits| num_bits == 4) && suggested % 2 != 0 {
+        if num_bits.is_some_and(|num_bits| num_bits == 4) && !suggested.is_multiple_of(2) {
             // num_sub_vectors must be even when 4 bits are used
             suggested + 1
         } else {
@@ -3402,7 +3402,6 @@ pub struct FragmentSummaryStats {
 #[cfg(test)]
 #[allow(deprecated)]
 mod tests {
-    use std::iter;
     use std::sync::atomic::{AtomicBool, Ordering};
     use std::sync::Arc;
     use std::time::Duration;
@@ -4019,7 +4018,7 @@ mod tests {
                 schema.clone(),
                 vec![
                     Arc::new(Int32Array::from_iter_values(offset..(offset + 10))),
-                    Arc::new(Int32Array::from_iter_values(iter::repeat(age).take(10))),
+                    Arc::new(Int32Array::from_iter_values(std::iter::repeat_n(age, 10))),
                 ],
             )],
             schema,
diff --git a/rust/lancedb/tests/embedding_registry_test.rs b/rust/lancedb/tests/embedding_registry_test.rs
index c87fad74f..4c636aad4 100644
--- a/rust/lancedb/tests/embedding_registry_test.rs
+++ b/rust/lancedb/tests/embedding_registry_test.rs
@@ -4,7 +4,6 @@
 use std::{
     borrow::Cow,
     collections::{HashMap, HashSet},
-    iter::repeat,
     sync::Arc,
 };
 
@@ -268,9 +267,10 @@ fn create_some_records() -> Result<impl IntoArrow> {
             schema.clone(),
             vec![
                 Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)),
-                Arc::new(StringArray::from_iter(
-                    repeat(Some("hello world".to_string())).take(TOTAL),
-                )),
+                Arc::new(StringArray::from_iter(std::iter::repeat_n(
+                    Some("hello world".to_string()),
+                    TOTAL,
+                ))),
             ],
         )
         .unwrap()]