ci(rust): fix MSRV check (#2940)

Realized our MSRV check was inert because `rust-toolchain.toml` was overriding the Rust version. We set the `RUSTUP_TOOLCHAIN` environment variable, which overrides that. Also needed to update to MSRV 1.88 (due to dependencies like Lance and DataFusion) and fix some clippy warnings.
2026-07-03 11:00:40 +00:00 · 2026-01-23 15:57:09 -08:00
parent 5a7a8da567
commit f979a902ad
8 changed files with 19 additions and 21 deletions
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -181,7 +181,7 @@ jobs:
    runs-on: ubuntu-24.04
    strategy:
      matrix:
-        msrv: ["1.78.0"] # This should match up with rust-version in Cargo.toml
+        msrv: ["1.88.0"] # This should match up with rust-version in Cargo.toml
    env:
      # Need up-to-date compilers for kernels
      CC: clang-18
@@ -212,4 +212,6 @@ jobs:
          cargo update -p aws-sdk-sts --precise 1.51.0
          cargo update -p home --precise 0.5.9
      - name: cargo +${{ matrix.msrv }} check
+        env:
+          RUSTUP_TOOLCHAIN: ${{ matrix.msrv }}
        run: cargo check --profile ci --workspace --tests --benches --all-features
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,7 +12,7 @@ repository = "https://github.com/lancedb/lancedb"
 description = "Serverless, low-latency vector database for AI applications"
 keywords = ["lancedb", "lance", "database", "vector", "search"]
 categories = ["database-implementations"]
-rust-version = "1.78.0"
+rust-version = "1.88.0"

 [workspace.dependencies]
 lance = { "version" = "=2.0.0-rc.1", default-features = false, "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -7,7 +7,7 @@ license.workspace = true
 repository.workspace = true
 keywords.workspace = true
 categories.workspace = true
-rust-version = "1.75.0"
+rust-version = "1.88.0"

 [lib]
 name = "_lancedb"
--- a/rust/lancedb/src/dataloader/permutation/shuffle.rs
+++ b/rust/lancedb/src/dataloader/permutation/shuffle.rs
@@ -171,7 +171,7 @@ impl Shuffler {
            // This is kind of an annoying limitation but if we allow runt clumps from batches then
            // clumps will get unaligned and we will mess up the clumps when we do the in-memory
            // shuffle step.  If this is a problem we can probably figure out a better way to do this.
-            if !is_last && batch.num_rows() as u64 % clump_size != 0 {
+            if !is_last && !(batch.num_rows() as u64).is_multiple_of(clump_size) {
                return Err(Error::Runtime {
                    message: format!(
                        "Expected batch size ({}) to be divisible by clump size ({})",
--- a/rust/lancedb/src/dataloader/permutation/split.rs
+++ b/rust/lancedb/src/dataloader/permutation/split.rs
@@ -1,12 +1,9 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors

-use std::{
-    iter,
-    sync::{
-        atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering},
-        Arc,
-    },
+use std::sync::{
+    atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering},
+    Arc,
 };

 use arrow_array::{Array, BooleanArray, RecordBatch, UInt64Array};
@@ -158,7 +155,7 @@ impl Splitter {
                remaining_in_split
            };

-            split_ids.extend(iter::repeat(split_id as u64).take(rows_to_add as usize));
+            split_ids.extend(std::iter::repeat_n(split_id as u64, rows_to_add as usize));
            if done {
                // Quit early if we've run out of splits
                break;
@@ -662,7 +659,7 @@ mod tests {
        assert_eq!(split_batch.num_rows(), total_split_sizes as usize);
        let mut expected = Vec::with_capacity(total_split_sizes as usize);
        for (i, size) in expected_split_sizes.iter().enumerate() {
-            expected.extend(iter::repeat(i as u64).take(*size as usize));
+            expected.extend(std::iter::repeat_n(i as u64, *size as usize));
        }
        let expected = Arc::new(UInt64Array::from(expected)) as Arc<dyn Array>;

--- a/rust/lancedb/src/index/vector.rs
+++ b/rust/lancedb/src/index/vector.rs
@@ -297,10 +297,10 @@ impl IvfPqIndexBuilder {
 }

 pub(crate) fn suggested_num_sub_vectors(dim: u32) -> u32 {
-    if dim % 16 == 0 {
+    if dim.is_multiple_of(16) {
        // Should be more aggressive than this default.
        dim / 16
-    } else if dim % 8 == 0 {
+    } else if dim.is_multiple_of(8) {
        dim / 8
    } else {
        log::warn!(
--- a/rust/lancedb/src/table.rs
+++ b/rust/lancedb/src/table.rs
@@ -2061,7 +2061,7 @@ impl NativeTable {
            return provided;
        }
        let suggested = suggested_num_sub_vectors(dim);
-        if num_bits.is_some_and(|num_bits| num_bits == 4) && suggested % 2 != 0 {
+        if num_bits.is_some_and(|num_bits| num_bits == 4) && !suggested.is_multiple_of(2) {
            // num_sub_vectors must be even when 4 bits are used
            suggested + 1
        } else {
@@ -3402,7 +3402,6 @@ pub struct FragmentSummaryStats {
 #[cfg(test)]
 #[allow(deprecated)]
 mod tests {
-    use std::iter;
    use std::sync::atomic::{AtomicBool, Ordering};
    use std::sync::Arc;
    use std::time::Duration;
@@ -4019,7 +4018,7 @@ mod tests {
                schema.clone(),
                vec![
                    Arc::new(Int32Array::from_iter_values(offset..(offset + 10))),
-                    Arc::new(Int32Array::from_iter_values(iter::repeat(age).take(10))),
+                    Arc::new(Int32Array::from_iter_values(std::iter::repeat_n(age, 10))),
                ],
            )],
            schema,
--- a/rust/lancedb/tests/embedding_registry_test.rs
+++ b/rust/lancedb/tests/embedding_registry_test.rs
@@ -4,7 +4,6 @@
 use std::{
    borrow::Cow,
    collections::{HashMap, HashSet},
-    iter::repeat,
    sync::Arc,
 };

@@ -268,9 +267,10 @@ fn create_some_records() -> Result<impl IntoArrow> {
            schema.clone(),
            vec![
                Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)),
-                Arc::new(StringArray::from_iter(
-                    repeat(Some("hello world".to_string())).take(TOTAL),
-                )),
+                Arc::new(StringArray::from_iter(std::iter::repeat_n(
+                    Some("hello world".to_string()),
+                    TOTAL,
+                ))),
            ],
        )
        .unwrap()]