From f979a902ad8fe383a5aba84cd04e09df28cdc8e1 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Fri, 23 Jan 2026 15:57:09 -0800 Subject: [PATCH] ci(rust): fix MSRV check (#2940) The MSRV check was inert because `rust-toolchain.toml` was overriding the Rust version. This change sets the `RUSTUP_TOOLCHAIN` environment variable on the check step, which takes precedence over `rust-toolchain.toml`. It also raises the MSRV to 1.88 (required by dependencies such as Lance and DataFusion) and fixes some clippy warnings. --- .github/workflows/rust.yml | 4 +++- Cargo.toml | 2 +- python/Cargo.toml | 2 +- rust/lancedb/src/dataloader/permutation/shuffle.rs | 2 +- rust/lancedb/src/dataloader/permutation/split.rs | 13 +++++-------- rust/lancedb/src/index/vector.rs | 4 ++-- rust/lancedb/src/table.rs | 5 ++--- rust/lancedb/tests/embedding_registry_test.rs | 8 ++++---- 8 files changed, 19 insertions(+), 21 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 22f3bc6b2..8fab085d6 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -181,7 +181,7 @@ jobs: runs-on: ubuntu-24.04 strategy: matrix: - msrv: ["1.78.0"] # This should match up with rust-version in Cargo.toml + msrv: ["1.88.0"] # This should match up with rust-version in Cargo.toml env: # Need up-to-date compilers for kernels CC: clang-18 @@ -212,4 +212,6 @@ jobs: cargo update -p aws-sdk-sts --precise 1.51.0 cargo update -p home --precise 0.5.9 - name: cargo +${{ matrix.msrv }} check + env: + RUSTUP_TOOLCHAIN: ${{ matrix.msrv }} run: cargo check --profile ci --workspace --tests --benches --all-features diff --git a/Cargo.toml b/Cargo.toml index d14e26a96..77e62b4e2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ repository = "https://github.com/lancedb/lancedb" description = "Serverless, low-latency vector database for AI applications" keywords = ["lancedb", "lance", "database", "vector", "search"] categories = ["database-implementations"] -rust-version = "1.78.0" +rust-version = "1.88.0" [workspace.dependencies] 
lance = { "version" = "=2.0.0-rc.1", default-features = false, "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" } diff --git a/python/Cargo.toml b/python/Cargo.toml index c1c68df6c..23d86fa74 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -7,7 +7,7 @@ license.workspace = true repository.workspace = true keywords.workspace = true categories.workspace = true -rust-version = "1.75.0" +rust-version = "1.88.0" [lib] name = "_lancedb" diff --git a/rust/lancedb/src/dataloader/permutation/shuffle.rs b/rust/lancedb/src/dataloader/permutation/shuffle.rs index 0d1ec43e1..53f7e1af5 100644 --- a/rust/lancedb/src/dataloader/permutation/shuffle.rs +++ b/rust/lancedb/src/dataloader/permutation/shuffle.rs @@ -171,7 +171,7 @@ impl Shuffler { // This is kind of an annoying limitation but if we allow runt clumps from batches then // clumps will get unaligned and we will mess up the clumps when we do the in-memory // shuffle step. If this is a problem we can probably figure out a better way to do this. 
- if !is_last && batch.num_rows() as u64 % clump_size != 0 { + if !is_last && !(batch.num_rows() as u64).is_multiple_of(clump_size) { return Err(Error::Runtime { message: format!( "Expected batch size ({}) to be divisible by clump size ({})", diff --git a/rust/lancedb/src/dataloader/permutation/split.rs b/rust/lancedb/src/dataloader/permutation/split.rs index e7dc52830..2bcf1c1e0 100644 --- a/rust/lancedb/src/dataloader/permutation/split.rs +++ b/rust/lancedb/src/dataloader/permutation/split.rs @@ -1,12 +1,9 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The LanceDB Authors -use std::{ - iter, - sync::{ - atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering}, - Arc, - }, +use std::sync::{ + atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering}, + Arc, }; use arrow_array::{Array, BooleanArray, RecordBatch, UInt64Array}; @@ -158,7 +155,7 @@ impl Splitter { remaining_in_split }; - split_ids.extend(iter::repeat(split_id as u64).take(rows_to_add as usize)); + split_ids.extend(std::iter::repeat_n(split_id as u64, rows_to_add as usize)); if done { // Quit early if we've run out of splits break; @@ -662,7 +659,7 @@ mod tests { assert_eq!(split_batch.num_rows(), total_split_sizes as usize); let mut expected = Vec::with_capacity(total_split_sizes as usize); for (i, size) in expected_split_sizes.iter().enumerate() { - expected.extend(iter::repeat(i as u64).take(*size as usize)); + expected.extend(std::iter::repeat_n(i as u64, *size as usize)); } let expected = Arc::new(UInt64Array::from(expected)) as Arc; diff --git a/rust/lancedb/src/index/vector.rs b/rust/lancedb/src/index/vector.rs index 6268b36cd..65990cf03 100644 --- a/rust/lancedb/src/index/vector.rs +++ b/rust/lancedb/src/index/vector.rs @@ -297,10 +297,10 @@ impl IvfPqIndexBuilder { } pub(crate) fn suggested_num_sub_vectors(dim: u32) -> u32 { - if dim % 16 == 0 { + if dim.is_multiple_of(16) { // Should be more aggressive than this default. 
dim / 16 - } else if dim % 8 == 0 { + } else if dim.is_multiple_of(8) { dim / 8 } else { log::warn!( diff --git a/rust/lancedb/src/table.rs b/rust/lancedb/src/table.rs index f840f3e5a..e3743a4e7 100644 --- a/rust/lancedb/src/table.rs +++ b/rust/lancedb/src/table.rs @@ -2061,7 +2061,7 @@ impl NativeTable { return provided; } let suggested = suggested_num_sub_vectors(dim); - if num_bits.is_some_and(|num_bits| num_bits == 4) && suggested % 2 != 0 { + if num_bits.is_some_and(|num_bits| num_bits == 4) && !suggested.is_multiple_of(2) { // num_sub_vectors must be even when 4 bits are used suggested + 1 } else { @@ -3402,7 +3402,6 @@ pub struct FragmentSummaryStats { #[cfg(test)] #[allow(deprecated)] mod tests { - use std::iter; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; use std::time::Duration; @@ -4019,7 +4018,7 @@ mod tests { schema.clone(), vec![ Arc::new(Int32Array::from_iter_values(offset..(offset + 10))), - Arc::new(Int32Array::from_iter_values(iter::repeat(age).take(10))), + Arc::new(Int32Array::from_iter_values(std::iter::repeat_n(age, 10))), ], )], schema, diff --git a/rust/lancedb/tests/embedding_registry_test.rs b/rust/lancedb/tests/embedding_registry_test.rs index c87fad74f..4c636aad4 100644 --- a/rust/lancedb/tests/embedding_registry_test.rs +++ b/rust/lancedb/tests/embedding_registry_test.rs @@ -4,7 +4,6 @@ use std::{ borrow::Cow, collections::{HashMap, HashSet}, - iter::repeat, sync::Arc, }; @@ -268,9 +267,10 @@ fn create_some_records() -> Result { schema.clone(), vec![ Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)), - Arc::new(StringArray::from_iter( - repeat(Some("hello world".to_string())).take(TOTAL), - )), + Arc::new(StringArray::from_iter(std::iter::repeat_n( + Some("hello world".to_string()), + TOTAL, + ))), ], ) .unwrap()]