mirror of
https://github.com/lancedb/lancedb.git
synced 2026-05-14 10:30:40 +00:00
ci(rust): fix MSRV check (#2940)
Realized our MSRV check was inert because `rust-toolchain.toml` was overriding the Rust version. We set the `RUSTUP_TOOLCHAIN` environment variable, which overrides that. Also needed to update to MSRV 1.88 (due to dependencies like Lance and DataFusion) and fix some clippy warnings.
This commit is contained in:
4
.github/workflows/rust.yml
vendored
4
.github/workflows/rust.yml
vendored
@@ -181,7 +181,7 @@ jobs:
|
||||
runs-on: ubuntu-24.04
|
||||
strategy:
|
||||
matrix:
|
||||
msrv: ["1.78.0"] # This should match up with rust-version in Cargo.toml
|
||||
msrv: ["1.88.0"] # This should match up with rust-version in Cargo.toml
|
||||
env:
|
||||
# Need up-to-date compilers for kernels
|
||||
CC: clang-18
|
||||
@@ -212,4 +212,6 @@ jobs:
|
||||
cargo update -p aws-sdk-sts --precise 1.51.0
|
||||
cargo update -p home --precise 0.5.9
|
||||
- name: cargo +${{ matrix.msrv }} check
|
||||
env:
|
||||
RUSTUP_TOOLCHAIN: ${{ matrix.msrv }}
|
||||
run: cargo check --profile ci --workspace --tests --benches --all-features
|
||||
|
||||
@@ -12,7 +12,7 @@ repository = "https://github.com/lancedb/lancedb"
|
||||
description = "Serverless, low-latency vector database for AI applications"
|
||||
keywords = ["lancedb", "lance", "database", "vector", "search"]
|
||||
categories = ["database-implementations"]
|
||||
rust-version = "1.78.0"
|
||||
rust-version = "1.88.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=2.0.0-rc.1", default-features = false, "tag" = "v2.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
|
||||
|
||||
@@ -7,7 +7,7 @@ license.workspace = true
|
||||
repository.workspace = true
|
||||
keywords.workspace = true
|
||||
categories.workspace = true
|
||||
rust-version = "1.75.0"
|
||||
rust-version = "1.88.0"
|
||||
|
||||
[lib]
|
||||
name = "_lancedb"
|
||||
|
||||
@@ -171,7 +171,7 @@ impl Shuffler {
|
||||
// This is kind of an annoying limitation but if we allow runt clumps from batches then
|
||||
// clumps will get unaligned and we will mess up the clumps when we do the in-memory
|
||||
// shuffle step. If this is a problem we can probably figure out a better way to do this.
|
||||
if !is_last && batch.num_rows() as u64 % clump_size != 0 {
|
||||
if !is_last && !(batch.num_rows() as u64).is_multiple_of(clump_size) {
|
||||
return Err(Error::Runtime {
|
||||
message: format!(
|
||||
"Expected batch size ({}) to be divisible by clump size ({})",
|
||||
|
||||
@@ -1,12 +1,9 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
use std::{
|
||||
iter,
|
||||
sync::{
|
||||
atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering},
|
||||
Arc,
|
||||
},
|
||||
use std::sync::{
|
||||
atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering},
|
||||
Arc,
|
||||
};
|
||||
|
||||
use arrow_array::{Array, BooleanArray, RecordBatch, UInt64Array};
|
||||
@@ -158,7 +155,7 @@ impl Splitter {
|
||||
remaining_in_split
|
||||
};
|
||||
|
||||
split_ids.extend(iter::repeat(split_id as u64).take(rows_to_add as usize));
|
||||
split_ids.extend(std::iter::repeat_n(split_id as u64, rows_to_add as usize));
|
||||
if done {
|
||||
// Quit early if we've run out of splits
|
||||
break;
|
||||
@@ -662,7 +659,7 @@ mod tests {
|
||||
assert_eq!(split_batch.num_rows(), total_split_sizes as usize);
|
||||
let mut expected = Vec::with_capacity(total_split_sizes as usize);
|
||||
for (i, size) in expected_split_sizes.iter().enumerate() {
|
||||
expected.extend(iter::repeat(i as u64).take(*size as usize));
|
||||
expected.extend(std::iter::repeat_n(i as u64, *size as usize));
|
||||
}
|
||||
let expected = Arc::new(UInt64Array::from(expected)) as Arc<dyn Array>;
|
||||
|
||||
|
||||
@@ -297,10 +297,10 @@ impl IvfPqIndexBuilder {
|
||||
}
|
||||
|
||||
pub(crate) fn suggested_num_sub_vectors(dim: u32) -> u32 {
|
||||
if dim % 16 == 0 {
|
||||
if dim.is_multiple_of(16) {
|
||||
// Should be more aggressive than this default.
|
||||
dim / 16
|
||||
} else if dim % 8 == 0 {
|
||||
} else if dim.is_multiple_of(8) {
|
||||
dim / 8
|
||||
} else {
|
||||
log::warn!(
|
||||
|
||||
@@ -2061,7 +2061,7 @@ impl NativeTable {
|
||||
return provided;
|
||||
}
|
||||
let suggested = suggested_num_sub_vectors(dim);
|
||||
if num_bits.is_some_and(|num_bits| num_bits == 4) && suggested % 2 != 0 {
|
||||
if num_bits.is_some_and(|num_bits| num_bits == 4) && !suggested.is_multiple_of(2) {
|
||||
// num_sub_vectors must be even when 4 bits are used
|
||||
suggested + 1
|
||||
} else {
|
||||
@@ -3402,7 +3402,6 @@ pub struct FragmentSummaryStats {
|
||||
#[cfg(test)]
|
||||
#[allow(deprecated)]
|
||||
mod tests {
|
||||
use std::iter;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
@@ -4019,7 +4018,7 @@ mod tests {
|
||||
schema.clone(),
|
||||
vec![
|
||||
Arc::new(Int32Array::from_iter_values(offset..(offset + 10))),
|
||||
Arc::new(Int32Array::from_iter_values(iter::repeat(age).take(10))),
|
||||
Arc::new(Int32Array::from_iter_values(std::iter::repeat_n(age, 10))),
|
||||
],
|
||||
)],
|
||||
schema,
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
use std::{
|
||||
borrow::Cow,
|
||||
collections::{HashMap, HashSet},
|
||||
iter::repeat,
|
||||
sync::Arc,
|
||||
};
|
||||
|
||||
@@ -268,9 +267,10 @@ fn create_some_records() -> Result<impl IntoArrow> {
|
||||
schema.clone(),
|
||||
vec![
|
||||
Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)),
|
||||
Arc::new(StringArray::from_iter(
|
||||
repeat(Some("hello world".to_string())).take(TOTAL),
|
||||
)),
|
||||
Arc::new(StringArray::from_iter(std::iter::repeat_n(
|
||||
Some("hello world".to_string()),
|
||||
TOTAL,
|
||||
))),
|
||||
],
|
||||
)
|
||||
.unwrap()]
|
||||
|
||||
Reference in New Issue
Block a user