diff --git a/bitpacker/src/filter_vec/mod.rs b/bitpacker/src/filter_vec/mod.rs index 2d3b3b8df..d1038b9bd 100644 --- a/bitpacker/src/filter_vec/mod.rs +++ b/bitpacker/src/filter_vec/mod.rs @@ -1,4 +1,4 @@ -#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))] +#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple"), nightly))] use std::arch::is_aarch64_feature_detected; use std::ops::RangeInclusive; @@ -8,8 +8,8 @@ mod avx2; #[cfg(target_arch = "aarch64")] mod neon; -// SVE intrinsics are not exposed on aarch64-apple-darwin; only include on Linux/other. -#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))] +// SVE intrinsics are nightly-only and not exposed on aarch64-apple-darwin. +#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple"), nightly))] mod sve; mod scalar; @@ -19,7 +19,7 @@ mod scalar; enum FilterImplPerInstructionSet { #[cfg(target_arch = "x86_64")] AVX2 = 0u8, - #[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))] + #[cfg(all(target_arch = "aarch64", not(target_vendor = "apple"), nightly))] Sve = 3u8, #[cfg(target_arch = "aarch64")] Neon = 2u8, @@ -32,8 +32,9 @@ impl FilterImplPerInstructionSet { match *self { #[cfg(target_arch = "x86_64")] FilterImplPerInstructionSet::AVX2 => is_x86_feature_detected!("avx2"), - #[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))] + #[cfg(all(target_arch = "aarch64", not(target_vendor = "apple"), nightly))] FilterImplPerInstructionSet::Sve => is_aarch64_feature_detected!("sve"), + // TIL Neon is required on aarch 64. #[cfg(target_arch = "aarch64")] FilterImplPerInstructionSet::Neon => true, FilterImplPerInstructionSet::Scalar => true, @@ -48,14 +49,21 @@ const IMPLS: [FilterImplPerInstructionSet; 2] = [ FilterImplPerInstructionSet::Scalar, ]; -// Non-Apple aarch64 (Graviton, etc.): try SVE, NEON, Scalar. -#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))] +// Non-Apple aarch64 with nightly: try SVE, NEON, Scalar. +#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple"), nightly))] const IMPLS: [FilterImplPerInstructionSet; 3] = [ FilterImplPerInstructionSet::Sve, FilterImplPerInstructionSet::Neon, FilterImplPerInstructionSet::Scalar, ]; +// Non-Apple aarch64 without nightly: SVE unavailable; use NEON or Scalar. +#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple"), not(nightly)))] +const IMPLS: [FilterImplPerInstructionSet; 2] = [ + FilterImplPerInstructionSet::Neon, + FilterImplPerInstructionSet::Scalar, +]; + // Apple aarch64 (M-series): SVE not available; use NEON or Scalar. #[cfg(all(target_arch = "aarch64", target_vendor = "apple"))] const IMPLS: [FilterImplPerInstructionSet; 2] = [ @@ -74,7 +82,7 @@ impl FilterImplPerInstructionSet { if code == FilterImplPerInstructionSet::AVX2 as u8 { return FilterImplPerInstructionSet::AVX2; } - #[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))] + #[cfg(all(target_arch = "aarch64", not(target_vendor = "apple"), nightly))] if code == FilterImplPerInstructionSet::Sve as u8 { return FilterImplPerInstructionSet::Sve; } @@ -90,7 +98,7 @@ impl FilterImplPerInstructionSet { match self { #[cfg(target_arch = "x86_64")] FilterImplPerInstructionSet::AVX2 => avx2::filter_vec_in_place(range, offset, output), - #[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))] + #[cfg(all(target_arch = "aarch64", not(target_vendor = "apple"), nightly))] FilterImplPerInstructionSet::Sve => sve::filter_vec_in_place(range, offset, output), #[cfg(target_arch = "aarch64")] FilterImplPerInstructionSet::Neon => neon::filter_vec_in_place(range, offset, output), @@ -133,7 +141,7 @@ pub fn neon_filter_vec_in_place(range: RangeInclusive, offset: u32, output: } #[doc(hidden)] -#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))] +#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple"), nightly))] pub fn sve_filter_vec_in_place(range: RangeInclusive, offset: u32, output: &mut Vec) { sve::filter_vec_in_place(range, offset, output); } @@ -160,7 +168,7 @@ mod tests { } } - #[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))] + #[cfg(all(target_arch = "aarch64", not(target_vendor = "apple"), nightly))] #[test] fn test_instruction_set_to_code_from_code() { for instruction_set in [ @@ -173,6 +181,18 @@ mod tests { } } + #[cfg(all(target_arch = "aarch64", not(target_vendor = "apple"), not(nightly)))] + #[test] + fn test_instruction_set_to_code_from_code() { + for instruction_set in [ + FilterImplPerInstructionSet::Neon, + FilterImplPerInstructionSet::Scalar, + ] { + let code = instruction_set as u8; + assert_eq!(instruction_set, FilterImplPerInstructionSet::from(code)); + } + } + #[cfg(all(target_arch = "aarch64", target_vendor = "apple"))] #[test] fn test_instruction_set_to_code_from_code() { @@ -209,11 +229,20 @@ mod tests { assert_eq!(&output, &[1, 3, 4, 5, 6, 7, 8]); } + fn test_filter_impl_empty_range_aux(filter_impl: FilterImplPerInstructionSet) { + // start > end: RangeInclusive::contains always returns false; output must be empty. + // The SVE path's wrapping_sub would otherwise produce a huge range_width. + let mut output = vec![3, 2, 1, 5, 11, 2, 5, 10, 2]; + filter_impl.filter_vec_in_place(10..=5, 0, &mut output); + assert_eq!(&output, &[]); + } + fn test_filter_impl_test_suite(filter_impl: FilterImplPerInstructionSet) { test_filter_impl_empty_aux(filter_impl); test_filter_impl_simple_aux(filter_impl); test_filter_impl_simple_aux_shifted(filter_impl); test_filter_impl_simple_outside_i32_range(filter_impl); + test_filter_impl_empty_range_aux(filter_impl); } #[test] @@ -225,7 +254,7 @@ mod tests { } #[test] - #[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))] + #[cfg(all(target_arch = "aarch64", not(target_vendor = "apple"), nightly))] fn test_filter_implementation_sve() { if FilterImplPerInstructionSet::Sve.is_available() { test_filter_impl_test_suite(FilterImplPerInstructionSet::Sve); diff --git a/bitpacker/src/filter_vec/sve.rs b/bitpacker/src/filter_vec/sve.rs index 67a2b8216..6f1e4d3c5 100644 --- a/bitpacker/src/filter_vec/sve.rs +++ b/bitpacker/src/filter_vec/sve.rs @@ -9,6 +9,10 @@ fn num_lanes() -> usize { } pub fn filter_vec_in_place(range: RangeInclusive, offset: u32, output: &mut Vec) { + if range.start() > range.end() { + output.clear(); + return; + } let vl = unsafe { num_lanes() }; let num_words = output.len() / vl; let range_start = *range.start(); diff --git a/bitpacker/src/lib.rs b/bitpacker/src/lib.rs index 76d68c0cc..baada1922 100644 --- a/bitpacker/src/lib.rs +++ b/bitpacker/src/lib.rs @@ -1,6 +1,6 @@ -// SVE/SVE2 intrinsics are nightly-only; enable them on non-Apple aarch64 targets. +// SVE/SVE2 intrinsics require nightly; only unlock when build.rs detects a nightly compiler. #![cfg_attr( - all(target_arch = "aarch64", not(target_vendor = "apple")), + all(target_arch = "aarch64", not(target_vendor = "apple"), nightly), feature(stdarch_aarch64_sve) )]