Codex CR comments

This commit is contained in:
Paul Masurel
2026-05-27 23:57:05 +02:00
parent 465a761b2f
commit 6dd0be28cf
3 changed files with 47 additions and 14 deletions

View File

@@ -1,4 +1,4 @@
#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple"), nightly))]
use std::arch::is_aarch64_feature_detected;
use std::ops::RangeInclusive;
@@ -8,8 +8,8 @@ mod avx2;
#[cfg(target_arch = "aarch64")]
mod neon;
// SVE intrinsics are not exposed on aarch64-apple-darwin; only include on Linux/other.
#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
// SVE intrinsics are nightly-only and not exposed on aarch64-apple-darwin.
#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple"), nightly))]
mod sve;
mod scalar;
@@ -19,7 +19,7 @@ mod scalar;
enum FilterImplPerInstructionSet {
#[cfg(target_arch = "x86_64")]
AVX2 = 0u8,
#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple"), nightly))]
Sve = 3u8,
#[cfg(target_arch = "aarch64")]
Neon = 2u8,
@@ -32,8 +32,9 @@ impl FilterImplPerInstructionSet {
match *self {
#[cfg(target_arch = "x86_64")]
FilterImplPerInstructionSet::AVX2 => is_x86_feature_detected!("avx2"),
#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple"), nightly))]
FilterImplPerInstructionSet::Sve => is_aarch64_feature_detected!("sve"),
// TIL Neon is required on aarch 64.
#[cfg(target_arch = "aarch64")]
FilterImplPerInstructionSet::Neon => true,
FilterImplPerInstructionSet::Scalar => true,
@@ -48,14 +49,21 @@ const IMPLS: [FilterImplPerInstructionSet; 2] = [
FilterImplPerInstructionSet::Scalar,
];
// Non-Apple aarch64 (Graviton, etc.): try SVE, NEON, Scalar.
#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
// Non-Apple aarch64 with nightly: try SVE, NEON, Scalar.
#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple"), nightly))]
const IMPLS: [FilterImplPerInstructionSet; 3] = [
FilterImplPerInstructionSet::Sve,
FilterImplPerInstructionSet::Neon,
FilterImplPerInstructionSet::Scalar,
];
// Non-Apple aarch64 without nightly: SVE unavailable; use NEON or Scalar.
#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple"), not(nightly)))]
const IMPLS: [FilterImplPerInstructionSet; 2] = [
FilterImplPerInstructionSet::Neon,
FilterImplPerInstructionSet::Scalar,
];
// Apple aarch64 (M-series): SVE not available; use NEON or Scalar.
#[cfg(all(target_arch = "aarch64", target_vendor = "apple"))]
const IMPLS: [FilterImplPerInstructionSet; 2] = [
@@ -74,7 +82,7 @@ impl FilterImplPerInstructionSet {
if code == FilterImplPerInstructionSet::AVX2 as u8 {
return FilterImplPerInstructionSet::AVX2;
}
#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple"), nightly))]
if code == FilterImplPerInstructionSet::Sve as u8 {
return FilterImplPerInstructionSet::Sve;
}
@@ -90,7 +98,7 @@ impl FilterImplPerInstructionSet {
match self {
#[cfg(target_arch = "x86_64")]
FilterImplPerInstructionSet::AVX2 => avx2::filter_vec_in_place(range, offset, output),
#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple"), nightly))]
FilterImplPerInstructionSet::Sve => sve::filter_vec_in_place(range, offset, output),
#[cfg(target_arch = "aarch64")]
FilterImplPerInstructionSet::Neon => neon::filter_vec_in_place(range, offset, output),
@@ -133,7 +141,7 @@ pub fn neon_filter_vec_in_place(range: RangeInclusive<u32>, offset: u32, output:
}
#[doc(hidden)]
#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple"), nightly))]
pub fn sve_filter_vec_in_place(range: RangeInclusive<u32>, offset: u32, output: &mut Vec<u32>) {
sve::filter_vec_in_place(range, offset, output);
}
@@ -160,7 +168,7 @@ mod tests {
}
}
#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple"), nightly))]
#[test]
fn test_instruction_set_to_code_from_code() {
for instruction_set in [
@@ -173,6 +181,18 @@ mod tests {
}
}
#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple"), not(nightly)))]
#[test]
fn test_instruction_set_to_code_from_code() {
for instruction_set in [
FilterImplPerInstructionSet::Neon,
FilterImplPerInstructionSet::Scalar,
] {
let code = instruction_set as u8;
assert_eq!(instruction_set, FilterImplPerInstructionSet::from(code));
}
}
#[cfg(all(target_arch = "aarch64", target_vendor = "apple"))]
#[test]
fn test_instruction_set_to_code_from_code() {
@@ -209,11 +229,20 @@ mod tests {
assert_eq!(&output, &[1, 3, 4, 5, 6, 7, 8]);
}
fn test_filter_impl_empty_range_aux(filter_impl: FilterImplPerInstructionSet) {
// start > end: RangeInclusive::contains always returns false; output must be empty.
// The SVE path's wrapping_sub would otherwise produce a huge range_width.
let mut output = vec![3, 2, 1, 5, 11, 2, 5, 10, 2];
filter_impl.filter_vec_in_place(10..=5, 0, &mut output);
assert_eq!(&output, &[]);
}
fn test_filter_impl_test_suite(filter_impl: FilterImplPerInstructionSet) {
test_filter_impl_empty_aux(filter_impl);
test_filter_impl_simple_aux(filter_impl);
test_filter_impl_simple_aux_shifted(filter_impl);
test_filter_impl_simple_outside_i32_range(filter_impl);
test_filter_impl_empty_range_aux(filter_impl);
}
#[test]
@@ -225,7 +254,7 @@ mod tests {
}
#[test]
#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple"), nightly))]
fn test_filter_implementation_sve() {
if FilterImplPerInstructionSet::Sve.is_available() {
test_filter_impl_test_suite(FilterImplPerInstructionSet::Sve);

View File

@@ -9,6 +9,10 @@ fn num_lanes() -> usize {
}
pub fn filter_vec_in_place(range: RangeInclusive<u32>, offset: u32, output: &mut Vec<u32>) {
if range.start() > range.end() {
output.clear();
return;
}
let vl = unsafe { num_lanes() };
let num_words = output.len() / vl;
let range_start = *range.start();

View File

@@ -1,6 +1,6 @@
// SVE/SVE2 intrinsics are nightly-only; enable them on non-Apple aarch64 targets.
// SVE/SVE2 intrinsics require nightly; only unlock when build.rs detects a nightly compiler.
#![cfg_attr(
all(target_arch = "aarch64", not(target_vendor = "apple")),
all(target_arch = "aarch64", not(target_vendor = "apple"), nightly),
feature(stdarch_aarch64_sve)
)]