mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-19 09:16:45 +00:00
CR comments from https://github.com/quickwit-oss/tantivy/pull/2940 (#2952)
Co-authored-by: Paul Masurel <paul.masurel@datadoghq.com>
This commit is contained in:
parent
70a8e56ee5
commit
abcf6754a2
3 changed files with 34 additions and 23 deletions
|
|
@ -92,7 +92,10 @@ impl FilterImplPerInstructionSet {
|
|||
#[cfg(target_arch = "x86_64")]
|
||||
FilterImplPerInstructionSet::AVX2 => avx2::filter_vec_in_place(range, offset, output),
|
||||
#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
|
||||
FilterImplPerInstructionSet::SVE => sve::filter_vec_in_place(range, offset, output),
|
||||
// SAFETY: SVE availability was verified by is_available() before selecting this impl.
|
||||
FilterImplPerInstructionSet::SVE => unsafe {
|
||||
sve::filter_vec_in_place(range, offset, output)
|
||||
},
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
FilterImplPerInstructionSet::Neon => neon::filter_vec_in_place(range, offset, output),
|
||||
FilterImplPerInstructionSet::Scalar => {
|
||||
|
|
@ -266,15 +269,16 @@ mod tests {
|
|||
start in 0u32..400u32,
|
||||
end in 0u32..400u32,
|
||||
offset in 0u32..2u32,
|
||||
mut vals in vals_strategy()) {
|
||||
vals in vals_strategy()) {
|
||||
for implementation in available_impls() {
|
||||
if implementation == FilterImplPerInstructionSet::Scalar {
|
||||
continue;
|
||||
}
|
||||
let mut vals_clone = vals.clone();
|
||||
implementation.filter_vec_in_place(start..=end, offset, &mut vals);
|
||||
FilterImplPerInstructionSet::Scalar.filter_vec_in_place(start..=end, offset, &mut vals_clone);
|
||||
assert_eq!(&vals, &vals_clone);
|
||||
let mut impl_output = vals.clone();
|
||||
let mut scalar_output = vals.clone();
|
||||
implementation.filter_vec_in_place(start..=end, offset, &mut impl_output);
|
||||
FilterImplPerInstructionSet::Scalar.filter_vec_in_place(start..=end, offset, &mut scalar_output);
|
||||
assert_eq!(&impl_output, &scalar_output);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ unsafe fn compact(data: uint32x4_t, mask: u8) -> uint32x4_t {
|
|||
}
|
||||
}
|
||||
|
||||
// Safe (not unsafe) because NEON is mandatory on aarch64: no runtime feature check needed.
|
||||
#[inline(never)]
|
||||
pub fn filter_vec_in_place(range: RangeInclusive<u32>, offset: u32, output: &mut Vec<u32>) {
|
||||
let num_words = output.len() / NUM_LANES;
|
||||
|
|
@ -94,20 +95,24 @@ unsafe fn filter_vec_neon_aux(
|
|||
// Index is a 4-bit mask: bit k=1 means lane k (bytes 4k..4k+3) is in-range.
|
||||
// The j-th set bit determines which input lane goes to output position j.
|
||||
const BYTE_SHUFFLE_TABLE: [[u8; 16]; 16] = [
|
||||
[0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3], // 0b0000: none
|
||||
[0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3], // 0b0001: lane 0
|
||||
[4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3], // 0b0010: lane 1
|
||||
[0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3], // 0b0011: lanes 0,1
|
||||
[8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3], // 0b0100: lane 2
|
||||
[0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3], // 0b0101: lanes 0,2
|
||||
[4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3], // 0b0110: lanes 1,2
|
||||
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3], // 0b0111: lanes 0,1,2
|
||||
[12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3], // 0b1000: lane 3
|
||||
[0, 1, 2, 3, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3], // 0b1001: lanes 0,3
|
||||
[4, 5, 6, 7, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3], // 0b1010: lanes 1,3
|
||||
[0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15, 0, 1, 2, 3], // 0b1011: lanes 0,1,3
|
||||
[8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3], // 0b1100: lanes 2,3
|
||||
[0, 1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3], // 0b1101: lanes 0,2,3
|
||||
[4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3], // 0b1110: lanes 1,2,3
|
||||
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], // 0b1111: all lanes
|
||||
[
|
||||
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
|
||||
], // 0b0000: none
|
||||
[0, 1, 2, 3, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16], // 0b0001: lane 0
|
||||
[4, 5, 6, 7, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16], // 0b0010: lane 1
|
||||
[0, 1, 2, 3, 4, 5, 6, 7, 16, 16, 16, 16, 16, 16, 16, 16], // 0b0011: lanes 0,1
|
||||
[8, 9, 10, 11, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16], // 0b0100: lane 2
|
||||
[0, 1, 2, 3, 8, 9, 10, 11, 16, 16, 16, 16, 16, 16, 16, 16], // 0b0101: lanes 0,2
|
||||
[4, 5, 6, 7, 8, 9, 10, 11, 16, 16, 16, 16, 16, 16, 16, 16], // 0b0110: lanes 1,2
|
||||
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 16, 16, 16], // 0b0111: lanes 0,1,2
|
||||
[
|
||||
12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
|
||||
], // 0b1000: lane 3
|
||||
[0, 1, 2, 3, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16], // 0b1001: lanes 0,3
|
||||
[4, 5, 6, 7, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16], // 0b1010: lanes 1,3
|
||||
[0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15, 16, 16, 16, 16], // 0b1011: lanes 0,1,3
|
||||
[8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16], // 0b1100: lanes 2,3
|
||||
[0, 1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16], // 0b1101: lanes 0,2,3
|
||||
[4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16], // 0b1110: lanes 1,2,3
|
||||
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], // 0b1111: all lanes
|
||||
];
|
||||
|
|
|
|||
|
|
@ -15,7 +15,9 @@ unsafe fn num_lanes() -> usize {
|
|||
vl
|
||||
}
|
||||
|
||||
pub fn filter_vec_in_place(range: RangeInclusive<u32>, offset: u32, output: &mut Vec<u32>) {
|
||||
// SAFETY: caller must ensure SVE is available (checked via is_aarch64_feature_detected!("sve")).
|
||||
// Unlike NEON, SVE is optional on aarch64 and not guaranteed by the target architecture.
|
||||
pub unsafe fn filter_vec_in_place(range: RangeInclusive<u32>, offset: u32, output: &mut Vec<u32>) {
|
||||
if range.start() > range.end() {
|
||||
output.clear();
|
||||
return;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue