mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-19 09:16:45 +00:00
CR comments from https://github.com/quickwit-oss/tantivy/pull/2940 (#2952)
Co-authored-by: Paul Masurel <paul.masurel@datadoghq.com>
This commit is contained in:
parent
70a8e56ee5
commit
abcf6754a2
3 changed files with 34 additions and 23 deletions
|
|
@ -92,7 +92,10 @@ impl FilterImplPerInstructionSet {
|
||||||
#[cfg(target_arch = "x86_64")]
|
#[cfg(target_arch = "x86_64")]
|
||||||
FilterImplPerInstructionSet::AVX2 => avx2::filter_vec_in_place(range, offset, output),
|
FilterImplPerInstructionSet::AVX2 => avx2::filter_vec_in_place(range, offset, output),
|
||||||
#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
|
#[cfg(all(target_arch = "aarch64", not(target_vendor = "apple")))]
|
||||||
FilterImplPerInstructionSet::SVE => sve::filter_vec_in_place(range, offset, output),
|
// SAFETY: SVE availability was verified by is_available() before selecting this impl.
|
||||||
|
FilterImplPerInstructionSet::SVE => unsafe {
|
||||||
|
sve::filter_vec_in_place(range, offset, output)
|
||||||
|
},
|
||||||
#[cfg(target_arch = "aarch64")]
|
#[cfg(target_arch = "aarch64")]
|
||||||
FilterImplPerInstructionSet::Neon => neon::filter_vec_in_place(range, offset, output),
|
FilterImplPerInstructionSet::Neon => neon::filter_vec_in_place(range, offset, output),
|
||||||
FilterImplPerInstructionSet::Scalar => {
|
FilterImplPerInstructionSet::Scalar => {
|
||||||
|
|
@ -266,15 +269,16 @@ mod tests {
|
||||||
start in 0u32..400u32,
|
start in 0u32..400u32,
|
||||||
end in 0u32..400u32,
|
end in 0u32..400u32,
|
||||||
offset in 0u32..2u32,
|
offset in 0u32..2u32,
|
||||||
mut vals in vals_strategy()) {
|
vals in vals_strategy()) {
|
||||||
for implementation in available_impls() {
|
for implementation in available_impls() {
|
||||||
if implementation == FilterImplPerInstructionSet::Scalar {
|
if implementation == FilterImplPerInstructionSet::Scalar {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
let mut vals_clone = vals.clone();
|
let mut impl_output = vals.clone();
|
||||||
implementation.filter_vec_in_place(start..=end, offset, &mut vals);
|
let mut scalar_output = vals.clone();
|
||||||
FilterImplPerInstructionSet::Scalar.filter_vec_in_place(start..=end, offset, &mut vals_clone);
|
implementation.filter_vec_in_place(start..=end, offset, &mut impl_output);
|
||||||
assert_eq!(&vals, &vals_clone);
|
FilterImplPerInstructionSet::Scalar.filter_vec_in_place(start..=end, offset, &mut scalar_output);
|
||||||
|
assert_eq!(&impl_output, &scalar_output);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -17,6 +17,7 @@ unsafe fn compact(data: uint32x4_t, mask: u8) -> uint32x4_t {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Safe (not unsafe) because NEON is mandatory on aarch64: no runtime feature check needed.
|
||||||
#[inline(never)]
|
#[inline(never)]
|
||||||
pub fn filter_vec_in_place(range: RangeInclusive<u32>, offset: u32, output: &mut Vec<u32>) {
|
pub fn filter_vec_in_place(range: RangeInclusive<u32>, offset: u32, output: &mut Vec<u32>) {
|
||||||
let num_words = output.len() / NUM_LANES;
|
let num_words = output.len() / NUM_LANES;
|
||||||
|
|
@ -94,20 +95,24 @@ unsafe fn filter_vec_neon_aux(
|
||||||
// Index is a 4-bit mask: bit k=1 means lane k (bytes 4k..4k+3) is in-range.
|
// Index is a 4-bit mask: bit k=1 means lane k (bytes 4k..4k+3) is in-range.
|
||||||
// The j-th set bit determines which input lane goes to output position j.
|
// The j-th set bit determines which input lane goes to output position j.
|
||||||
const BYTE_SHUFFLE_TABLE: [[u8; 16]; 16] = [
|
const BYTE_SHUFFLE_TABLE: [[u8; 16]; 16] = [
|
||||||
[0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3], // 0b0000: none
|
[
|
||||||
[0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3], // 0b0001: lane 0
|
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
|
||||||
[4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3], // 0b0010: lane 1
|
], // 0b0000: none
|
||||||
[0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3], // 0b0011: lanes 0,1
|
[0, 1, 2, 3, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16], // 0b0001: lane 0
|
||||||
[8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3], // 0b0100: lane 2
|
[4, 5, 6, 7, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16], // 0b0010: lane 1
|
||||||
[0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3], // 0b0101: lanes 0,2
|
[0, 1, 2, 3, 4, 5, 6, 7, 16, 16, 16, 16, 16, 16, 16, 16], // 0b0011: lanes 0,1
|
||||||
[4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3], // 0b0110: lanes 1,2
|
[8, 9, 10, 11, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16], // 0b0100: lane 2
|
||||||
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3], // 0b0111: lanes 0,1,2
|
[0, 1, 2, 3, 8, 9, 10, 11, 16, 16, 16, 16, 16, 16, 16, 16], // 0b0101: lanes 0,2
|
||||||
[12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3], // 0b1000: lane 3
|
[4, 5, 6, 7, 8, 9, 10, 11, 16, 16, 16, 16, 16, 16, 16, 16], // 0b0110: lanes 1,2
|
||||||
[0, 1, 2, 3, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3], // 0b1001: lanes 0,3
|
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 16, 16, 16], // 0b0111: lanes 0,1,2
|
||||||
[4, 5, 6, 7, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3], // 0b1010: lanes 1,3
|
[
|
||||||
[0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15, 0, 1, 2, 3], // 0b1011: lanes 0,1,3
|
12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
|
||||||
[8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3], // 0b1100: lanes 2,3
|
], // 0b1000: lane 3
|
||||||
[0, 1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3], // 0b1101: lanes 0,2,3
|
[0, 1, 2, 3, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16], // 0b1001: lanes 0,3
|
||||||
[4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3], // 0b1110: lanes 1,2,3
|
[4, 5, 6, 7, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16], // 0b1010: lanes 1,3
|
||||||
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], // 0b1111: all lanes
|
[0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15, 16, 16, 16, 16], // 0b1011: lanes 0,1,3
|
||||||
|
[8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16], // 0b1100: lanes 2,3
|
||||||
|
[0, 1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16], // 0b1101: lanes 0,2,3
|
||||||
|
[4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16], // 0b1110: lanes 1,2,3
|
||||||
|
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], // 0b1111: all lanes
|
||||||
];
|
];
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,9 @@ unsafe fn num_lanes() -> usize {
|
||||||
vl
|
vl
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn filter_vec_in_place(range: RangeInclusive<u32>, offset: u32, output: &mut Vec<u32>) {
|
// SAFETY: caller must ensure SVE is available (checked via is_aarch64_feature_detected!("sve")).
|
||||||
|
// Unlike NEON, SVE is optional on aarch64 and not guaranteed by the target architecture.
|
||||||
|
pub unsafe fn filter_vec_in_place(range: RangeInclusive<u32>, offset: u32, output: &mut Vec<u32>) {
|
||||||
if range.start() > range.end() {
|
if range.start() > range.end() {
|
||||||
output.clear();
|
output.clear();
|
||||||
return;
|
return;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue