fix term aggregation u32::MAX overflow issue

This commit is contained in:
Pascal Seitz 2026-06-18 09:33:52 +02:00 committed by PSeitz
commit 1e859fd78d
4 changed files with 22 additions and 4 deletions

View file

@ -275,7 +275,7 @@ impl SegmentCompositeCollector {
dict.insert( dict.insert(
key, key,
IntermediateCompositeBucketEntry { IntermediateCompositeBucketEntry {
doc_count: agg.count, doc_count: agg.count as u64,
sub_aggregation: sub_aggregation_res, sub_aggregation: sub_aggregation_res,
}, },
); );

View file

@ -957,7 +957,7 @@ fn into_intermediate_bucket_entry(
)?; )?;
} }
Ok(IntermediateTermBucketEntry { Ok(IntermediateTermBucketEntry {
doc_count: bucket.count, doc_count: bucket.count as u64,
sub_aggregation: sub_aggregation_res, sub_aggregation: sub_aggregation_res,
}) })
} }

View file

@ -98,7 +98,7 @@ impl SegmentAggregationCollector for TermMissingAgg {
let missing_count = &self.missing_count_per_bucket[parent_bucket_id as usize]; let missing_count = &self.missing_count_per_bucket[parent_bucket_id as usize];
let mut missing_entry = IntermediateTermBucketEntry { let mut missing_entry = IntermediateTermBucketEntry {
doc_count: missing_count.missing_count, doc_count: missing_count.missing_count as u64,
sub_aggregation: Default::default(), sub_aggregation: Default::default(),
}; };
if let Some(sub_agg) = &mut self.sub_agg { if let Some(sub_agg) = &mut self.sub_agg {

View file

@ -930,7 +930,7 @@ impl IntermediateRangeBucketEntry {
#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)] #[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)]
pub struct IntermediateTermBucketEntry { pub struct IntermediateTermBucketEntry {
/// The number of documents in the bucket. /// The number of documents in the bucket.
pub doc_count: u32, pub doc_count: u64,
/// The sub_aggregation in this bucket. /// The sub_aggregation in this bucket.
pub sub_aggregation: IntermediateAggregationResults, pub sub_aggregation: IntermediateAggregationResults,
} }
@ -1240,6 +1240,24 @@ mod tests {
assert_eq!(tree_left, tree_expected); assert_eq!(tree_left, tree_expected);
} }
#[test]
fn test_term_bucket_doc_count_no_u32_overflow() {
    // Regression test: merge two entries for the same term, each carrying a doc count just
    // below u32::MAX. The merged count exceeds u32::MAX, which overflowed while doc_count
    // was still a u32.
    let docs_in_segment = u64::from(u32::MAX) - 100;
    let new_entry = || IntermediateTermBucketEntry {
        doc_count: docs_in_segment,
        sub_aggregation: Default::default(),
    };

    let mut merged = new_entry();
    merged.merge_fruits(new_entry()).unwrap();

    assert_eq!(merged.doc_count, 2 * docs_in_segment);
}
#[test] #[test]
fn test_merge_fruits_tree_empty() { fn test_merge_fruits_tree_empty() {
let mut tree_left = get_intermediate_tree_with_ranges(&[ let mut tree_left = get_intermediate_tree_with_ranges(&[