aggregation/terms: tidy fused term×histogram grid construction

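Rename the value previously called max_term_id: it becomes max_column_val
in build_segment_term_collector and col_max_val in the maybe_build_collector
parameter list. The value is the column's max value (raised to the
accessor's missing value when that is larger) and is only later reused as
the max term id. Make the grid-size arithmetic overflow- and zero-safe:
saturating_add for the `+ 1` when computing num_terms, and checked_div for
the num_parents division so that num_time_buckets == 0 yields 0.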
This commit is contained in:
Pascal Seitz 2026-06-15 17:09:50 +02:00 committed by PSeitz
commit 3ca510dff0
3 changed files with 15 additions and 19 deletions

View file

@ -636,11 +636,7 @@ impl SegmentHistogramCollector<()> {
) -> Self {
let interval = req_data.req.interval;
let offset = req_data.offset;
let num_parents = if num_time_buckets == 0 {
0
} else {
counts.len() / num_time_buckets
};
let num_parents = counts.len().checked_div(num_time_buckets).unwrap_or(0);
let parent_buckets = (0..num_parents)
.map(|t| {
let row = &counts[t * num_time_buckets..(t + 1) * num_time_buckets];

View file

@ -376,7 +376,7 @@ pub(crate) fn build_segment_term_collector(
// Let's see if we can use a vec to aggregate our data
// instead of a hashmap.
let col_max_value = terms_req_data.accessor.max_value();
let max_term_id: u64 =
let max_column_val: u64 =
col_max_value.max(terms_req_data.missing_value_for_accessor.unwrap_or(0u64));
// Fused fast path: low-cardinality terms × a single `histogram`/`date_histogram` leaf over full
@ -385,7 +385,7 @@ pub(crate) fn build_segment_term_collector(
req_data,
node,
&terms_req_data,
max_term_id,
max_column_val,
is_top_level,
)? {
return Ok(collector);
@ -399,30 +399,30 @@ pub(crate) fn build_segment_term_collector(
let mut bucket_id_provider = BucketIdProvider::default();
// Decide which bucket storage is best suited for this aggregation.
if is_top_level && max_term_id < MAX_NUM_TERMS_FOR_VEC && !has_sub_aggregations {
let term_buckets = VecTermBucketsNoAgg::new(max_term_id + 1, &mut bucket_id_provider);
if is_top_level && max_column_val < MAX_NUM_TERMS_FOR_VEC && !has_sub_aggregations {
let term_buckets = VecTermBucketsNoAgg::new(max_column_val + 1, &mut bucket_id_provider);
let collector: SegmentTermCollector<_, HighCardSubAggBuffer> = SegmentTermCollector {
parent_buckets: vec![term_buckets],
sub_agg: None,
bucket_id_provider,
max_term_id,
max_term_id: max_column_val,
terms_req_data,
};
Ok(Box::new(collector))
} else if is_top_level && max_term_id < MAX_NUM_TERMS_FOR_VEC {
let term_buckets = VecTermBuckets::new(max_term_id + 1, &mut bucket_id_provider);
} else if is_top_level && max_column_val < MAX_NUM_TERMS_FOR_VEC {
let term_buckets = VecTermBuckets::new(max_column_val + 1, &mut bucket_id_provider);
let sub_agg = sub_agg_collector.map(LowCardBufferedSubAggs::new);
let collector: SegmentTermCollector<_, LowCardSubAggBuffer> = SegmentTermCollector {
parent_buckets: vec![term_buckets],
sub_agg,
bucket_id_provider,
max_term_id,
max_term_id: max_column_val,
terms_req_data,
};
Ok(Box::new(collector))
} else if max_term_id < 8_000_000 && is_top_level {
} else if max_column_val < 8_000_000 && is_top_level {
let term_buckets: PagedTermMap =
PagedTermMap::new(max_term_id + 1, &mut bucket_id_provider);
PagedTermMap::new(max_column_val + 1, &mut bucket_id_provider);
// Build sub-aggregation blueprint (flat pairs)
let sub_agg = sub_agg_collector.map(BufferedSubAggs::new);
let collector: SegmentTermCollector<PagedTermMap, HighCardSubAggBuffer> =
@ -430,7 +430,7 @@ pub(crate) fn build_segment_term_collector(
parent_buckets: vec![term_buckets],
sub_agg,
bucket_id_provider,
max_term_id,
max_term_id: max_column_val,
terms_req_data,
};
Ok(Box::new(collector))
@ -443,7 +443,7 @@ pub(crate) fn build_segment_term_collector(
parent_buckets: vec![term_buckets],
sub_agg,
bucket_id_provider,
max_term_id,
max_term_id: max_column_val,
terms_req_data,
};
Ok(Box::new(collector))

View file

@ -232,7 +232,7 @@ pub(super) fn maybe_build_collector(
agg_data: &mut AggregationsSegmentCtx,
node: &AggRefNode,
terms_req_data: &TermsAggReqData,
max_term_id: u64,
col_max_val: u64,
is_top_level: bool,
) -> crate::Result<Option<Box<dyn SegmentAggregationCollector>>> {
// Both columns must be full (one value per doc) so their values align positionally with `docs`
@ -268,7 +268,7 @@ pub(super) fn maybe_build_collector(
else {
return Ok(None);
};
let num_terms = (max_term_id + 1) as usize;
let num_terms = col_max_val.saturating_add(1) as usize;
if num_terms.saturating_mul(range.len) > MAX_FUSED_GRID_BUCKETS {
return Ok(None);
}