mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-19 09:16:45 +00:00
aggregation/terms: tidy fused term×histogram grid construction
Rename the value threaded through build_segment_term_collector and maybe_build_collector from max_term_id to max_column_val and col_max_val, respectively — it is the column's max value and is only later reused as the max term id. Make the grid-size arithmetic overflow- and zero-safe (saturating_add for the `+ 1`, checked_div for the division by the number of time buckets).
This commit is contained in:
parent
3cb400c300
commit
3ca510dff0
3 changed files with 15 additions and 19 deletions
|
|
@ -636,11 +636,7 @@ impl SegmentHistogramCollector<()> {
|
|||
) -> Self {
|
||||
let interval = req_data.req.interval;
|
||||
let offset = req_data.offset;
|
||||
let num_parents = if num_time_buckets == 0 {
|
||||
0
|
||||
} else {
|
||||
counts.len() / num_time_buckets
|
||||
};
|
||||
let num_parents = counts.len().checked_div(num_time_buckets).unwrap_or(0);
|
||||
let parent_buckets = (0..num_parents)
|
||||
.map(|t| {
|
||||
let row = &counts[t * num_time_buckets..(t + 1) * num_time_buckets];
|
||||
|
|
|
|||
|
|
@ -376,7 +376,7 @@ pub(crate) fn build_segment_term_collector(
|
|||
// Let's see if we can use a vec to aggregate our data
|
||||
// instead of a hashmap.
|
||||
let col_max_value = terms_req_data.accessor.max_value();
|
||||
let max_term_id: u64 =
|
||||
let max_column_val: u64 =
|
||||
col_max_value.max(terms_req_data.missing_value_for_accessor.unwrap_or(0u64));
|
||||
|
||||
// Fused fast path: low-cardinality terms × a single `histogram`/`date_histogram` leaf over full
|
||||
|
|
@ -385,7 +385,7 @@ pub(crate) fn build_segment_term_collector(
|
|||
req_data,
|
||||
node,
|
||||
&terms_req_data,
|
||||
max_term_id,
|
||||
max_column_val,
|
||||
is_top_level,
|
||||
)? {
|
||||
return Ok(collector);
|
||||
|
|
@ -399,30 +399,30 @@ pub(crate) fn build_segment_term_collector(
|
|||
|
||||
let mut bucket_id_provider = BucketIdProvider::default();
|
||||
// Decide which bucket storage is best suited for this aggregation.
|
||||
if is_top_level && max_term_id < MAX_NUM_TERMS_FOR_VEC && !has_sub_aggregations {
|
||||
let term_buckets = VecTermBucketsNoAgg::new(max_term_id + 1, &mut bucket_id_provider);
|
||||
if is_top_level && max_column_val < MAX_NUM_TERMS_FOR_VEC && !has_sub_aggregations {
|
||||
let term_buckets = VecTermBucketsNoAgg::new(max_column_val + 1, &mut bucket_id_provider);
|
||||
let collector: SegmentTermCollector<_, HighCardSubAggBuffer> = SegmentTermCollector {
|
||||
parent_buckets: vec![term_buckets],
|
||||
sub_agg: None,
|
||||
bucket_id_provider,
|
||||
max_term_id,
|
||||
max_term_id: max_column_val,
|
||||
terms_req_data,
|
||||
};
|
||||
Ok(Box::new(collector))
|
||||
} else if is_top_level && max_term_id < MAX_NUM_TERMS_FOR_VEC {
|
||||
let term_buckets = VecTermBuckets::new(max_term_id + 1, &mut bucket_id_provider);
|
||||
} else if is_top_level && max_column_val < MAX_NUM_TERMS_FOR_VEC {
|
||||
let term_buckets = VecTermBuckets::new(max_column_val + 1, &mut bucket_id_provider);
|
||||
let sub_agg = sub_agg_collector.map(LowCardBufferedSubAggs::new);
|
||||
let collector: SegmentTermCollector<_, LowCardSubAggBuffer> = SegmentTermCollector {
|
||||
parent_buckets: vec![term_buckets],
|
||||
sub_agg,
|
||||
bucket_id_provider,
|
||||
max_term_id,
|
||||
max_term_id: max_column_val,
|
||||
terms_req_data,
|
||||
};
|
||||
Ok(Box::new(collector))
|
||||
} else if max_term_id < 8_000_000 && is_top_level {
|
||||
} else if max_column_val < 8_000_000 && is_top_level {
|
||||
let term_buckets: PagedTermMap =
|
||||
PagedTermMap::new(max_term_id + 1, &mut bucket_id_provider);
|
||||
PagedTermMap::new(max_column_val + 1, &mut bucket_id_provider);
|
||||
// Build sub-aggregation blueprint (flat pairs)
|
||||
let sub_agg = sub_agg_collector.map(BufferedSubAggs::new);
|
||||
let collector: SegmentTermCollector<PagedTermMap, HighCardSubAggBuffer> =
|
||||
|
|
@ -430,7 +430,7 @@ pub(crate) fn build_segment_term_collector(
|
|||
parent_buckets: vec![term_buckets],
|
||||
sub_agg,
|
||||
bucket_id_provider,
|
||||
max_term_id,
|
||||
max_term_id: max_column_val,
|
||||
terms_req_data,
|
||||
};
|
||||
Ok(Box::new(collector))
|
||||
|
|
@ -443,7 +443,7 @@ pub(crate) fn build_segment_term_collector(
|
|||
parent_buckets: vec![term_buckets],
|
||||
sub_agg,
|
||||
bucket_id_provider,
|
||||
max_term_id,
|
||||
max_term_id: max_column_val,
|
||||
terms_req_data,
|
||||
};
|
||||
Ok(Box::new(collector))
|
||||
|
|
|
|||
|
|
@ -232,7 +232,7 @@ pub(super) fn maybe_build_collector(
|
|||
agg_data: &mut AggregationsSegmentCtx,
|
||||
node: &AggRefNode,
|
||||
terms_req_data: &TermsAggReqData,
|
||||
max_term_id: u64,
|
||||
col_max_val: u64,
|
||||
is_top_level: bool,
|
||||
) -> crate::Result<Option<Box<dyn SegmentAggregationCollector>>> {
|
||||
// Both columns must be full (one value per doc) so their values align positionally with `docs`
|
||||
|
|
@ -268,7 +268,7 @@ pub(super) fn maybe_build_collector(
|
|||
else {
|
||||
return Ok(None);
|
||||
};
|
||||
let num_terms = (max_term_id + 1) as usize;
|
||||
let num_terms = col_max_val.saturating_add(1) as usize;
|
||||
if num_terms.saturating_mul(range.len) > MAX_FUSED_GRID_BUCKETS {
|
||||
return Ok(None);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue