mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-19 09:16:45 +00:00
add comment, hoist variables
This commit is contained in:
parent
03520a0719
commit
ac7a3d347c
2 changed files with 26 additions and 4 deletions
|
|
@ -15,8 +15,25 @@ impl<T: PartialOrd + Copy + std::fmt::Debug + Send + Sync + 'static + Default>
|
|||
{
|
||||
#[inline]
|
||||
pub fn fetch_block<'a>(&'a mut self, docs: &'a [u32], accessor: &Column<T>) {
|
||||
if accessor.index.get_cardinality().is_full() {
|
||||
self.val_cache.resize(docs.len(), T::default());
|
||||
self.fetch_block_with_is_full(docs, accessor, accessor.index.get_cardinality().is_full());
|
||||
}
|
||||
|
||||
/// Like [`Self::fetch_block`] but takes the column's fullness instead of querying
|
||||
/// `accessor.index.get_cardinality()` each call — for callers that know it up front (e.g.
|
||||
/// checked once at construction). `is_full` must equal
|
||||
/// `accessor.index.get_cardinality().is_full()`.
|
||||
#[inline]
|
||||
pub fn fetch_block_with_is_full<'a>(
|
||||
&'a mut self,
|
||||
docs: &'a [u32],
|
||||
accessor: &Column<T>,
|
||||
is_full: bool,
|
||||
) {
|
||||
if is_full {
|
||||
// Skip the resize when already the right length (common case: fixed-size blocks).
|
||||
if self.val_cache.len() != docs.len() {
|
||||
self.val_cache.resize(docs.len(), T::default());
|
||||
}
|
||||
// When the docs form a contiguous ascending run we can fetch the values
|
||||
// as a single range. This lets codecs (e.g. bitpacked) bulk-decode the
|
||||
// slice instead of gathering value-by-value, and avoids per-value dynamic
|
||||
|
|
|
|||
|
|
@ -65,6 +65,9 @@ pub(crate) struct SegmentTermHistogramCollector {
|
|||
hist_block: ColumnBlockAccessor<u64>,
|
||||
/// No hard bounds, so every doc is in-bounds.
|
||||
all_docs_in_bounds: bool,
|
||||
/// Both columns are full (fused-path precondition); cached so `collect` skips the per-block
|
||||
/// cardinality lookup in `fetch_block`.
|
||||
is_full: bool,
|
||||
}
|
||||
|
||||
impl SegmentAggregationCollector for SegmentTermHistogramCollector {
|
||||
|
|
@ -132,9 +135,9 @@ impl SegmentAggregationCollector for SegmentTermHistogramCollector {
|
|||
// single `agg_data` scratch accessor). The collector owns all its inputs, so `collect`
|
||||
// doesn't touch `agg_data`.
|
||||
self.term_block
|
||||
.fetch_block(docs, &self.terms_req_data.accessor);
|
||||
.fetch_block_with_is_full(docs, &self.terms_req_data.accessor, self.is_full);
|
||||
self.hist_block
|
||||
.fetch_block(docs, &self.hist_req_data.accessor);
|
||||
.fetch_block_with_is_full(docs, &self.hist_req_data.accessor, self.is_full);
|
||||
|
||||
// Hoist the loop-invariant fields into locals: the optimizer can't prove the
|
||||
// `self.counts`/`self.term_counts` writes don't alias these `self` fields, so it can't keep
|
||||
|
|
@ -223,6 +226,7 @@ pub(super) fn maybe_build_collector(
|
|||
// using too much memory. We could check the maximum theoretical buckets up-front and pass
|
||||
// them down.
|
||||
let fuseable = is_top_level
|
||||
// TODO: We can easily support this
|
||||
&& terms_req_data.allowed_term_ids.is_none()
|
||||
&& terms_req_data.accessor.get_cardinality().is_full()
|
||||
// The flat counters are `u32`, bumped once per value, so no count can exceed the column's
|
||||
|
|
@ -273,6 +277,7 @@ pub(super) fn maybe_build_collector(
|
|||
term_block: ColumnBlockAccessor::default(),
|
||||
hist_block: ColumnBlockAccessor::default(),
|
||||
all_docs_in_bounds,
|
||||
is_full: terms_req_data.accessor.get_cardinality().is_full(),
|
||||
})))
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue