diff --git a/src/aggregation/agg_data.rs b/src/aggregation/agg_data.rs
index 9db6c6372..946c5a5a0 100644
--- a/src/aggregation/agg_data.rs
+++ b/src/aggregation/agg_data.rs
@@ -868,7 +868,7 @@ fn build_terms_or_cardinality_nodes(
         });
     }
 
-    // Add one node per accessor to mirror previous behavior and allow per-type missing handling.
+    // Add one node per accessor
     for (accessor, column_type) in column_and_types {
         let missing_value_for_accessor = if use_special_missing_agg {
             None
diff --git a/src/aggregation/bucket/term_agg.rs b/src/aggregation/bucket/term_agg.rs
index 0e966135c..99a156902 100644
--- a/src/aggregation/bucket/term_agg.rs
+++ b/src/aggregation/bucket/term_agg.rs
@@ -333,6 +333,10 @@ impl TermsAggregationInternal {
     }
 }
 
+/// The maximum number of terms for which a Vec-backed bucket storage is used.
+/// TODO: Benchmark to validate the threshold
+pub const MAX_NUM_TERMS_FOR_VEC: u64 = 100;
+
 /// Build a concrete `SegmentTermCollector` with either a Vec- or HashMap-backed
 /// bucket storage, depending on the column type and aggregation level.
 pub(crate) fn build_segment_term_collector(
@@ -366,16 +370,11 @@ pub(crate) fn build_segment_term_collector(
     // Build sub-aggregation blueprint if there are children.
     let has_sub_aggregations = !node.children.is_empty();
 
-    // Decide whether to use a Vec-backed or HashMap-backed bucket storage.
-
     // TODO: A better metric instead of is_top_level would be the number of buckets expected.
     // E.g. If term agg is not top level, but the parent is a bucket agg with less than 10 buckets,
     // we can still use Vec.
     let is_top_level = terms_req_data.is_top_level;
 
-    // TODO: Benchmark to validate the threshold
-    const MAX_NUM_TERMS_FOR_VEC: u64 = 100;
-
     // Let's see if we can use a vec to aggregate our data
     // instead of a hashmap.
     let col_max_value = terms_req_data.accessor.max_value();
@@ -389,8 +388,7 @@ pub(crate) fn build_segment_term_collector(
     };
     let mut bucket_id_provider = BucketIdProvider::default();
 
-    // - use a Vec instead of a hashmap for our aggregation.
-
+    // Decide which bucket storage is best suited for this aggregation.
     if is_top_level && max_term_id < MAX_NUM_TERMS_FOR_VEC && !has_sub_aggregations {
        let term_buckets = VecTermBucketsNoAgg::new(max_term_id + 1, &mut bucket_id_provider);
        let collector: SegmentTermCollector<_, true> = SegmentTermCollector {
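// A minimal sketch (not part of the patch) of the bucket-storage decision made in
// `build_segment_term_collector` above: Vec-backed storage is only chosen for a
// top-level terms aggregation with a small, dense term id space and no
// sub-aggregations; everything else falls back to a HashMap. `BucketStorage` and
// `choose_storage` are illustrative stand-ins, not types from the crate.

const MAX_NUM_TERMS_FOR_VEC: u64 = 100;

#[derive(Debug, PartialEq)]
enum BucketStorage {
    /// Dense, small term id space: index directly into a Vec with one slot per term id.
    Vec { len: u64 },
    /// Sparse or large term id space, or sub-aggregations present: HashMap keyed by term id.
    HashMap,
}

fn choose_storage(is_top_level: bool, max_term_id: u64, has_sub_aggregations: bool) -> BucketStorage {
    if is_top_level && max_term_id < MAX_NUM_TERMS_FOR_VEC && !has_sub_aggregations {
        // max_term_id is the largest possible id, hence max_term_id + 1 slots.
        BucketStorage::Vec { len: max_term_id + 1 }
    } else {
        BucketStorage::HashMap
    }
}

fn main() {
    // 42 distinct term ids at the top level, no sub-aggregations: Vec-backed.
    assert_eq!(choose_storage(true, 42, false), BucketStorage::Vec { len: 43 });
    // Nested, sub-aggregations present, or too many terms: HashMap-backed.
    assert_eq!(choose_storage(false, 42, false), BucketStorage::HashMap);
    assert_eq!(choose_storage(true, 10_000, true), BucketStorage::HashMap);
}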
diff --git a/src/aggregation/cached_sub_aggs.rs b/src/aggregation/cached_sub_aggs.rs
index f133c0273..94ea93fea 100644
--- a/src/aggregation/cached_sub_aggs.rs
+++ b/src/aggregation/cached_sub_aggs.rs
@@ -1,5 +1,6 @@
 use super::segment_agg_result::SegmentAggregationCollector;
 use crate::aggregation::agg_data::AggregationsSegmentCtx;
+use crate::aggregation::bucket::MAX_NUM_TERMS_FOR_VEC;
 use crate::aggregation::BucketId;
 use crate::DocId;
 
@@ -28,9 +29,13 @@ pub(crate) struct CachedSubAggs {
     /// The outer Vec is indexed by BucketId.
     per_bucket_docs: Vec<Vec<DocId>>,
     /// Only used when LOWCARD is false.
-    /// For higher cardinalities we use a partitioned approach to store
     ///
-    /// partitioned Vec<(BucketId, DocId)> pairs to improve grouping locality.
+    /// This partitioning is used to do some cheap grouping on the bucket ids.
+    /// Bucket ids are dense, so even when we don't detect the cardinality as low
+    /// but there are e.g. just 16 bucket ids, each bucket id ends up in its own partition.
+    ///
+    /// We want to keep this cheap, because high-cardinality aggregations can have a lot of
+    /// buckets, and there may be nothing to group.
     partitions: [PartitionEntry; NUM_PARTITIONS],
     pub(crate) sub_agg_collector: Box<dyn SegmentAggregationCollector>,
     num_docs: usize,
@@ -110,7 +115,12 @@ impl CachedSubAggs {
         // the FLUSH_THRESHOLD, but never flush any buckets. (except the final flush)
         let mut bucket_treshold = FLUSH_THRESHOLD / (self.per_bucket_docs.len().max(1) * 2);
         const _: () = {
-            assert!(std::mem::size_of::<u64>() == 8, "expected 64-bit u64");
+            // MAX_NUM_TERMS_FOR_VEC is the LOWCARD threshold: the flush threshold must stay > 0 at that bucket count.
+            let bucket_threshold = FLUSH_THRESHOLD / (MAX_NUM_TERMS_FOR_VEC as usize * 2);
+            assert!(
+                bucket_threshold > 0,
+                "Bucket threshold must be greater than 0"
+            );
         };
         if force {
             bucket_treshold = 0;
diff --git a/src/aggregation/metric/top_hits.rs b/src/aggregation/metric/top_hits.rs
index 02b6be41b..1adf41bbb 100644
--- a/src/aggregation/metric/top_hits.rs
+++ b/src/aggregation/metric/top_hits.rs
@@ -20,7 +20,6 @@
 use crate::collector::sort_key::ReverseComparator;
 use crate::collector::TopNComputer;
 use crate::schema::OwnedValue;
 use crate::{DocAddress, DocId, SegmentOrdinal};
-// duplicate import removed; already imported above
 /// Contains all information required by the TopHitsSegmentCollector to perform the
 /// top_hits aggregation on a segment.
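// A rough sketch (not part of the patch) of the partitioned buffering described in the
// `partitions` doc comment above. The modulo-based routing, the field layout of
// `PartitionEntry`, and NUM_PARTITIONS = 16 are assumptions for illustration only,
// not the actual implementation in cached_sub_aggs.rs.

type BucketId = u32;
type DocId = u32;

const NUM_PARTITIONS: usize = 16;

#[derive(Default)]
struct PartitionEntry {
    docs: Vec<(BucketId, DocId)>,
}

#[derive(Default)]
struct PartitionedDocs {
    partitions: [PartitionEntry; NUM_PARTITIONS],
}

impl PartitionedDocs {
    /// Route each (bucket, doc) pair to a partition derived from its bucket id.
    /// Because bucket ids are dense, a small number of distinct buckets means each
    /// bucket gets its own partition, which yields cheap, approximate grouping
    /// without sorting or hashing the full stream.
    fn push(&mut self, bucket: BucketId, doc: DocId) {
        let partition = bucket as usize % NUM_PARTITIONS;
        self.partitions[partition].docs.push((bucket, doc));
    }
}

fn main() {
    let mut buffered = PartitionedDocs::default();
    for doc in 0..8u32 {
        // Only 4 distinct bucket ids, so each bucket lands in its own partition.
        buffered.push(doc % 4, doc);
    }
    for (idx, partition) in buffered.partitions.iter().enumerate() {
        if !partition.docs.is_empty() {
            println!("partition {idx}: {:?}", partition.docs);
        }
    }
}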