perf: Optimize TermSet for very large sets of terms. (#75)

* Removes allocation in a bunch of places
* Removes sorting of terms if we're going to use the fast field execution method
* Adds back the (accidentally dropped) cardinality threshold
* Removes `bool` support -- using the posting lists is always more efficient for a `bool`, since there are at most two of them
* More eagerly constructs the term `HashSet` so that it happens once, rather than once per segment
This commit is contained in:
Stu Hood
2025-10-25 14:40:00 -07:00
committed by Stu Hood
parent b3541d10e1
commit 92c784f697
3 changed files with 150 additions and 108 deletions

View File

@@ -332,6 +332,21 @@ where B: AsRef<[u8]>
self.get_fast_type::<u64>()
}
/// Decodes the `u64` representation of the FastValue held by this term.
///
/// Yields `None` when the term's type is not one of `U64`, `I64`, `F64`, `Bool` or `Date`,
/// or when the value payload cannot be converted into the eight bytes a `u64` requires.
pub fn as_u64_lenient(&self) -> Option<u64> {
    // Only these types are accepted; every other type is rejected up front.
    match self.typ() {
        Type::U64 | Type::I64 | Type::F64 | Type::Bool | Type::Date => {}
        _ => return None,
    }
    // The payload is interpreted as a big-endian integer; a failed conversion to a
    // fixed-size array is reported as `None` rather than panicking.
    let payload: [u8; 8] = self.raw_value_bytes_payload().try_into().ok()?;
    Some(u64::from_be_bytes(payload))
}
fn get_fast_type<T: FastValue>(&self) -> Option<T> {
if self.typ() != T::to_type() {
return None;