mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-07-02 15:20:42 +00:00
Compare commits
3 Commits
pub_term_i
...
clippy
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
42d721214b | ||
|
|
a9733ba8c2 | ||
|
|
874d54a63a |
@@ -4,7 +4,7 @@ use binggan::{black_box, BenchGroup, BenchRunner};
|
||||
use rand::prelude::*;
|
||||
use rand::rngs::StdRng;
|
||||
use rand::SeedableRng;
|
||||
use tantivy::collector::{Count, DocSetCollector, TopDocs};
|
||||
use tantivy::collector::{Count, TopDocs};
|
||||
use tantivy::query::RangeQuery;
|
||||
use tantivy::schema::{Schema, FAST, INDEXED};
|
||||
use tantivy::{doc, Index, Order, ReloadPolicy, Searcher, Term};
|
||||
@@ -183,7 +183,6 @@ fn run_benchmark_tasks(
|
||||
// Test top 100 by the field (ascending order)
|
||||
{
|
||||
let collector_name = format!("top100_by_{}_asc", field_name);
|
||||
let field_name_owned = field_name.to_string();
|
||||
add_bench_task_top100_asc(
|
||||
bench_group,
|
||||
bench_index,
|
||||
@@ -192,14 +191,12 @@ fn run_benchmark_tasks(
|
||||
field_name,
|
||||
range_low,
|
||||
range_high,
|
||||
field_name_owned,
|
||||
);
|
||||
}
|
||||
|
||||
// Test top 100 by the field (descending order)
|
||||
{
|
||||
let collector_name = format!("top100_by_{}_desc", field_name);
|
||||
let field_name_owned = field_name.to_string();
|
||||
add_bench_task_top100_desc(
|
||||
bench_group,
|
||||
bench_index,
|
||||
@@ -208,7 +205,6 @@ fn run_benchmark_tasks(
|
||||
field_name,
|
||||
range_low,
|
||||
range_high,
|
||||
field_name_owned,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -234,27 +230,6 @@ fn add_bench_task_count(
|
||||
bench_group.register(task_name, move |_| black_box(search_task.run()));
|
||||
}
|
||||
|
||||
fn add_bench_task_docset(
|
||||
bench_group: &mut BenchGroup,
|
||||
bench_index: &BenchIndex,
|
||||
query: RangeQuery,
|
||||
collector_name: &str,
|
||||
field_name: &str,
|
||||
range_low: u64,
|
||||
range_high: u64,
|
||||
) {
|
||||
let task_name = format!(
|
||||
"range_{}_[{} TO {}]_{}",
|
||||
field_name, range_low, range_high, collector_name
|
||||
);
|
||||
|
||||
let search_task = DocSetSearchTask {
|
||||
searcher: bench_index.searcher.clone(),
|
||||
query,
|
||||
};
|
||||
bench_group.register(task_name, move |_| black_box(search_task.run()));
|
||||
}
|
||||
|
||||
fn add_bench_task_top100_asc(
|
||||
bench_group: &mut BenchGroup,
|
||||
bench_index: &BenchIndex,
|
||||
@@ -263,7 +238,6 @@ fn add_bench_task_top100_asc(
|
||||
field_name: &str,
|
||||
range_low: u64,
|
||||
range_high: u64,
|
||||
field_name_owned: String,
|
||||
) {
|
||||
let task_name = format!(
|
||||
"range_{}_[{} TO {}]_{}",
|
||||
@@ -273,7 +247,7 @@ fn add_bench_task_top100_asc(
|
||||
let search_task = Top100AscSearchTask {
|
||||
searcher: bench_index.searcher.clone(),
|
||||
query,
|
||||
field_name: field_name_owned,
|
||||
field_name: field_name.to_string(),
|
||||
};
|
||||
bench_group.register(task_name, move |_| black_box(search_task.run()));
|
||||
}
|
||||
@@ -286,7 +260,6 @@ fn add_bench_task_top100_desc(
|
||||
field_name: &str,
|
||||
range_low: u64,
|
||||
range_high: u64,
|
||||
field_name_owned: String,
|
||||
) {
|
||||
let task_name = format!(
|
||||
"range_{}_[{} TO {}]_{}",
|
||||
@@ -296,7 +269,7 @@ fn add_bench_task_top100_desc(
|
||||
let search_task = Top100DescSearchTask {
|
||||
searcher: bench_index.searcher.clone(),
|
||||
query,
|
||||
field_name: field_name_owned,
|
||||
field_name: field_name.to_string(),
|
||||
};
|
||||
bench_group.register(task_name, move |_| black_box(search_task.run()));
|
||||
}
|
||||
@@ -313,19 +286,6 @@ impl CountSearchTask {
|
||||
}
|
||||
}
|
||||
|
||||
struct DocSetSearchTask {
|
||||
searcher: Searcher,
|
||||
query: RangeQuery,
|
||||
}
|
||||
|
||||
impl DocSetSearchTask {
|
||||
#[inline(never)]
|
||||
pub fn run(&self) -> usize {
|
||||
let result = self.searcher.search(&self.query, &DocSetCollector).unwrap();
|
||||
result.len()
|
||||
}
|
||||
}
|
||||
|
||||
struct Top100AscSearchTask {
|
||||
searcher: Searcher,
|
||||
query: RangeQuery,
|
||||
|
||||
@@ -229,15 +229,7 @@ fn execute_query<T: Display>(
|
||||
suffix: &str,
|
||||
index: &Index,
|
||||
) -> NumHits {
|
||||
let gen_query_inclusive = |from: &T, to: &T| {
|
||||
format!(
|
||||
"{}:[{} TO {}] {}",
|
||||
field,
|
||||
&from.to_string(),
|
||||
&to.to_string(),
|
||||
suffix
|
||||
)
|
||||
};
|
||||
let gen_query_inclusive = |from: &T, to: &T| format!("{field}:[{from} TO {to}] {suffix}");
|
||||
|
||||
let query = gen_query_inclusive(id_range.start(), id_range.end());
|
||||
execute_query_(&query, index)
|
||||
|
||||
@@ -202,6 +202,7 @@ mod tests {
|
||||
assert_eq!(&output, &[1, 3, 4, 5, 6, 7, 8]);
|
||||
}
|
||||
|
||||
#[allow(clippy::reversed_empty_ranges)] // Intentional: exercises the start > end case.
|
||||
fn test_filter_impl_empty_range_aux(filter_impl: FilterImplPerInstructionSet) {
|
||||
// start > end: RangeInclusive::contains always returns false; output must be empty.
|
||||
// The SVE path's wrapping_sub would otherwise produce a huge range_width.
|
||||
|
||||
@@ -54,6 +54,6 @@ pub fn generate_columnar_with_name(card: Card, num_docs: u32, column_name: &str)
|
||||
}
|
||||
|
||||
let mut wrt: Vec<u8> = Vec::new();
|
||||
columnar_writer.serialize(num_docs, &mut wrt).unwrap();
|
||||
columnar_writer.serialize(num_docs, None, &mut wrt).unwrap();
|
||||
ColumnarReader::open(wrt).unwrap()
|
||||
}
|
||||
|
||||
@@ -445,9 +445,7 @@ fn deserialize_optional_index_block_metadatas(
|
||||
let mut block_metas = Vec::with_capacity(num_blocks + 1);
|
||||
let mut start_byte_offset = 0;
|
||||
let mut non_null_rows_before_block = 0;
|
||||
for block_meta_bytes in data.chunks_exact(SERIALIZED_BLOCK_META_NUM_BYTES) {
|
||||
let block_meta_bytes: [u8; SERIALIZED_BLOCK_META_NUM_BYTES] =
|
||||
block_meta_bytes.try_into().unwrap();
|
||||
for &block_meta_bytes in data.as_chunks::<SERIALIZED_BLOCK_META_NUM_BYTES>().0 {
|
||||
let SerializedBlockMeta {
|
||||
block_id,
|
||||
num_non_null_rows,
|
||||
|
||||
@@ -191,11 +191,13 @@ impl DenseBlock<'_> {
|
||||
from_block_id: u16,
|
||||
) -> impl Iterator<Item = (u16, DenseMiniBlock)> + '_ {
|
||||
self.0
|
||||
.chunks_exact(MINI_BLOCK_NUM_BYTES)
|
||||
.as_chunks::<MINI_BLOCK_NUM_BYTES>()
|
||||
.0
|
||||
.iter()
|
||||
.enumerate()
|
||||
.skip(from_block_id as usize)
|
||||
.map(|(block_id, bytes)| {
|
||||
let mini_block = DenseMiniBlock::from_bytes(bytes.try_into().unwrap());
|
||||
.map(|(block_id, &bytes)| {
|
||||
let mini_block = DenseMiniBlock::from_bytes(bytes);
|
||||
(block_id as u16, mini_block)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -64,20 +64,16 @@ pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync + DowncastSync {
|
||||
/// May panic if `idx` is greater than the column length.
|
||||
fn get_vals(&self, indexes: &[u32], output: &mut [T]) {
|
||||
assert!(indexes.len() == output.len());
|
||||
let out_and_idx_chunks = output.chunks_exact_mut(4).zip(indexes.chunks_exact(4));
|
||||
for (out_x4, idx_x4) in out_and_idx_chunks {
|
||||
let (out_chunks, out_rem) = output.as_chunks_mut::<4>();
|
||||
let (idx_chunks, idx_rem) = indexes.as_chunks::<4>();
|
||||
for (out_x4, idx_x4) in out_chunks.iter_mut().zip(idx_chunks) {
|
||||
out_x4[0] = self.get_val(idx_x4[0]);
|
||||
out_x4[1] = self.get_val(idx_x4[1]);
|
||||
out_x4[2] = self.get_val(idx_x4[2]);
|
||||
out_x4[3] = self.get_val(idx_x4[3]);
|
||||
}
|
||||
|
||||
let out_and_idx_chunks = output
|
||||
.chunks_exact_mut(4)
|
||||
.into_remainder()
|
||||
.iter_mut()
|
||||
.zip(indexes.chunks_exact(4).remainder());
|
||||
for (out, idx) in out_and_idx_chunks {
|
||||
for (out, idx) in out_rem.iter_mut().zip(idx_rem) {
|
||||
*out = self.get_val(*idx);
|
||||
}
|
||||
}
|
||||
@@ -92,19 +88,15 @@ pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync + DowncastSync {
|
||||
/// May panic if `idx` is greater than the column length.
|
||||
fn get_vals_opt(&self, indexes: &[u32], output: &mut [Option<T>]) {
|
||||
assert!(indexes.len() == output.len());
|
||||
let out_and_idx_chunks = output.chunks_exact_mut(4).zip(indexes.chunks_exact(4));
|
||||
for (out_x4, idx_x4) in out_and_idx_chunks {
|
||||
let (out_chunks, out_rem) = output.as_chunks_mut::<4>();
|
||||
let (idx_chunks, idx_rem) = indexes.as_chunks::<4>();
|
||||
for (out_x4, idx_x4) in out_chunks.iter_mut().zip(idx_chunks) {
|
||||
out_x4[0] = Some(self.get_val(idx_x4[0]));
|
||||
out_x4[1] = Some(self.get_val(idx_x4[1]));
|
||||
out_x4[2] = Some(self.get_val(idx_x4[2]));
|
||||
out_x4[3] = Some(self.get_val(idx_x4[3]));
|
||||
}
|
||||
let out_and_idx_chunks = output
|
||||
.chunks_exact_mut(4)
|
||||
.into_remainder()
|
||||
.iter_mut()
|
||||
.zip(indexes.chunks_exact(4).remainder());
|
||||
for (out, idx) in out_and_idx_chunks {
|
||||
for (out, idx) in out_rem.iter_mut().zip(idx_rem) {
|
||||
*out = Some(self.get_val(*idx));
|
||||
}
|
||||
}
|
||||
@@ -119,16 +111,16 @@ pub trait ColumnValues<T: PartialOrd = u64>: Send + Sync + DowncastSync {
|
||||
/// the segment's `maxdoc`.
|
||||
#[inline(always)]
|
||||
fn get_range(&self, start: u64, output: &mut [T]) {
|
||||
let mut out_chunks = output.chunks_exact_mut(4);
|
||||
let (out_chunks, out_rem) = output.as_chunks_mut::<4>();
|
||||
let mut idx = start;
|
||||
for out_x4 in out_chunks.by_ref() {
|
||||
for out_x4 in out_chunks {
|
||||
out_x4[0] = self.get_val(idx as u32);
|
||||
out_x4[1] = self.get_val((idx + 1) as u32);
|
||||
out_x4[2] = self.get_val((idx + 2) as u32);
|
||||
out_x4[3] = self.get_val((idx + 3) as u32);
|
||||
idx += 4;
|
||||
}
|
||||
for out in out_chunks.into_remainder() {
|
||||
for out in out_rem {
|
||||
*out = self.get_val(idx as u32);
|
||||
idx += 1;
|
||||
}
|
||||
|
||||
@@ -450,14 +450,15 @@ fn num_docs_strategy() -> impl Strategy<Value = usize> {
|
||||
)
|
||||
}
|
||||
|
||||
type ColumnarDocs = Vec<Vec<(&'static str, ColumnValue)>>;
|
||||
|
||||
// A columnar contains up to 2 docs.
|
||||
fn columnar_docs_strategy() -> impl Strategy<Value = Vec<Vec<(&'static str, ColumnValue)>>> {
|
||||
fn columnar_docs_strategy() -> impl Strategy<Value = ColumnarDocs> {
|
||||
num_docs_strategy()
|
||||
.prop_flat_map(|num_docs| proptest::collection::vec(doc_strategy(), num_docs))
|
||||
}
|
||||
|
||||
fn columnar_docs_and_mapping_strategy()
|
||||
-> impl Strategy<Value = (Vec<Vec<(&'static str, ColumnValue)>>, Vec<RowId>)> {
|
||||
fn columnar_docs_and_mapping_strategy() -> impl Strategy<Value = (ColumnarDocs, Vec<RowId>)> {
|
||||
columnar_docs_strategy().prop_flat_map(|docs| {
|
||||
permutation_strategy(docs.len()).prop_map(move |permutation| (docs.clone(), permutation))
|
||||
})
|
||||
|
||||
@@ -373,8 +373,8 @@ impl ReadOnlyBitSet {
|
||||
/// Iterate the tinyset on the fly from serialized data.
|
||||
#[inline]
|
||||
fn iter_tinysets(&self) -> impl Iterator<Item = TinySet> + '_ {
|
||||
self.data.chunks_exact(8).map(move |chunk| {
|
||||
let tinyset: TinySet = TinySet::deserialize(chunk.try_into().unwrap());
|
||||
self.data.as_chunks::<8>().0.iter().map(move |&chunk| {
|
||||
let tinyset: TinySet = TinySet::deserialize(chunk);
|
||||
tinyset
|
||||
})
|
||||
}
|
||||
|
||||
@@ -1233,7 +1233,7 @@ pub(crate) trait GetDocCount {
|
||||
|
||||
impl GetDocCount for (String, IntermediateTermBucketEntry) {
|
||||
fn doc_count(&self) -> u64 {
|
||||
self.1.doc_count as u64
|
||||
self.1.doc_count
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -738,7 +738,7 @@ impl IntermediateTermBucketResult {
|
||||
let mut buckets: Vec<BucketEntry> = self
|
||||
.entries
|
||||
.into_iter()
|
||||
.filter(|bucket| bucket.1.doc_count as u64 >= req.min_doc_count)
|
||||
.filter(|bucket| bucket.1.doc_count >= req.min_doc_count)
|
||||
.map(|(key, entry)| {
|
||||
let key_as_string = match key {
|
||||
IntermediateKey::Bool(key) => {
|
||||
@@ -750,7 +750,7 @@ impl IntermediateTermBucketResult {
|
||||
Ok(BucketEntry {
|
||||
key_as_string,
|
||||
key: key.into(),
|
||||
doc_count: entry.doc_count as u64,
|
||||
doc_count: entry.doc_count,
|
||||
sub_aggregation: entry
|
||||
.sub_aggregation
|
||||
.into_final_result_internal(sub_aggregation_req, limits)?,
|
||||
@@ -1047,7 +1047,7 @@ impl IntermediateCompositeBucketResult {
|
||||
.collect();
|
||||
Ok(CompositeBucketEntry {
|
||||
key,
|
||||
doc_count: entry.doc_count as u64,
|
||||
doc_count: entry.doc_count,
|
||||
sub_aggregation: entry
|
||||
.sub_aggregation
|
||||
.into_final_result_internal(sub_aggregation_req, limits)?,
|
||||
|
||||
@@ -105,6 +105,7 @@ impl Executor {
|
||||
///
|
||||
/// If the task panics, returns `Err(())`.
|
||||
#[cfg(feature = "quickwit")]
|
||||
#[allow(clippy::result_unit_err)] // `Err(())` only signals a panic; no error info to convey.
|
||||
pub fn spawn_blocking<T: Send + 'static>(
|
||||
&self,
|
||||
cpu_intensive_task: impl FnOnce() -> T + Send + 'static,
|
||||
|
||||
@@ -283,8 +283,7 @@ impl InvertedIndexReader {
|
||||
|
||||
#[cfg(feature = "quickwit")]
|
||||
impl InvertedIndexReader {
|
||||
/// Resolves a `Term` to its [`TermInfo`] asynchronously, if present in the dictionary.
|
||||
pub async fn get_term_info_async(&self, term: &Term) -> io::Result<Option<TermInfo>> {
|
||||
pub(crate) async fn get_term_info_async(&self, term: &Term) -> io::Result<Option<TermInfo>> {
|
||||
self.termdict.get_async(term.serialized_value_bytes()).await
|
||||
}
|
||||
|
||||
@@ -337,38 +336,23 @@ impl InvertedIndexReader {
|
||||
pub async fn warm_postings(&self, term: &Term, with_positions: bool) -> io::Result<bool> {
|
||||
let term_info_opt: Option<TermInfo> = self.get_term_info_async(term).await?;
|
||||
if let Some(term_info) = term_info_opt {
|
||||
self.warm_postings_from_term_info(&term_info, with_positions)
|
||||
.await?;
|
||||
let postings = self
|
||||
.postings_file_slice
|
||||
.read_bytes_slice_async(term_info.postings_range.clone());
|
||||
if with_positions {
|
||||
let positions = self
|
||||
.positions_file_slice
|
||||
.read_bytes_slice_async(term_info.positions_range.clone());
|
||||
futures_util::future::try_join(postings, positions).await?;
|
||||
} else {
|
||||
postings.await?;
|
||||
}
|
||||
Ok(true)
|
||||
} else {
|
||||
Ok(false)
|
||||
}
|
||||
}
|
||||
|
||||
/// Warmup a block postings given a `TermInfo`.
|
||||
/// This method is for an advanced usage only.
|
||||
///
|
||||
/// Use this when the [`TermInfo`] is already known (e.g. resolved via
|
||||
/// [`Self::get_term_info_async`]) to avoid a redundant dictionary lookup.
|
||||
pub async fn warm_postings_from_term_info(
|
||||
&self,
|
||||
term_info: &TermInfo,
|
||||
with_positions: bool,
|
||||
) -> io::Result<()> {
|
||||
let postings = self
|
||||
.postings_file_slice
|
||||
.read_bytes_slice_async(term_info.postings_range.clone());
|
||||
if with_positions {
|
||||
let positions = self
|
||||
.positions_file_slice
|
||||
.read_bytes_slice_async(term_info.positions_range.clone());
|
||||
futures_util::future::try_join(postings, positions).await?;
|
||||
} else {
|
||||
postings.await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Warmup a block postings given a range of `Term`s.
|
||||
/// This method is for an advanced usage only.
|
||||
///
|
||||
@@ -462,7 +446,7 @@ impl InvertedIndexReader {
|
||||
});
|
||||
|
||||
for posting_range in merged_posting_ranges_iter {
|
||||
if let Err(_) = sender.unbounded_send(posting_range) {
|
||||
if sender.unbounded_send(posting_range).is_err() {
|
||||
// this should happen only when search is cancelled
|
||||
return Err(io::Error::other("failed to send posting range back"));
|
||||
}
|
||||
|
||||
@@ -2249,7 +2249,7 @@ mod tests {
|
||||
from: T1,
|
||||
to: T2,
|
||||
) -> String {
|
||||
format!("{}:[{} TO {}]", field, &from.to_string(), &to.to_string())
|
||||
format!("{}:[{} TO {}]", field, from.to_string(), to.to_string())
|
||||
}
|
||||
|
||||
// Query first half
|
||||
@@ -2308,7 +2308,7 @@ mod tests {
|
||||
continue;
|
||||
}
|
||||
let gen_query_inclusive = |field: &str, from: Ipv6Addr, to: Ipv6Addr| {
|
||||
format!("{}:[{} TO {}]", field, &from.to_string(), &to.to_string())
|
||||
format!("{}:[{} TO {}]", field, from, to)
|
||||
};
|
||||
let ip = ip_from_id(existing_id);
|
||||
|
||||
@@ -2821,7 +2821,7 @@ mod tests {
|
||||
.add_document(doc!(field=>json!({"\u{0000}": "A"})))
|
||||
.unwrap();
|
||||
index_writer
|
||||
.add_document(doc!(field=>json!({format!("\u{0000}\u{0000}"): "A"})))
|
||||
.add_document(doc!(field=>json!({"\u{0000}\u{0000}".to_string(): "A"})))
|
||||
.unwrap();
|
||||
index_writer.commit().unwrap();
|
||||
Ok(())
|
||||
|
||||
@@ -198,7 +198,7 @@ mod tests_mmap {
|
||||
let index = Index::create_in_ram(schema_builder.build());
|
||||
let mut index_writer = index.writer_for_tests().unwrap();
|
||||
index_writer
|
||||
.add_document(doc!(field=>json!({format!("{field_name_in}"): "test1", format!("num{field_name_in}"): 10})))
|
||||
.add_document(doc!(field=>json!({field_name_in.to_string(): "test1", format!("num{field_name_in}"): 10})))
|
||||
.unwrap();
|
||||
index_writer
|
||||
.add_document(doc!(field=>json!({format!("a{field_name_in}"): "test2"})))
|
||||
|
||||
@@ -41,10 +41,7 @@ pub(crate) fn save_metas(metas: &IndexMeta, directory: &dyn Directory) -> crate:
|
||||
// Just adding a new line at the end of the buffer.
|
||||
writeln!(&mut buffer)?;
|
||||
crate::fail_point!("save_metas", |msg| Err(crate::TantivyError::from(
|
||||
std::io::Error::new(
|
||||
std::io::ErrorKind::Other,
|
||||
msg.unwrap_or_else(|| "Undefined".to_string())
|
||||
)
|
||||
std::io::Error::other(msg.unwrap_or_else(|| "Undefined".to_string()))
|
||||
)));
|
||||
directory.sync_directory()?;
|
||||
directory.atomic_write(&META_FILEPATH, &buffer[..])?;
|
||||
|
||||
@@ -222,7 +222,7 @@ impl<'a, W: Write> FieldSerializer<'a, W> {
|
||||
/// using `VInt` encoding.
|
||||
pub fn close_term(&mut self) -> io::Result<()> {
|
||||
crate::fail_point!("FieldSerializer::close_term", |msg: Option<String>| {
|
||||
Err(io::Error::new(io::ErrorKind::Other, format!("{msg:?}")))
|
||||
Err(io::Error::other(format!("{msg:?}")))
|
||||
});
|
||||
|
||||
if !self.term_open {
|
||||
|
||||
@@ -91,10 +91,14 @@ fn into_box_scorer<TScoreCombiner: ScoreCombiner>(
|
||||
num_docs: u32,
|
||||
) -> Box<dyn Scorer> {
|
||||
match scorer {
|
||||
SpecializedScorer::TermUnion(term_scorers) => {
|
||||
let union_scorer =
|
||||
BufferedUnionScorer::build(term_scorers, score_combiner_fn, num_docs);
|
||||
Box::new(union_scorer)
|
||||
SpecializedScorer::TermUnion(mut term_scorers) => {
|
||||
if term_scorers.len() == 1 {
|
||||
Box::new(term_scorers.pop().unwrap())
|
||||
} else {
|
||||
let union_scorer =
|
||||
BufferedUnionScorer::build(term_scorers, score_combiner_fn, num_docs);
|
||||
Box::new(union_scorer)
|
||||
}
|
||||
}
|
||||
SpecializedScorer::TermIntersection(term_scorers) => {
|
||||
let boxed_scorers: Vec<Box<dyn Scorer>> = term_scorers
|
||||
@@ -504,10 +508,15 @@ impl<TScoreCombiner: ScoreCombiner + Sync> Weight for BooleanWeight<TScoreCombin
|
||||
let scorer = self.complex_scorer(reader, 1.0, &self.score_combiner_fn)?;
|
||||
let num_docs = reader.num_docs();
|
||||
match scorer {
|
||||
SpecializedScorer::TermUnion(term_scorers) => {
|
||||
let mut union_scorer =
|
||||
BufferedUnionScorer::build(term_scorers, &self.score_combiner_fn, num_docs);
|
||||
for_each_scorer(&mut union_scorer, callback);
|
||||
SpecializedScorer::TermUnion(mut term_scorers) => {
|
||||
if term_scorers.len() == 1 {
|
||||
let mut term_scorer = term_scorers.pop().unwrap();
|
||||
for_each_scorer(&mut term_scorer, callback);
|
||||
} else {
|
||||
let mut union_scorer =
|
||||
BufferedUnionScorer::build(term_scorers, &self.score_combiner_fn, num_docs);
|
||||
for_each_scorer(&mut union_scorer, callback);
|
||||
}
|
||||
}
|
||||
SpecializedScorer::TermIntersection(term_scorers) => {
|
||||
let boxed_scorers: Vec<Box<dyn Scorer>> = term_scorers
|
||||
@@ -534,10 +543,15 @@ impl<TScoreCombiner: ScoreCombiner + Sync> Weight for BooleanWeight<TScoreCombin
|
||||
let mut buffer = [0u32; COLLECT_BLOCK_BUFFER_LEN];
|
||||
|
||||
match scorer {
|
||||
SpecializedScorer::TermUnion(term_scorers) => {
|
||||
let mut union_scorer =
|
||||
BufferedUnionScorer::build(term_scorers, &self.score_combiner_fn, num_docs);
|
||||
for_each_docset_buffered(&mut union_scorer, &mut buffer, callback);
|
||||
SpecializedScorer::TermUnion(mut term_scorers) => {
|
||||
if term_scorers.len() == 1 {
|
||||
let mut term_scorer = term_scorers.pop().unwrap();
|
||||
for_each_docset_buffered(&mut term_scorer, &mut buffer, callback);
|
||||
} else {
|
||||
let mut union_scorer =
|
||||
BufferedUnionScorer::build(term_scorers, &self.score_combiner_fn, num_docs);
|
||||
for_each_docset_buffered(&mut union_scorer, &mut buffer, callback);
|
||||
}
|
||||
}
|
||||
SpecializedScorer::TermIntersection(term_scorers) => {
|
||||
let boxed_scorers: Vec<Box<dyn Scorer>> = term_scorers
|
||||
|
||||
@@ -1364,21 +1364,21 @@ mod tests {
|
||||
"{} AND {}:{}",
|
||||
gen_query_inclusive("id", ids[0]..=ids[1]),
|
||||
field_path("id_name"),
|
||||
&id_filter
|
||||
id_filter
|
||||
);
|
||||
assert_eq!(get_num_hits(query_from_text(&query)), expected_num_hits);
|
||||
let query = format!(
|
||||
"{} AND {}:{}",
|
||||
gen_query_inclusive("id_f64", ids[0]..=ids[1]),
|
||||
field_path("id_name"),
|
||||
&id_filter
|
||||
id_filter
|
||||
);
|
||||
assert_eq!(get_num_hits(query_from_text(&query)), expected_num_hits);
|
||||
let query = format!(
|
||||
"{} AND {}:{}",
|
||||
gen_query_inclusive("id_i64", ids[0]..=ids[1]),
|
||||
field_path("id_name"),
|
||||
&id_filter
|
||||
id_filter
|
||||
);
|
||||
assert_eq!(get_num_hits(query_from_text(&query)), expected_num_hits);
|
||||
|
||||
@@ -1388,21 +1388,21 @@ mod tests {
|
||||
"{} AND {}:{}",
|
||||
gen_query_inclusive("ids", ids[0]..=ids[1]),
|
||||
field_path("id_name"),
|
||||
&id_filter
|
||||
id_filter
|
||||
);
|
||||
assert_eq!(get_num_hits(query_from_text(&query)), expected_num_hits);
|
||||
let query = format!(
|
||||
"{} AND {}:{}",
|
||||
gen_query_inclusive("ids_f64", ids[0]..=ids[1]),
|
||||
field_path("id_name"),
|
||||
&id_filter
|
||||
id_filter
|
||||
);
|
||||
assert_eq!(get_num_hits(query_from_text(&query)), expected_num_hits);
|
||||
let query = format!(
|
||||
"{} AND {}:{}",
|
||||
gen_query_inclusive("ids_i64", ids[0]..=ids[1]),
|
||||
field_path("id_name"),
|
||||
&id_filter
|
||||
id_filter
|
||||
);
|
||||
assert_eq!(get_num_hits(query_from_text(&query)), expected_num_hits);
|
||||
};
|
||||
@@ -1651,7 +1651,7 @@ pub(crate) mod ip_range_tests {
|
||||
let query = format!(
|
||||
"{} AND id:{}",
|
||||
gen_query_inclusive("ip", &ip_range),
|
||||
&id_filter
|
||||
id_filter
|
||||
);
|
||||
assert_eq!(get_num_hits(query_from_text(&query)), expected_num_hits);
|
||||
|
||||
@@ -1660,7 +1660,7 @@ pub(crate) mod ip_range_tests {
|
||||
let query = format!(
|
||||
"{} AND id:{}",
|
||||
gen_query_inclusive("ips", &ip_range),
|
||||
&id_filter
|
||||
id_filter
|
||||
);
|
||||
assert_eq!(get_num_hits(query_from_text(&query)), expected_num_hits);
|
||||
};
|
||||
|
||||
@@ -55,6 +55,11 @@ pub struct BufferedUnionScorer<TScorer, TScoreCombiner = DoNothingCombiner> {
|
||||
num_docs: u32,
|
||||
}
|
||||
|
||||
// Keep this helper out-of-line. When LLVM inlines it into
|
||||
// `BufferedUnionScorer::advance`, the full traversal path used by combined
|
||||
// collectors such as `(TopDocs, Count)` becomes sensitive to unrelated codegen
|
||||
// changes and regresses on large unions.
|
||||
#[inline(never)]
|
||||
fn refill<TScorer: Scorer, TScoreCombiner: ScoreCombiner>(
|
||||
scorers: &mut Vec<TScorer>,
|
||||
bitsets: &mut [TinySet; HORIZON_NUM_TINYBITSETS],
|
||||
|
||||
@@ -5,6 +5,7 @@ extern crate test;
|
||||
|
||||
mod arena_hashmap;
|
||||
mod expull;
|
||||
#[cfg(not(feature = "compare_hash_only"))]
|
||||
mod fastcmp;
|
||||
mod fastcpy;
|
||||
mod memory_arena;
|
||||
|
||||
Reference in New Issue
Block a user