Mirror of https://github.com/quickwit-oss/tantivy.git, synced 2026-01-09 10:32:55 +00:00
Compare commits: githubacti... to bugfix-uni... (10 commits)
| Author | SHA1 | Date |
|---|---|---|
|  | 9b0ffc401e |  |
|  | 3f1ecf53ab |  |
|  | 0b583b8130 |  |
|  | 31d18dca1c |  |
|  | 5e06e7de5a |  |
|  | 8af53cbd36 |  |
|  | 4914076e8f |  |
|  | e04f47e922 |  |
|  | f355695581 |  |
|  | cbacdf0de8 |  |
.github/workflows/ci.yml (vendored, 28 lines deleted)

@@ -1,28 +0,0 @@
-name: Tantivy CI
-
-on: [push]
-
-jobs:
-  test:
-    name: Test Suite
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v2
-      - uses: actions-rs/toolchain@v1
-        with:
-          profile: minimal
-          toolchain: stable
-          override: true
-      - uses: actions-rs/cargo@v1
-        with:
-          command: test
-      - uses: actions-rs/cargo@v1
-        with:
-          command: fmt
-          args: --all -- --check
-      - run: rustup component add clippy
-      - uses: actions-rs/cargo@v1
-        with:
-          command: clippy
-          args: -- -D warnings
-
.github/workflows/coveralls.yml (vendored, 66 lines deleted)

@@ -1,66 +0,0 @@
-on: [push]
-
-name: Code coverage with grcov
-
-jobs:
-  grcov:
-    runs-on: ${{ matrix.os }}
-    strategy:
-      matrix:
-        os:
-          - ubuntu-latest
-          #- macOS-latest
-          #- windows-latest
-
-    steps:
-      - uses: actions/checkout@v2
-
-      - name: Install toolchain
-        uses: actions-rs/toolchain@v1
-        with:
-          toolchain: nightly
-          override: true
-          profile: minimal
-
-      - name: Execute tests
-        uses: actions-rs/cargo@v1
-        with:
-          command: test
-          args: --all --lib
-        env:
-          CARGO_INCREMENTAL: 0
-          RUSTFLAGS: "-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=abort -Zpanic_abort_tests"
-
-      # Note that `actions-rs/grcov` Action can install `grcov` too,
-      # but can't use faster installation methods yet.
-      # As a temporary experiment `actions-rs/install` Action plugged in here.
-      # Consider **NOT** to copy that into your workflow,
-      # but use `actions-rs/grcov` only
-      - name: Pre-installing grcov
-        uses: actions-rs/install@v0.1
-        with:
-          crate: grcov
-          use-tool-cache: true
-
-      - name: Gather coverage data
-        id: coverage
-        uses: actions-rs/grcov@v0.1
-        with:
-          coveralls-token: ${{ secrets.COVERALLS_TOKEN }}
-
-      - name: Coveralls upload
-        uses: coverallsapp/github-action@master
-        with:
-          github-token: ${{ secrets.GITHUB_TOKEN }}
-          parallel: true
-          path-to-lcov: ${{ steps.coverage.outputs.report }}
-
-  grcov_finalize:
-    runs-on: ubuntu-latest
-    needs: grcov
-    steps:
-      - name: Coveralls finalization
-        uses: coverallsapp/github-action@master
-        with:
-          github-token: ${{ secrets.GITHUB_TOKEN }}
-          parallel-finished: true
CHANGELOG.md

@@ -1,3 +1,7 @@
+Tantivy 0.14.0
+=========================
+- Remove dependency to atomicwrites #833. (Implemented by @pmasurel upon suggestion and research from @asafigan.)
+
 Tantivy 0.13.0
 ======================
 Tantivy 0.13 introduce a change in the index format that will require
Cargo.toml

@@ -1,6 +1,6 @@
 [package]
 name = "tantivy"
-version = "0.13.0"
+version = "0.14.0-dev"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = ["database-implementations", "data-structures"]
@@ -22,8 +22,7 @@ tantivy-fst = "0.3"
 memmap = {version = "0.7", optional=true}
 lz4 = {version="1.20", optional=true}
 snap = "1"
-atomicwrites = {version="0.2.2", optional=true}
-tempfile = "3.0"
+tempfile = {version="3.0", optional=true}
 log = "0.4"
 serde = {version="1.0", features=["derive"]}
 serde_json = "1.0"
@@ -75,7 +74,7 @@ overflow-checks = true
 
 [features]
 default = ["mmap"]
-mmap = ["atomicwrites", "fs2", "memmap", "notify"]
+mmap = ["fs2", "tempfile", "memmap", "notify"]
 lz4-compression = ["lz4"]
 failpoints = ["fail/failpoints"]
 unstable = [] # useful for benches.
README.md

@@ -34,11 +34,6 @@ Tantivy is, in fact, strongly inspired by Lucene's design.
 The following [benchmark](https://tantivy-search.github.io/bench/) break downs
 performance for different type of queries / collection.
 
-
-In general, Tantivy tends to be
-- slower than Lucene on union with a Top-K due to Block-WAND optimization.
-- faster than Lucene on intersection and phrase queries.
-
 Your mileage WILL vary depending on the nature of queries and their load.
 
 # Features
@@ -112,18 +112,6 @@ fn main() -> tantivy::Result<()> {
              limbs and branches that arch over the pool"
     ));
 
-    index_writer.add_document(doc!(
-        title => "Of Mice and Men",
-        body => "A few miles south of Soledad, the Salinas River drops in close to the hillside \
-                 bank and runs deep and green. The water is warm too, for it has slipped twinkling \
-                 over the yellow sands in the sunlight before reaching the narrow pool. On one \
-                 side of the river the golden foothill slopes curve up to the strong and rocky \
-                 Gabilan Mountains, but on the valley side the water is lined with trees—willows \
-                 fresh and green with every spring, carrying in their lower leaf junctures the \
-                 debris of the winter’s flooding; and sycamores with mottled, white, recumbent \
-                 limbs and branches that arch over the pool"
-    ));
-
     // Multivalued field just need to be repeated.
     index_writer.add_document(doc!(
         title => "Frankenstein",
src/query/occur.rs

@@ -52,7 +52,7 @@ mod test {
     use crate::Occur;
 
     #[test]
-    fn test_Occur_compose() {
+    fn test_occur_compose() {
         assert_eq!(Occur::compose(Occur::Should, Occur::Should), Occur::Should);
         assert_eq!(Occur::compose(Occur::Should, Occur::Must), Occur::Must);
         assert_eq!(
@@ -539,7 +539,6 @@ mod tests {
             test_index_on_commit_reload_policy_aux(field, &write_index, &reader);
         }
     }
-
     fn test_index_on_commit_reload_policy_aux(field: Field, index: &Index, reader: &IndexReader) {
         let mut reader_index = reader.index();
         let (sender, receiver) = crossbeam::channel::unbounded();
@@ -550,12 +549,23 @@ mod tests {
         assert_eq!(reader.searcher().num_docs(), 0);
         writer.add_document(doc!(field=>1u64));
         writer.commit().unwrap();
-        assert!(receiver.recv().is_ok());
-        assert_eq!(reader.searcher().num_docs(), 1);
+        // We need a loop here because it is possible for notify to send more than
+        // one modify event. It was observed on CI on MacOS.
+        loop {
+            assert!(receiver.recv().is_ok());
+            if reader.searcher().num_docs() == 1 {
+                break;
+            }
+        }
         writer.add_document(doc!(field=>2u64));
         writer.commit().unwrap();
-        assert!(receiver.recv().is_ok());
-        assert_eq!(reader.searcher().num_docs(), 2);
+        // ... Same as above
+        loop {
+            assert!(receiver.recv().is_ok());
+            if reader.searcher().num_docs() == 2 {
+                break;
+            }
+        }
     }
 
     // This test will not pass on windows, because windows
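The comment in the hunk states the underlying rule: notify-style watchers may deliver more than one event per logical write, so tests must drain events until the observed state converges rather than asserting one event per commit. A hedged sketch of that drain-until-condition pattern, assuming a crossbeam channel fed by the watch callback (`wait_until` is an illustrative name, not tantivy API):

```rust
use std::time::Duration;

// Receive watcher events until `condition` holds; treat the event stream as
// an at-least-once signal rather than an exact count.
fn wait_until<F: FnMut() -> bool>(
    receiver: &crossbeam::channel::Receiver<()>,
    mut condition: F,
) -> bool {
    loop {
        if condition() {
            return true;
        }
        // Give up instead of hanging forever if no further event arrives.
        if receiver.recv_timeout(Duration::from_secs(5)).is_err() {
            return false;
        }
    }
}
```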
src/directory/mmap_directory.rs

@@ -34,6 +34,7 @@ use std::sync::Mutex;
 use std::sync::RwLock;
 use std::sync::Weak;
 use std::thread;
+use tempfile;
 use tempfile::TempDir;
 
 /// Create a default io error given a string.
@@ -487,11 +488,13 @@ impl Directory for MmapDirectory {
         }
     }
 
-    fn atomic_write(&mut self, path: &Path, data: &[u8]) -> io::Result<()> {
+    fn atomic_write(&mut self, path: &Path, content: &[u8]) -> io::Result<()> {
         debug!("Atomic Write {:?}", path);
+        let mut tempfile = tempfile::NamedTempFile::new()?;
+        tempfile.write_all(content)?;
+        tempfile.flush()?;
         let full_path = self.resolve_path(path);
-        let meta_file = atomicwrites::AtomicFile::new(full_path, atomicwrites::AllowOverwrite);
-        meta_file.write(|f| f.write_all(data))?;
+        tempfile.into_temp_path().persist(full_path)?;
         Ok(())
     }
 
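The new `atomic_write` follows the classic write-to-temp-then-rename pattern: readers of the destination see either the old content or the complete new content, never a partial write. A minimal standalone sketch of the pattern, assuming only the `tempfile` crate; `atomic_write_sketch` and the choice to create the temp file next to the destination are illustrative, not tantivy's exact code:

```rust
use std::io::{self, Write};
use std::path::Path;

// Hedged sketch of the temp-file-then-rename pattern used above.
fn atomic_write_sketch(dest: &Path, content: &[u8]) -> io::Result<()> {
    // Creating the temp file in the destination's directory keeps the final
    // rename on a single filesystem; a rename cannot cross devices.
    let dir = dest.parent().unwrap_or_else(|| Path::new("."));
    let mut tmp = tempfile::NamedTempFile::new_in(dir)?;
    tmp.write_all(content)?;
    tmp.flush()?;
    // `persist` renames the temp file over the destination in one step.
    tmp.persist(dest)?;
    Ok(())
}
```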
src/directory/tests.rs

@@ -211,19 +211,19 @@ fn test_watch(directory: &mut dyn Directory) {
         .unwrap();
 
     for i in 0..10 {
-        assert_eq!(i, counter.load(SeqCst));
+        assert!(i <= counter.load(SeqCst));
         assert!(directory
             .atomic_write(Path::new("meta.json"), b"random_test_data_2")
             .is_ok());
         assert_eq!(receiver.recv_timeout(Duration::from_millis(500)), Ok(i));
-        assert_eq!(i + 1, counter.load(SeqCst));
+        assert!(i + 1 <= counter.load(SeqCst)); // notify can trigger more than once.
     }
     mem::drop(watch_handle);
     assert!(directory
         .atomic_write(Path::new("meta.json"), b"random_test_data")
         .is_ok());
     assert!(receiver.recv_timeout(Duration::from_millis(500)).is_ok());
-    assert_eq!(10, counter.load(SeqCst));
+    assert!(10 <= counter.load(SeqCst));
 }
 
 fn test_lock_non_blocking(directory: &mut dyn Directory) {
src/indexer/mod.rs

@@ -29,8 +29,9 @@ pub use self::segment_writer::SegmentWriter;
 /// Alias for the default merge policy, which is the `LogMergePolicy`.
 pub type DefaultMergePolicy = LogMergePolicy;
 
+#[cfg(feature = "mmap")]
 #[cfg(test)]
-mod tests {
+mod tests_mmap {
     use crate::schema::{self, Schema};
     use crate::{Index, Term};
 
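Stacked `#[cfg]` attributes combine as a logical AND, so the renamed module is compiled only for test builds that also enable the `mmap` feature. An equivalent single-attribute spelling, shown as a sketch:

```rust
// Same gate as the two stacked attributes in the hunk above:
// compiled only by `cargo test --features mmap`.
#[cfg(all(test, feature = "mmap"))]
mod tests_mmap {
    // ...
}
```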
src/query/boolean_query/block_wand.rs

@@ -4,19 +4,6 @@ use crate::{DocId, DocSet, Score, TERMINATED};
 use std::ops::Deref;
 use std::ops::DerefMut;
 
-fn is_sorted<I: Iterator<Item = DocId>>(mut it: I) -> bool {
-    if let Some(first) = it.next() {
-        let mut prev = first;
-        for doc in it {
-            if doc < prev {
-                return false;
-            }
-            prev = doc;
-        }
-    }
-    true
-}
-
 /// Takes a term_scorers sorted by their current doc() and a threshold and returns
 /// Returns (pivot_len, pivot_ord) defined as follows:
 /// - `pivot_doc` lowest document that has a chance of exceeding (>) the threshold score.
@@ -55,37 +42,12 @@ fn find_pivot_doc(
     Some((before_pivot_len, pivot_len, pivot_doc))
 }
 
-struct TermScorerWithMaxScore<'a> {
-    scorer: &'a mut TermScorer,
-    max_score: Score,
-}
-
-impl<'a> From<&'a mut TermScorer> for TermScorerWithMaxScore<'a> {
-    fn from(scorer: &'a mut TermScorer) -> Self {
-        let max_score = scorer.max_score();
-        TermScorerWithMaxScore { scorer, max_score }
-    }
-}
-
-impl<'a> Deref for TermScorerWithMaxScore<'a> {
-    type Target = TermScorer;
-
-    fn deref(&self) -> &Self::Target {
-        self.scorer
-    }
-}
-
-impl<'a> DerefMut for TermScorerWithMaxScore<'a> {
-    fn deref_mut(&mut self) -> &mut Self::Target {
-        self.scorer
-    }
-}
-
 // Before and after calling this method, scorers need to be sorted by their `.doc()`.
 fn block_max_was_too_low_advance_one_scorer(
     scorers: &mut Vec<TermScorerWithMaxScore>,
     pivot_len: usize,
 ) {
+    debug_assert!(is_sorted(scorers.iter().map(|scorer| scorer.doc())));
     let mut scorer_to_seek = pivot_len - 1;
     let mut doc_to_seek_after = scorers[scorer_to_seek].doc();
     for scorer_ord in (0..pivot_len - 1).rev() {
@@ -102,6 +64,7 @@ fn block_max_was_too_low_advance_one_scorer(
     }
     scorers[scorer_to_seek].seek(doc_to_seek_after + 1);
     restore_ordering(scorers, scorer_to_seek);
+    debug_assert!(is_sorted(scorers.iter().map(|scorer| scorer.doc())));
 }
 
 // Given a list of term_scorers and a `ord` and assuming that `term_scorers[ord]` is sorted
@@ -177,64 +140,99 @@ pub fn block_wand(
         .map(TermScorerWithMaxScore::from)
         .collect();
     scorers.sort_by_key(|scorer| scorer.doc());
-    loop {
-        // At this point we need to ensure that the scorers are sorted!
-        debug_assert!(is_sorted(scorers.iter().map(|scorer| scorer.doc())));
-        if let Some((before_pivot_len, pivot_len, pivot_doc)) =
-            find_pivot_doc(&scorers[..], threshold)
-        {
-            debug_assert_ne!(pivot_doc, TERMINATED);
-            debug_assert!(before_pivot_len < pivot_len);
-
-            let block_max_score_upperbound: Score = scorers[..pivot_len]
-                .iter_mut()
-                .map(|scorer| {
-                    scorer.shallow_seek(pivot_doc);
-                    scorer.block_max_score()
-                })
-                .sum();
-
-            // Beware after shallow advance, skip readers can be in advance compared to
-            // the segment posting lists.
-            //
-            // `block_segment_postings.load_block()` need to be called separately.
-            if block_max_score_upperbound <= threshold {
-                // Block max condition was not reached
-                // We could get away by simply advancing the scorers to DocId + 1 but it would
-                // be inefficient. The optimization requires proper explanation and was
-                // isolated in a different function.
-                block_max_was_too_low_advance_one_scorer(&mut scorers, pivot_len);
-                continue;
-            }
-
-            // Block max condition is observed.
-            //
-            // Let's try and advance all scorers before the pivot to the pivot.
-            if !align_scorers(&mut scorers, pivot_doc, before_pivot_len) {
-                // At least of the scorer does not contain the pivot.
-                //
-                // Let's stop scoring this pivot and go through the pivot selection again.
-                // Note that the current pivot is not necessarily a bad candidate and it
-                // may be picked again.
-                continue;
-            }
-
-            // At this point, all scorers are positioned on the doc.
-            let score = scorers[..pivot_len]
-                .iter_mut()
-                .map(|scorer| scorer.score())
-                .sum();
-            if score > threshold {
-                threshold = callback(pivot_doc, score);
-            }
-            // let's advance all of the scorers that are currently positioned on the pivot.
-            advance_all_scorers_on_pivot(&mut scorers, pivot_len);
-        } else {
-            return;
-        }
+    // At this point we need to ensure that the scorers are sorted!
+    debug_assert!(is_sorted(scorers.iter().map(|scorer| scorer.doc())));
+    while let Some((before_pivot_len, pivot_len, pivot_doc)) =
+        find_pivot_doc(&scorers[..], threshold)
+    {
+        debug_assert!(is_sorted(scorers.iter().map(|scorer| scorer.doc())));
+        debug_assert_ne!(pivot_doc, TERMINATED);
+        debug_assert!(before_pivot_len < pivot_len);
+
+        let block_max_score_upperbound: Score = scorers[..pivot_len]
+            .iter_mut()
+            .map(|scorer| {
+                scorer.shallow_seek(pivot_doc);
+                scorer.block_max_score()
+            })
+            .sum();
+
+        // Beware after shallow advance, skip readers can be in advance compared to
+        // the segment posting lists.
+        //
+        // `block_segment_postings.load_block()` need to be called separately.
+        if block_max_score_upperbound <= threshold {
+            // Block max condition was not reached
+            // We could get away by simply advancing the scorers to DocId + 1 but it would
+            // be inefficient. The optimization requires proper explanation and was
+            // isolated in a different function.
+            block_max_was_too_low_advance_one_scorer(&mut scorers, pivot_len);
+            continue;
+        }
+
+        // Block max condition is observed.
+        //
+        // Let's try and advance all scorers before the pivot to the pivot.
+        if !align_scorers(&mut scorers, pivot_doc, before_pivot_len) {
+            // At least of the scorer does not contain the pivot.
+            //
+            // Let's stop scoring this pivot and go through the pivot selection again.
+            // Note that the current pivot is not necessarily a bad candidate and it
+            // may be picked again.
+            continue;
+        }
+
+        // At this point, all scorers are positioned on the doc.
+        let score = scorers[..pivot_len]
+            .iter_mut()
+            .map(|scorer| scorer.score())
+            .sum();
+        if score > threshold {
+            threshold = callback(pivot_doc, score);
+        }
+        // let's advance all of the scorers that are currently positioned on the pivot.
+        advance_all_scorers_on_pivot(&mut scorers, pivot_len);
     }
 }
+
+struct TermScorerWithMaxScore<'a> {
+    scorer: &'a mut TermScorer,
+    max_score: Score,
+}
+
+impl<'a> From<&'a mut TermScorer> for TermScorerWithMaxScore<'a> {
+    fn from(scorer: &'a mut TermScorer) -> Self {
+        let max_score = scorer.max_score();
+        TermScorerWithMaxScore { scorer, max_score }
+    }
+}
+
+impl<'a> Deref for TermScorerWithMaxScore<'a> {
+    type Target = TermScorer;
+
+    fn deref(&self) -> &Self::Target {
+        self.scorer
+    }
+}
+
+impl<'a> DerefMut for TermScorerWithMaxScore<'a> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        self.scorer
+    }
+}
+
+fn is_sorted<I: Iterator<Item = DocId>>(mut it: I) -> bool {
+    if let Some(first) = it.next() {
+        let mut prev = first;
+        for doc in it {
+            if doc < prev {
+                return false;
+            }
+            prev = doc;
+        }
+    }
+    true
+}
 
 #[cfg(test)]
 mod tests {
     use crate::query::score_combiner::SumCombiner;
@@ -248,17 +246,21 @@ mod tests {
     use std::iter;
 
     struct Float(Score);
+
     impl Eq for Float {}
+
     impl PartialEq for Float {
         fn eq(&self, other: &Self) -> bool {
            self.cmp(&other) == Ordering::Equal
         }
     }
+
     impl PartialOrd for Float {
         fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
             Some(self.cmp(other))
         }
     }
+
     impl Ord for Float {
         fn cmp(&self, other: &Self) -> Ordering {
             other.0.partial_cmp(&self.0).unwrap_or(Ordering::Equal)
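The control-flow change in the `block_wand` rewrite above is mechanical: a `loop` whose entire body is `if let Some(...) ... else { return; }` is equivalent to a `while let`, which removes one level of nesting. A minimal sketch of the equivalence, with `next_item` as a hypothetical stand-in for `find_pivot_doc`:

```rust
fn next_item(state: &mut u32) -> Option<u32> {
    *state += 1;
    if *state < 3 { Some(*state) } else { None }
}

fn main() {
    // Before: loop + if let + else break/return.
    let mut s = 0;
    loop {
        if let Some(x) = next_item(&mut s) {
            println!("{}", x);
        } else {
            break;
        }
    }
    // After: the equivalent, flatter while let.
    let mut s = 0;
    while let Some(x) = next_item(&mut s) {
        println!("{}", x);
    }
}
```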
src/query/term_query/term_weight.rs

@@ -4,7 +4,7 @@ use crate::docset::DocSet;
 use crate::postings::SegmentPostings;
 use crate::query::bm25::BM25Weight;
 use crate::query::explanation::does_not_match;
-use crate::query::weight::{for_each_pruning_scorer, for_each_scorer};
+use crate::query::weight::for_each_scorer;
 use crate::query::Weight;
 use crate::query::{Explanation, Scorer};
 use crate::schema::IndexRecordOption;
@@ -73,8 +73,8 @@ impl Weight for TermWeight {
         reader: &SegmentReader,
         callback: &mut dyn FnMut(DocId, Score) -> Score,
     ) -> crate::Result<()> {
-        let mut scorer = self.scorer(reader, 1.0)?;
-        for_each_pruning_scorer(&mut scorer, threshold, callback);
+        let scorer = self.specialized_scorer(reader, 1.0)?;
+        crate::query::boolean_query::block_wand(vec![scorer], threshold, callback);
         Ok(())
     }
 }
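This hunk also pins down the pruning contract: the callback receives each surviving `(doc, score)` pair and returns the updated threshold that later candidates must strictly exceed, which is what lets `block_wand` skip whole blocks. A hedged sketch of a top-k collector driving such a callback; `TopK` and `observe` are illustrative names, not tantivy's collector API, and `k` is assumed to be at least 1:

```rust
// Keeps the k best (doc, score) hits; `observe` doubles as the pruning
// callback: it returns the score a future document must beat.
struct TopK {
    k: usize,
    hits: Vec<(u32, f32)>, // kept sorted by descending score
}

impl TopK {
    fn new(k: usize) -> TopK {
        TopK { k, hits: Vec::new() }
    }

    fn observe(&mut self, doc: u32, score: f32) -> f32 {
        // Insert while keeping descending score order, then cap at k entries.
        let pos = self
            .hits
            .iter()
            .position(|&(_, s)| s < score)
            .unwrap_or(self.hits.len());
        self.hits.insert(pos, (doc, score));
        self.hits.truncate(self.k);
        if self.hits.len() == self.k {
            self.hits[self.k - 1].1 // weakest retained score is the new bar
        } else {
            f32::MIN // buffer not full yet: accept anything
        }
    }
}
```

Wired up as `block_wand(scorers, f32::MIN, &mut |doc, score| topk.observe(doc, score))`-style, the threshold tightens as better hits arrive.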
src/reader/mod.rs

@@ -138,9 +138,11 @@ impl InnerIndexReader {
             .collect::<crate::Result<_>>()?
         };
         let schema = self.index.schema();
-        let searchers = (0..self.num_searchers)
-            .map(|_| Searcher::new(schema.clone(), self.index.clone(), segment_readers.clone()))
-            .collect();
+        let searchers = std::iter::repeat_with(|| {
+            Searcher::new(schema.clone(), self.index.clone(), segment_readers.clone())
+        })
+        .take(self.num_searchers)
+        .collect();
         self.searcher_pool.publish_new_generation(searchers);
         Ok(())
     }
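`std::iter::repeat_with` expresses "n freshly built values" without the unused `_` binding that `(0..n).map(|_| ...)` carries. A minimal illustration of the equivalence:

```rust
// Both spellings produce n independently built values; repeat_with just
// drops the meaningless loop index.
fn n_copies(n: usize) -> Vec<String> {
    std::iter::repeat_with(|| "searcher".to_string())
        .take(n)
        .collect()
}
```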