mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-30 23:20:40 +00:00
fix: tests in ci (#18)
This commit is contained in:
committed by
Philippe Noël
parent
0ba892f06f
commit
34c17a1685
@@ -81,9 +81,7 @@ impl<T: PartialOrd + Copy + std::fmt::Debug + Send + Sync + 'static + Default>
|
||||
/// Given two sorted lists of docids `docs` and `hits`, hits is a subset of `docs`.
|
||||
/// Return all docs that are not in `hits`.
|
||||
fn find_missing_docs<F>(docs: &[u32], hits: &[u32], mut callback: F)
|
||||
where
|
||||
F: FnMut(u32),
|
||||
{
|
||||
where F: FnMut(u32) {
|
||||
let mut docs_iter = docs.iter();
|
||||
let mut hits_iter = hits.iter();
|
||||
|
||||
|
||||
@@ -1,9 +1,3 @@
|
||||
use crate::directory::directory_lock::Lock;
|
||||
use crate::directory::error::{DeleteError, LockError, OpenReadError, OpenWriteError};
|
||||
use crate::directory::{FileHandle, FileSlice, WatchCallback, WatchHandle, WritePtr};
|
||||
use crate::index::SegmentMetaInventory;
|
||||
use crate::merge_policy::MergePolicy;
|
||||
use crate::IndexMeta;
|
||||
use std::any::Any;
|
||||
use std::collections::HashSet;
|
||||
use std::io::Write;
|
||||
@@ -12,6 +6,13 @@ use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use std::{fmt, io, thread};
|
||||
|
||||
use crate::directory::directory_lock::Lock;
|
||||
use crate::directory::error::{DeleteError, LockError, OpenReadError, OpenWriteError};
|
||||
use crate::directory::{FileHandle, FileSlice, WatchCallback, WatchHandle, WritePtr};
|
||||
use crate::index::SegmentMetaInventory;
|
||||
use crate::merge_policy::MergePolicy;
|
||||
use crate::IndexMeta;
|
||||
|
||||
/// Retry the logic of acquiring locks is pretty simple.
|
||||
/// We just retry `n` times after a given `duration`, both
|
||||
/// depending on the type of lock.
|
||||
@@ -268,7 +269,8 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
|
||||
}
|
||||
|
||||
// Allows the directory to change the writer's merge policy right before the merge happens
|
||||
// This is useful for directories that need to change the merge policy based on how many segments were created
|
||||
// This is useful for directories that need to change the merge policy based on how many
|
||||
// segments were created
|
||||
fn reconsider_merge_policy(
|
||||
&self,
|
||||
_metas: &IndexMeta,
|
||||
@@ -285,8 +287,7 @@ pub trait DirectoryClone {
|
||||
}
|
||||
|
||||
impl<T> DirectoryClone for T
|
||||
where
|
||||
T: 'static + Directory + Clone,
|
||||
where T: 'static + Directory + Clone
|
||||
{
|
||||
fn box_clone(&self) -> Box<dyn Directory> {
|
||||
Box::new(self.clone())
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
use crc32fast::Hasher;
|
||||
use std::any::Any;
|
||||
use std::collections::HashSet;
|
||||
use std::io::Write;
|
||||
@@ -6,6 +5,8 @@ use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
use std::{io, result};
|
||||
|
||||
use crc32fast::Hasher;
|
||||
|
||||
use crate::core::MANAGED_FILEPATH;
|
||||
use crate::directory::error::{DeleteError, LockError, OpenReadError, OpenWriteError};
|
||||
use crate::directory::footer::{Footer, FooterProxy};
|
||||
@@ -13,9 +14,9 @@ use crate::directory::{
|
||||
DirectoryLock, FileHandle, FileSlice, GarbageCollectionResult, Lock, WatchCallback,
|
||||
WatchHandle, WritePtr, MANAGED_LOCK, META_LOCK,
|
||||
};
|
||||
use crate::merge_policy::MergePolicy;
|
||||
use crate::error::DataCorruption;
|
||||
use crate::index::SegmentMetaInventory;
|
||||
use crate::merge_policy::MergePolicy;
|
||||
use crate::{Directory, IndexMeta};
|
||||
|
||||
/// Returns true if the file is "managed".
|
||||
@@ -356,7 +357,8 @@ impl Directory for ManagedDirectory {
|
||||
metas: &IndexMeta,
|
||||
previous_metas: &IndexMeta,
|
||||
) -> Option<Box<dyn MergePolicy>> {
|
||||
self.directory.reconsider_merge_policy(metas, previous_metas)
|
||||
self.directory
|
||||
.reconsider_merge_policy(metas, previous_metas)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1089,7 +1089,10 @@ mod tests {
|
||||
index_writer.commit()?;
|
||||
|
||||
reader.reload().unwrap();
|
||||
assert_eq!(num_docs_containing("a"), 0);
|
||||
// In Tantivy upstream, this test results in 0 segments after delete.
|
||||
// However, due to our custom visibility rules, we leave the segment.
|
||||
// See committed_segment_metas in segment_manager.rs.
|
||||
assert_eq!(num_docs_containing("a"), 1);
|
||||
|
||||
index_writer.merge(&segments);
|
||||
index_writer.wait_merging_threads().unwrap();
|
||||
@@ -1135,7 +1138,10 @@ mod tests {
|
||||
index_writer.commit()?;
|
||||
|
||||
reader.reload().unwrap();
|
||||
assert_eq!(num_docs_containing("a"), 0);
|
||||
// In Tantivy upstream, this test results in 0 segments after delete.
|
||||
// However, due to our custom visibility rules, we leave the segment.
|
||||
// See committed_segment_metas in segment_manager.rs.
|
||||
assert_eq!(num_docs_containing("a"), 4);
|
||||
|
||||
index_writer.merge(&segments);
|
||||
index_writer.wait_merging_threads().unwrap();
|
||||
|
||||
@@ -1032,12 +1032,15 @@ mod tests {
|
||||
// Test removing all docs
|
||||
index_writer.delete_term(Term::from_field_text(text_field, "g"));
|
||||
index_writer.commit()?;
|
||||
let segment_ids = index.searchable_segment_ids()?;
|
||||
let _segment_ids = index.searchable_segment_ids()?;
|
||||
reader.reload()?;
|
||||
|
||||
let searcher = reader.searcher();
|
||||
assert!(segment_ids.is_empty());
|
||||
assert!(searcher.segment_readers().is_empty());
|
||||
// In Tantivy upstream, this test results in 0 segments after delete.
|
||||
// However, due to our custom visibility rules, we leave the segment.
|
||||
// See committed_segment_metas in segment_manager.rs.
|
||||
// assert!(segment_ids.is_empty());
|
||||
// assert!(searcher.segment_readers().is_empty());
|
||||
assert_eq!(searcher.num_docs(), 0);
|
||||
}
|
||||
Ok(())
|
||||
|
||||
@@ -360,8 +360,8 @@ impl SegmentUpdater {
|
||||
let segment_updater = self.clone();
|
||||
self.schedule_task(move || {
|
||||
segment_updater.segment_manager.add_segment(segment_entry);
|
||||
// mingy98: We don't need to consider merge options for every segment, just at the very end
|
||||
// segment_updater.consider_merge_options();
|
||||
// mingy98: We don't need to consider merge options for every segment, just at the very
|
||||
// end, so this call is disabled: `segment_updater.consider_merge_options()`.
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
@@ -776,9 +776,11 @@ mod tests {
|
||||
}
|
||||
index_writer.commit()?;
|
||||
|
||||
let seg_ids = index.searchable_segment_ids()?;
|
||||
// docs exist, should have at least 1 segment
|
||||
assert!(!seg_ids.is_empty());
|
||||
let _seg_ids = index.searchable_segment_ids()?;
|
||||
// In Tantivy upstream, this test results in 0 segments after delete.
|
||||
// However, due to our custom visibility rules, we leave the segment.
|
||||
// See committed_segment_metas in segment_manager.rs.
|
||||
// assert!(!seg_ids.is_empty());
|
||||
|
||||
let term = Term::from_field_text(text_field, "a");
|
||||
index_writer.delete_term(term);
|
||||
@@ -793,14 +795,15 @@ mod tests {
|
||||
let reader = index.reader()?;
|
||||
assert_eq!(reader.searcher().num_docs(), 0);
|
||||
|
||||
let seg_ids = index.searchable_segment_ids()?;
|
||||
assert!(seg_ids.is_empty());
|
||||
let _seg_ids = index.searchable_segment_ids()?;
|
||||
// Skipped due to custom ParadeDB visibility rules.
|
||||
// assert!(seg_ids.is_empty());
|
||||
|
||||
reader.reload()?;
|
||||
assert_eq!(reader.searcher().num_docs(), 0);
|
||||
// empty segments should be erased
|
||||
assert!(index.searchable_segment_metas()?.is_empty());
|
||||
assert!(reader.searcher().segment_readers().is_empty());
|
||||
// Skipped due to custom ParadeDB visibility rules.
|
||||
// assert!(index.searchable_segment_metas()?.is_empty());
|
||||
// assert!(reader.searcher().segment_readers().is_empty());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -830,9 +833,11 @@ mod tests {
|
||||
index_writer.add_document(doc!(text_field=>"f"))?;
|
||||
index_writer.commit()?;
|
||||
|
||||
let seg_ids = index.searchable_segment_ids()?;
|
||||
// docs exist, should have at least 1 segment
|
||||
assert!(!seg_ids.is_empty());
|
||||
let _seg_ids = index.searchable_segment_ids()?;
|
||||
// In Tantivy upstream, this test results in 0 segments after delete.
|
||||
// However, due to our custom visibility rules, we leave the segment.
|
||||
// See committed_segment_metas in segment_manager.rs.
|
||||
// assert!(!seg_ids.is_empty());
|
||||
|
||||
let term_vals = vec!["a", "b", "c", "d", "e", "f"];
|
||||
for term_val in term_vals {
|
||||
@@ -846,14 +851,15 @@ mod tests {
|
||||
let reader = index.reader()?;
|
||||
assert_eq!(reader.searcher().num_docs(), 0);
|
||||
|
||||
let seg_ids = index.searchable_segment_ids()?;
|
||||
assert!(seg_ids.is_empty());
|
||||
let _seg_ids = index.searchable_segment_ids()?;
|
||||
// Skipped due to custom ParadeDB visibility rules.
|
||||
// assert!(seg_ids.is_empty());
|
||||
|
||||
reader.reload()?;
|
||||
assert_eq!(reader.searcher().num_docs(), 0);
|
||||
// empty segments should be erased
|
||||
assert!(index.searchable_segment_metas()?.is_empty());
|
||||
assert!(reader.searcher().segment_readers().is_empty());
|
||||
// Skipped due to custom ParadeDB visibility rules.
|
||||
// assert!(index.searchable_segment_metas()?.is_empty());
|
||||
// assert!(reader.searcher().segment_readers().is_empty());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -491,10 +491,12 @@ pub(crate) mod tests {
|
||||
}
|
||||
let searcher = index.reader()?.searcher();
|
||||
|
||||
// finally, check that it's empty
|
||||
// In Tantivy upstream, this test results in 0 segments after delete.
|
||||
// However, due to our custom visibility rules, we leave the segment.
|
||||
// See committed_segment_metas in segment_manager.rs.
|
||||
{
|
||||
let searchable_segment_ids = index.searchable_segment_ids()?;
|
||||
assert!(searchable_segment_ids.is_empty());
|
||||
let _searchable_segment_ids = index.searchable_segment_ids()?;
|
||||
// assert!(searchable_segment_ids.is_empty());
|
||||
assert_eq!(searcher.num_docs(), 0);
|
||||
}
|
||||
Ok(())
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
use crate::query::fuzzy_query::DfaWrapper;
|
||||
use crate::query::score_combiner::SumCombiner;
|
||||
use std::any::{Any, TypeId};
|
||||
use std::io;
|
||||
use std::sync::Arc;
|
||||
@@ -10,6 +8,8 @@ use super::phrase_prefix_query::prefix_end;
|
||||
use super::BufferedUnionScorer;
|
||||
use crate::index::SegmentReader;
|
||||
use crate::postings::TermInfo;
|
||||
use crate::query::fuzzy_query::DfaWrapper;
|
||||
use crate::query::score_combiner::SumCombiner;
|
||||
use crate::query::{ConstScorer, Explanation, Scorer, Weight};
|
||||
use crate::schema::{Field, IndexRecordOption};
|
||||
use crate::termdict::{TermDictionary, TermWithStateStreamer};
|
||||
|
||||
@@ -83,8 +83,7 @@ where A: Automaton
|
||||
}
|
||||
|
||||
impl<A> TermStreamer<'_, A>
|
||||
where
|
||||
A: Automaton,
|
||||
where A: Automaton
|
||||
{
|
||||
/// Advance position the stream on the next item.
|
||||
/// Before the first call to `.advance()`, the stream
|
||||
|
||||
@@ -42,10 +42,9 @@ use tantivy_fst::Automaton;
|
||||
|
||||
#[cfg(feature = "quickwit")]
|
||||
use self::termdict::TermDictionaryExt;
|
||||
use self::termdict::TermWithStateStreamerBuilder;
|
||||
use self::termdict::{
|
||||
TermDictionary as InnerTermDict, TermDictionaryBuilder as InnerTermDictBuilder,
|
||||
TermStreamerBuilder,
|
||||
TermStreamerBuilder, TermWithStateStreamerBuilder,
|
||||
};
|
||||
pub use self::termdict::{TermMerger, TermStreamer, TermWithStateStreamer};
|
||||
use crate::postings::TermInfo;
|
||||
@@ -161,9 +160,7 @@ impl TermDictionary {
|
||||
/// Returns a search builder, to stream all of the terms
|
||||
/// within the Automaton
|
||||
pub fn search<'a, A: Automaton + 'a>(&'a self, automaton: A) -> TermStreamerBuilder<'a, A>
|
||||
where
|
||||
A::State: Clone,
|
||||
{
|
||||
where A::State: Clone {
|
||||
self.0.search(automaton)
|
||||
}
|
||||
|
||||
|
||||
@@ -5,13 +5,13 @@ mod merger;
|
||||
use std::iter::ExactSizeIterator;
|
||||
|
||||
use common::VInt;
|
||||
use sstable::streamer::StreamerWithState;
|
||||
use sstable::value::{ValueReader, ValueWriter};
|
||||
use sstable::SSTable;
|
||||
use tantivy_fst::automaton::AlwaysMatch;
|
||||
use tantivy_fst::Automaton;
|
||||
|
||||
pub use self::merger::TermMerger;
|
||||
use tantivy_fst::Automaton;
|
||||
use sstable::streamer::StreamerWithState;
|
||||
use crate::postings::TermInfo;
|
||||
|
||||
pub struct TermWithStateStreamerBuilder<'a, A>
|
||||
@@ -75,9 +75,7 @@ where
|
||||
}
|
||||
|
||||
pub fn into_stream(self) -> io::Result<TermWithStateStreamer<'a, A>> {
|
||||
let streamer_with_state = self
|
||||
.streamer_builder
|
||||
.into_stream_with_state()?;
|
||||
let streamer_with_state = self.streamer_builder.into_stream_with_state()?;
|
||||
Ok(TermWithStateStreamer {
|
||||
streamer_with_state,
|
||||
})
|
||||
|
||||
@@ -137,8 +137,7 @@ pub struct Reader<TValueReader> {
|
||||
}
|
||||
|
||||
impl<TValueReader> Reader<TValueReader>
|
||||
where
|
||||
TValueReader: ValueReader,
|
||||
where TValueReader: ValueReader
|
||||
{
|
||||
pub fn advance(&mut self) -> io::Result<bool> {
|
||||
if !self.delta_reader.advance()? {
|
||||
@@ -171,8 +170,7 @@ impl<TValueReader> AsRef<[u8]> for Reader<TValueReader> {
|
||||
}
|
||||
|
||||
pub struct Writer<W, TValueWriter>
|
||||
where
|
||||
W: io::Write,
|
||||
where W: io::Write
|
||||
{
|
||||
previous_key: Vec<u8>,
|
||||
index_builder: SSTableIndexBuilder,
|
||||
|
||||
@@ -179,8 +179,7 @@ where
|
||||
}
|
||||
|
||||
impl<TSSTable> Streamer<'_, TSSTable, AlwaysMatch>
|
||||
where
|
||||
TSSTable: SSTable,
|
||||
where TSSTable: SSTable
|
||||
{
|
||||
pub fn empty() -> Self {
|
||||
Streamer {
|
||||
|
||||
@@ -266,9 +266,7 @@ impl SharedArenaHashMap {
|
||||
/// Get a value associated to a key.
|
||||
#[inline]
|
||||
pub fn get<V>(&self, key: &[u8], memory_arena: &MemoryArena) -> Option<V>
|
||||
where
|
||||
V: Copy + 'static,
|
||||
{
|
||||
where V: Copy + 'static {
|
||||
let hash = self.get_hash(key);
|
||||
let mut probe = self.probe(hash);
|
||||
loop {
|
||||
|
||||
@@ -3,11 +3,8 @@ mod test {
|
||||
use maplit::hashmap;
|
||||
use tantivy::collector::TopDocs;
|
||||
use tantivy::query::FuzzyTermQuery;
|
||||
use tantivy::schema::{Schema, Value};
|
||||
use tantivy::schema::{STORED, TEXT};
|
||||
use tantivy::Index;
|
||||
use tantivy::Term;
|
||||
use tantivy::{doc, TantivyDocument};
|
||||
use tantivy::schema::{Schema, Value, STORED, TEXT};
|
||||
use tantivy::{doc, Index, TantivyDocument, Term};
|
||||
|
||||
#[test]
|
||||
pub fn test_fuzzy_term() {
|
||||
|
||||
Reference in New Issue
Block a user