fix: tests in ci (#18)

This commit is contained in:
Neil Hansen
2025-01-14 11:58:01 -08:00
committed by Philippe Noël
parent 0ba892f06f
commit 34c17a1685
15 changed files with 73 additions and 69 deletions

View File

@@ -81,9 +81,7 @@ impl<T: PartialOrd + Copy + std::fmt::Debug + Send + Sync + 'static + Default>
/// Given two sorted lists of docids `docs` and `hits`, hits is a subset of `docs`.
/// Return all docs that are not in `hits`.
fn find_missing_docs<F>(docs: &[u32], hits: &[u32], mut callback: F)
where
F: FnMut(u32),
{
where F: FnMut(u32) {
let mut docs_iter = docs.iter();
let mut hits_iter = hits.iter();

View File

@@ -1,9 +1,3 @@
use crate::directory::directory_lock::Lock;
use crate::directory::error::{DeleteError, LockError, OpenReadError, OpenWriteError};
use crate::directory::{FileHandle, FileSlice, WatchCallback, WatchHandle, WritePtr};
use crate::index::SegmentMetaInventory;
use crate::merge_policy::MergePolicy;
use crate::IndexMeta;
use std::any::Any;
use std::collections::HashSet;
use std::io::Write;
@@ -12,6 +6,13 @@ use std::sync::Arc;
use std::time::Duration;
use std::{fmt, io, thread};
use crate::directory::directory_lock::Lock;
use crate::directory::error::{DeleteError, LockError, OpenReadError, OpenWriteError};
use crate::directory::{FileHandle, FileSlice, WatchCallback, WatchHandle, WritePtr};
use crate::index::SegmentMetaInventory;
use crate::merge_policy::MergePolicy;
use crate::IndexMeta;
/// The retry logic for acquiring locks is pretty simple.
/// We just retry `n` times after a given `duration`, both
/// depending on the type of lock.
@@ -268,7 +269,8 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
}
// Allows the directory to change the writer's merge policy right before the merge happens
// This is useful for directories that need to change the merge policy based on how many segments were created
// This is useful for directories that need to change the merge policy based on how many
// segments were created
fn reconsider_merge_policy(
&self,
_metas: &IndexMeta,
@@ -285,8 +287,7 @@ pub trait DirectoryClone {
}
impl<T> DirectoryClone for T
where
T: 'static + Directory + Clone,
where T: 'static + Directory + Clone
{
fn box_clone(&self) -> Box<dyn Directory> {
Box::new(self.clone())

View File

@@ -1,4 +1,3 @@
use crc32fast::Hasher;
use std::any::Any;
use std::collections::HashSet;
use std::io::Write;
@@ -6,6 +5,8 @@ use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::{io, result};
use crc32fast::Hasher;
use crate::core::MANAGED_FILEPATH;
use crate::directory::error::{DeleteError, LockError, OpenReadError, OpenWriteError};
use crate::directory::footer::{Footer, FooterProxy};
@@ -13,9 +14,9 @@ use crate::directory::{
DirectoryLock, FileHandle, FileSlice, GarbageCollectionResult, Lock, WatchCallback,
WatchHandle, WritePtr, MANAGED_LOCK, META_LOCK,
};
use crate::merge_policy::MergePolicy;
use crate::error::DataCorruption;
use crate::index::SegmentMetaInventory;
use crate::merge_policy::MergePolicy;
use crate::{Directory, IndexMeta};
/// Returns true if the file is "managed".
@@ -356,7 +357,8 @@ impl Directory for ManagedDirectory {
metas: &IndexMeta,
previous_metas: &IndexMeta,
) -> Option<Box<dyn MergePolicy>> {
self.directory.reconsider_merge_policy(metas, previous_metas)
self.directory
.reconsider_merge_policy(metas, previous_metas)
}
}

View File

@@ -1089,7 +1089,10 @@ mod tests {
index_writer.commit()?;
reader.reload().unwrap();
assert_eq!(num_docs_containing("a"), 0);
// In Tantivy upstream, this test results in 0 segments after delete.
// However, due to our custom visibility rules, we leave the segment.
// See committed_segment_metas in segment_manager.rs.
assert_eq!(num_docs_containing("a"), 1);
index_writer.merge(&segments);
index_writer.wait_merging_threads().unwrap();
@@ -1135,7 +1138,10 @@ mod tests {
index_writer.commit()?;
reader.reload().unwrap();
assert_eq!(num_docs_containing("a"), 0);
// In Tantivy upstream, this test results in 0 segments after delete.
// However, due to our custom visibility rules, we leave the segment.
// See committed_segment_metas in segment_manager.rs.
assert_eq!(num_docs_containing("a"), 4);
index_writer.merge(&segments);
index_writer.wait_merging_threads().unwrap();

View File

@@ -1032,12 +1032,15 @@ mod tests {
// Test removing all docs
index_writer.delete_term(Term::from_field_text(text_field, "g"));
index_writer.commit()?;
let segment_ids = index.searchable_segment_ids()?;
let _segment_ids = index.searchable_segment_ids()?;
reader.reload()?;
let searcher = reader.searcher();
assert!(segment_ids.is_empty());
assert!(searcher.segment_readers().is_empty());
// In Tantivy upstream, this test results in 0 segments after delete.
// However, due to our custom visibility rules, we leave the segment.
// See committed_segment_metas in segment_manager.rs.
// assert!(segment_ids.is_empty());
// assert!(searcher.segment_readers().is_empty());
assert_eq!(searcher.num_docs(), 0);
}
Ok(())

View File

@@ -360,8 +360,8 @@ impl SegmentUpdater {
let segment_updater = self.clone();
self.schedule_task(move || {
segment_updater.segment_manager.add_segment(segment_entry);
// mingy98: We don't need to consider merge options for every segment, just at the very end
// segment_updater.consider_merge_options();
// mingy98: We don't need to consider merge options for every segment, just at the very
// end.
// segment_updater.consider_merge_options();
Ok(())
})
}
@@ -776,9 +776,11 @@ mod tests {
}
index_writer.commit()?;
let seg_ids = index.searchable_segment_ids()?;
// docs exist, should have at least 1 segment
assert!(!seg_ids.is_empty());
let _seg_ids = index.searchable_segment_ids()?;
// In Tantivy upstream, this test results in 0 segments after delete.
// However, due to our custom visibility rules, we leave the segment.
// See committed_segment_metas in segment_manager.rs.
// assert!(!seg_ids.is_empty());
let term = Term::from_field_text(text_field, "a");
index_writer.delete_term(term);
@@ -793,14 +795,15 @@ mod tests {
let reader = index.reader()?;
assert_eq!(reader.searcher().num_docs(), 0);
let seg_ids = index.searchable_segment_ids()?;
assert!(seg_ids.is_empty());
let _seg_ids = index.searchable_segment_ids()?;
// Skipped due to custom ParadeDB visibility rules.
// assert!(seg_ids.is_empty());
reader.reload()?;
assert_eq!(reader.searcher().num_docs(), 0);
// empty segments should be erased
assert!(index.searchable_segment_metas()?.is_empty());
assert!(reader.searcher().segment_readers().is_empty());
// Skipped due to custom ParadeDB visibility rules.
// assert!(index.searchable_segment_metas()?.is_empty());
// assert!(reader.searcher().segment_readers().is_empty());
Ok(())
}
@@ -830,9 +833,11 @@ mod tests {
index_writer.add_document(doc!(text_field=>"f"))?;
index_writer.commit()?;
let seg_ids = index.searchable_segment_ids()?;
// docs exist, should have at least 1 segment
assert!(!seg_ids.is_empty());
let _seg_ids = index.searchable_segment_ids()?;
// In Tantivy upstream, this test results in 0 segments after delete.
// However, due to our custom visibility rules, we leave the segment.
// See committed_segment_metas in segment_manager.rs.
// assert!(!seg_ids.is_empty());
let term_vals = vec!["a", "b", "c", "d", "e", "f"];
for term_val in term_vals {
@@ -846,14 +851,15 @@ mod tests {
let reader = index.reader()?;
assert_eq!(reader.searcher().num_docs(), 0);
let seg_ids = index.searchable_segment_ids()?;
assert!(seg_ids.is_empty());
let _seg_ids = index.searchable_segment_ids()?;
// Skipped due to custom ParadeDB visibility rules.
// assert!(seg_ids.is_empty());
reader.reload()?;
assert_eq!(reader.searcher().num_docs(), 0);
// empty segments should be erased
assert!(index.searchable_segment_metas()?.is_empty());
assert!(reader.searcher().segment_readers().is_empty());
// Skipped due to custom ParadeDB visibility rules.
// assert!(index.searchable_segment_metas()?.is_empty());
// assert!(reader.searcher().segment_readers().is_empty());
Ok(())
}

View File

@@ -491,10 +491,12 @@ pub(crate) mod tests {
}
let searcher = index.reader()?.searcher();
// finally, check that it's empty
// In Tantivy upstream, this test results in 0 segments after delete.
// However, due to our custom visibility rules, we leave the segment.
// See committed_segment_metas in segment_manager.rs.
{
let searchable_segment_ids = index.searchable_segment_ids()?;
assert!(searchable_segment_ids.is_empty());
let _searchable_segment_ids = index.searchable_segment_ids()?;
// assert!(searchable_segment_ids.is_empty());
assert_eq!(searcher.num_docs(), 0);
}
Ok(())

View File

@@ -1,5 +1,3 @@
use crate::query::fuzzy_query::DfaWrapper;
use crate::query::score_combiner::SumCombiner;
use std::any::{Any, TypeId};
use std::io;
use std::sync::Arc;
@@ -10,6 +8,8 @@ use super::phrase_prefix_query::prefix_end;
use super::BufferedUnionScorer;
use crate::index::SegmentReader;
use crate::postings::TermInfo;
use crate::query::fuzzy_query::DfaWrapper;
use crate::query::score_combiner::SumCombiner;
use crate::query::{ConstScorer, Explanation, Scorer, Weight};
use crate::schema::{Field, IndexRecordOption};
use crate::termdict::{TermDictionary, TermWithStateStreamer};

View File

@@ -83,8 +83,7 @@ where A: Automaton
}
impl<A> TermStreamer<'_, A>
where
A: Automaton,
where A: Automaton
{
/// Advance position the stream on the next item.
/// Before the first call to `.advance()`, the stream

View File

@@ -42,10 +42,9 @@ use tantivy_fst::Automaton;
#[cfg(feature = "quickwit")]
use self::termdict::TermDictionaryExt;
use self::termdict::TermWithStateStreamerBuilder;
use self::termdict::{
TermDictionary as InnerTermDict, TermDictionaryBuilder as InnerTermDictBuilder,
TermStreamerBuilder,
TermStreamerBuilder, TermWithStateStreamerBuilder,
};
pub use self::termdict::{TermMerger, TermStreamer, TermWithStateStreamer};
use crate::postings::TermInfo;
@@ -161,9 +160,7 @@ impl TermDictionary {
/// Returns a search builder, to stream all of the terms
/// within the Automaton
pub fn search<'a, A: Automaton + 'a>(&'a self, automaton: A) -> TermStreamerBuilder<'a, A>
where
A::State: Clone,
{
where A::State: Clone {
self.0.search(automaton)
}

View File

@@ -5,13 +5,13 @@ mod merger;
use std::iter::ExactSizeIterator;
use common::VInt;
use sstable::streamer::StreamerWithState;
use sstable::value::{ValueReader, ValueWriter};
use sstable::SSTable;
use tantivy_fst::automaton::AlwaysMatch;
use tantivy_fst::Automaton;
pub use self::merger::TermMerger;
use tantivy_fst::Automaton;
use sstable::streamer::StreamerWithState;
use crate::postings::TermInfo;
pub struct TermWithStateStreamerBuilder<'a, A>
@@ -75,9 +75,7 @@ where
}
pub fn into_stream(self) -> io::Result<TermWithStateStreamer<'a, A>> {
let streamer_with_state = self
.streamer_builder
.into_stream_with_state()?;
let streamer_with_state = self.streamer_builder.into_stream_with_state()?;
Ok(TermWithStateStreamer {
streamer_with_state,
})

View File

@@ -137,8 +137,7 @@ pub struct Reader<TValueReader> {
}
impl<TValueReader> Reader<TValueReader>
where
TValueReader: ValueReader,
where TValueReader: ValueReader
{
pub fn advance(&mut self) -> io::Result<bool> {
if !self.delta_reader.advance()? {
@@ -171,8 +170,7 @@ impl<TValueReader> AsRef<[u8]> for Reader<TValueReader> {
}
pub struct Writer<W, TValueWriter>
where
W: io::Write,
where W: io::Write
{
previous_key: Vec<u8>,
index_builder: SSTableIndexBuilder,

View File

@@ -179,8 +179,7 @@ where
}
impl<TSSTable> Streamer<'_, TSSTable, AlwaysMatch>
where
TSSTable: SSTable,
where TSSTable: SSTable
{
pub fn empty() -> Self {
Streamer {

View File

@@ -266,9 +266,7 @@ impl SharedArenaHashMap {
/// Get a value associated to a key.
#[inline]
pub fn get<V>(&self, key: &[u8], memory_arena: &MemoryArena) -> Option<V>
where
V: Copy + 'static,
{
where V: Copy + 'static {
let hash = self.get_hash(key);
let mut probe = self.probe(hash);
loop {

View File

@@ -3,11 +3,8 @@ mod test {
use maplit::hashmap;
use tantivy::collector::TopDocs;
use tantivy::query::FuzzyTermQuery;
use tantivy::schema::{Schema, Value};
use tantivy::schema::{STORED, TEXT};
use tantivy::Index;
use tantivy::Term;
use tantivy::{doc, TantivyDocument};
use tantivy::schema::{Schema, Value, STORED, TEXT};
use tantivy::{doc, Index, TantivyDocument, Term};
#[test]
pub fn test_fuzzy_term() {