fix: post-rebase fixes

- Add missing size_hint module declaration
- Remove test-only export serialize_and_load_u64_based_column_values
- Fix Quickwit CI issues
This commit is contained in:
Mohammad Dashti
2025-12-05 22:37:19 -08:00
committed by Stu Hood
parent eda9aa437f
commit ff6ee3a5db
15 changed files with 67 additions and 35 deletions

17
Cargo.lock generated
View File

@@ -1646,7 +1646,7 @@ dependencies = [
[[package]]
name = "tantivy"
version = "0.25.0"
version = "0.26.0"
dependencies = [
"aho-corasick",
"arc-swap",
@@ -1700,7 +1700,7 @@ dependencies = [
"tantivy-bitpacker",
"tantivy-columnar",
"tantivy-common",
"tantivy-fst 0.5.0 (git+https://github.com/paradedb/fst.git)",
"tantivy-fst",
"tantivy-query-grammar",
"tantivy-sstable",
"tantivy-stacker",
@@ -1757,17 +1757,6 @@ dependencies = [
"time",
]
[[package]]
name = "tantivy-fst"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d60769b80ad7953d8a7b2c70cdfe722bbcdcac6bccc8ac934c40c034d866fc18"
dependencies = [
"byteorder",
"regex-syntax 0.8.5",
"utf8-ranges",
]
[[package]]
name = "tantivy-fst"
version = "0.5.0"
@@ -1801,7 +1790,7 @@ dependencies = [
"rand",
"tantivy-bitpacker",
"tantivy-common",
"tantivy-fst 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"tantivy-fst",
"zstd",
]

View File

@@ -40,7 +40,14 @@ fn main() {
let columnar_readers = columnar_readers.iter().collect::<Vec<_>>();
let merge_row_order = StackMergeOrder::stack(&columnar_readers[..]);
merge_columnar(&columnar_readers, &[], merge_row_order.into(), &mut out).unwrap();
merge_columnar(
&columnar_readers,
&[],
merge_row_order.into(),
&mut out,
|| false,
)
.unwrap();
Some(out.len() as u64)
},
);

View File

@@ -164,7 +164,11 @@ fn test_optional_index_large() {
fn test_optional_index_iter_aux(row_ids: &[RowId], num_rows: RowId) {
let optional_index = OptionalIndex::for_test(num_rows, row_ids);
assert_eq!(optional_index.num_docs(), num_rows);
assert!(optional_index.iter_rows().eq(row_ids.iter().copied()));
assert!(
optional_index
.iter_non_null_docs()
.eq(row_ids.iter().copied())
);
}
#[test]

View File

@@ -571,6 +571,7 @@ proptest! {
&[],
MergeRowOrder::Stack(stack_merge_order),
&mut out,
|| false,
).unwrap();
let merged_reader = ColumnarReader::open(out).unwrap();
@@ -588,6 +589,7 @@ proptest! {
&[],
MergeRowOrder::Stack(stack_merge_order),
&mut out,
|| false,
).unwrap();
}

View File

@@ -1,5 +1,4 @@
use std::collections::HashMap;
use std::ops::BitOrAssign;
use std::path::PathBuf;
use std::sync::{Arc, OnceLock, RwLock};
use std::{fmt, io};
@@ -377,7 +376,8 @@ impl SegmentReader {
if is_json {
let term_dictionary_json_field_num_bytes: u64 = self
.termdict_composite
.open_read(field)
.get()
.and_then(|composite| composite.open_read(field))
.map(|file_slice| file_slice.len() as u64)
.unwrap_or(0u64);
let inv_index = self.inverted_index(field)?;
@@ -429,19 +429,22 @@ impl SegmentReader {
} else {
let postings_size: ByteCount = self
.postings_composite
.open_read(field)
.get()
.and_then(|composite| composite.open_read(field))
.map(|posting_fileslice| posting_fileslice.len())
.unwrap_or(0)
.into();
let positions_size: ByteCount = self
.positions_composite
.open_read(field)
.get()
.and_then(|composite| composite.open_read(field))
.map(|positions_fileslice| positions_fileslice.len())
.unwrap_or(0)
.into();
let term_dictionary_size: ByteCount = self
.termdict_composite
.open_read(field)
.get()
.and_then(|composite| composite.open_read(field))
.map(|term_dictionary_fileslice| term_dictionary_fileslice.len())
.unwrap_or(0)
.into();

View File

@@ -1,4 +1,3 @@
use std::any::Any;
use std::borrow::BorrowMut;
use std::collections::HashSet;
use std::io::Write;

View File

@@ -101,7 +101,7 @@ where
scorers.push(scorer);
}
let scorer = BufferedUnionScorer::build(scorers, SumCombiner::default);
let scorer = BufferedUnionScorer::build(scorers, SumCombiner::default, reader.max_doc());
Ok(Box::new(scorer))
}

View File

@@ -90,7 +90,11 @@ impl<TDocSet: DocSet> Intersection<TDocSet, TDocSet> {
}
}
pub fn with_two_sets(left: TDocSet, right: TDocSet) -> Intersection<TDocSet, TDocSet> {
pub fn with_two_sets(
left: TDocSet,
right: TDocSet,
num_docs: u32,
) -> Intersection<TDocSet, TDocSet> {
let mut docsets = vec![left, right];
go_to_first_doc(&mut docsets);
let left = docsets.remove(0);
@@ -99,6 +103,7 @@ impl<TDocSet: DocSet> Intersection<TDocSet, TDocSet> {
left,
right,
others: docsets,
num_docs,
}
}
}

View File

@@ -22,6 +22,7 @@ mod range_query;
mod regex_query;
mod reqopt_scorer;
mod scorer;
mod size_hint;
mod term_query;
mod term_set_query;
mod union;
@@ -49,9 +50,7 @@ pub use self::explanation::{does_not_match, Explanation};
pub(crate) use self::fuzzy_query::DfaWrapper;
pub use self::fuzzy_query::FuzzyTermQuery;
pub use self::intersection::{intersect_scorers, Intersection};
pub use self::more_like_this::{
MoreLikeThis, MoreLikeThisQuery, MoreLikeThisQueryBuilder, ScoreTerm,
};
pub use self::more_like_this::{MoreLikeThis, MoreLikeThisQuery, MoreLikeThisQueryBuilder};
pub use self::phrase_prefix_query::PhrasePrefixQuery;
pub use self::phrase_query::regex_phrase_query::{wildcard_query_to_regex_str, RegexPhraseQuery};
pub use self::phrase_query::regex_phrase_weight::RegexPhraseWeight;
@@ -123,7 +122,9 @@ mod tests {
query.query_terms(text_field, &segment_reader, &mut |term, pos| {
terms.push((term.clone(), pos))
});
assert_eq!(vec![(term_a.clone(), false); 5], terms);
// With the new `query_terms` signature that includes `segment_reader`,
// repeated terms are reported only once.
assert_eq!(vec![(term_a.clone(), false)], terms);
}
{
let query = query_parser.parse_query("a -b").unwrap();

View File

@@ -3,5 +3,5 @@ mod more_like_this;
/// Module containing the different query implementations.
mod query;
pub use self::more_like_this::{MoreLikeThis, ScoreTerm};
pub use self::more_like_this::MoreLikeThis;
pub use self::query::{MoreLikeThisQuery, MoreLikeThisQueryBuilder};

View File

@@ -1,8 +1,10 @@
use std::fmt;
use std::ops::Bound;
use super::term_weight::TermWeight;
use crate::query::bm25::Bm25Weight;
use crate::query::{EnableScoring, Explanation, Query, Weight};
use crate::query::range_query::is_type_valid_for_fastfield_range_query;
use crate::query::{EnableScoring, Explanation, Query, RangeQuery, Weight};
use crate::schema::{Field, IndexRecordOption};
use crate::{SegmentReader, Term};
@@ -122,6 +124,24 @@ impl TermQuery {
impl Query for TermQuery {
fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
// If the field is not indexed but is a suitable fast field, fall back to a range query
// on the fast field matching exactly this term.
//
// Note: This is considerably slower since it requires scanning the entire fast field.
// TODO: The range query would gain from having a single-value optimization
let schema = enable_scoring.schema();
let field_entry = schema.get_field_entry(self.term.field());
if !field_entry.is_indexed()
&& field_entry.is_fast()
&& is_type_valid_for_fastfield_range_query(self.term.typ())
&& !enable_scoring.is_scoring_enabled()
{
let range_query = RangeQuery::new(
Bound::Included(self.term.clone()),
Bound::Included(self.term.clone()),
);
return range_query.weight(enable_scoring);
}
Ok(Box::new(self.specialized_weight(enable_scoring)?))
}
fn query_terms(

View File

@@ -1,6 +1,6 @@
use std::net::Ipv6Addr;
use columnar::{Column, ColumnType, MonotonicallyMappableToU64};
use columnar::{Column, ColumnType};
use rustc_hash::{FxHashMap, FxHashSet};
use crate::query::score_combiner::DoNothingCombiner;

View File

@@ -14,7 +14,7 @@ common = {version= "0.10", path="../common", package="tantivy-common"}
futures-util = "0.3.30"
itertools = "0.14.0"
tantivy-bitpacker = { version= "0.9", path="../bitpacker" }
tantivy-fst = "0.5"
tantivy-fst = { git = "https://github.com/paradedb/fst.git" }
# experimental gives us access to Decompressor::upper_bound
zstd = { version = "0.13", optional = true, features = ["experimental"] }

View File

@@ -24,7 +24,7 @@
//!
//! // Open the sstable.
//! let sstable =
//! Dictionary::<MonotonicU64SSTable>::from_bytes(OwnedBytes::new(sstable_bytes)).unwrap();
//! Dictionary::<MonotonicU64SSTable>::from_bytes_for_tests(OwnedBytes::new(sstable_bytes)).unwrap();
//!
//! // Search for a key.
//! let value = sstable.get(b"banana").unwrap();

View File

@@ -12,7 +12,8 @@ fn test_create_and_search_sstable() {
// Open the sstable.
let sstable =
Dictionary::<MonotonicU64SSTable>::from_bytes(OwnedBytes::new(sstable_bytes)).unwrap();
Dictionary::<MonotonicU64SSTable>::from_bytes_for_tests(OwnedBytes::new(sstable_bytes))
.unwrap();
// Search for a key.
let value = sstable.get(b"banana").unwrap();
@@ -34,7 +35,8 @@ fn test_custom_value_sstable() {
// Open the sstable.
let sstable =
Dictionary::<VecU32ValueSSTable>::from_bytes(OwnedBytes::new(sstable_bytes)).unwrap();
Dictionary::<VecU32ValueSSTable>::from_bytes_for_tests(OwnedBytes::new(sstable_bytes))
.unwrap();
let mut stream = sstable.stream().unwrap();
assert!(stream.advance());