mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-30 23:20:40 +00:00
fix: post-rebase fixes
- Add missing size_hint module declaration - Remove test-only export serialize_and_load_u64_based_column_values - Fix quickwit CI issues
This commit is contained in:
committed by
Stu Hood
parent
eda9aa437f
commit
ff6ee3a5db
17
Cargo.lock
generated
17
Cargo.lock
generated
@@ -1646,7 +1646,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tantivy"
|
||||
version = "0.25.0"
|
||||
version = "0.26.0"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"arc-swap",
|
||||
@@ -1700,7 +1700,7 @@ dependencies = [
|
||||
"tantivy-bitpacker",
|
||||
"tantivy-columnar",
|
||||
"tantivy-common",
|
||||
"tantivy-fst 0.5.0 (git+https://github.com/paradedb/fst.git)",
|
||||
"tantivy-fst",
|
||||
"tantivy-query-grammar",
|
||||
"tantivy-sstable",
|
||||
"tantivy-stacker",
|
||||
@@ -1757,17 +1757,6 @@ dependencies = [
|
||||
"time",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tantivy-fst"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d60769b80ad7953d8a7b2c70cdfe722bbcdcac6bccc8ac934c40c034d866fc18"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"regex-syntax 0.8.5",
|
||||
"utf8-ranges",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tantivy-fst"
|
||||
version = "0.5.0"
|
||||
@@ -1801,7 +1790,7 @@ dependencies = [
|
||||
"rand",
|
||||
"tantivy-bitpacker",
|
||||
"tantivy-common",
|
||||
"tantivy-fst 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tantivy-fst",
|
||||
"zstd",
|
||||
]
|
||||
|
||||
|
||||
@@ -40,7 +40,14 @@ fn main() {
|
||||
let columnar_readers = columnar_readers.iter().collect::<Vec<_>>();
|
||||
let merge_row_order = StackMergeOrder::stack(&columnar_readers[..]);
|
||||
|
||||
merge_columnar(&columnar_readers, &[], merge_row_order.into(), &mut out).unwrap();
|
||||
merge_columnar(
|
||||
&columnar_readers,
|
||||
&[],
|
||||
merge_row_order.into(),
|
||||
&mut out,
|
||||
|| false,
|
||||
)
|
||||
.unwrap();
|
||||
Some(out.len() as u64)
|
||||
},
|
||||
);
|
||||
|
||||
@@ -164,7 +164,11 @@ fn test_optional_index_large() {
|
||||
fn test_optional_index_iter_aux(row_ids: &[RowId], num_rows: RowId) {
|
||||
let optional_index = OptionalIndex::for_test(num_rows, row_ids);
|
||||
assert_eq!(optional_index.num_docs(), num_rows);
|
||||
assert!(optional_index.iter_rows().eq(row_ids.iter().copied()));
|
||||
assert!(
|
||||
optional_index
|
||||
.iter_non_null_docs()
|
||||
.eq(row_ids.iter().copied())
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -571,6 +571,7 @@ proptest! {
|
||||
&[],
|
||||
MergeRowOrder::Stack(stack_merge_order),
|
||||
&mut out,
|
||||
|| false,
|
||||
).unwrap();
|
||||
|
||||
let merged_reader = ColumnarReader::open(out).unwrap();
|
||||
@@ -588,6 +589,7 @@ proptest! {
|
||||
&[],
|
||||
MergeRowOrder::Stack(stack_merge_order),
|
||||
&mut out,
|
||||
|| false,
|
||||
).unwrap();
|
||||
|
||||
}
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
use std::collections::HashMap;
|
||||
use std::ops::BitOrAssign;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::{Arc, OnceLock, RwLock};
|
||||
use std::{fmt, io};
|
||||
@@ -377,7 +376,8 @@ impl SegmentReader {
|
||||
if is_json {
|
||||
let term_dictionary_json_field_num_bytes: u64 = self
|
||||
.termdict_composite
|
||||
.open_read(field)
|
||||
.get()
|
||||
.and_then(|composite| composite.open_read(field))
|
||||
.map(|file_slice| file_slice.len() as u64)
|
||||
.unwrap_or(0u64);
|
||||
let inv_index = self.inverted_index(field)?;
|
||||
@@ -429,19 +429,22 @@ impl SegmentReader {
|
||||
} else {
|
||||
let postings_size: ByteCount = self
|
||||
.postings_composite
|
||||
.open_read(field)
|
||||
.get()
|
||||
.and_then(|composite| composite.open_read(field))
|
||||
.map(|posting_fileslice| posting_fileslice.len())
|
||||
.unwrap_or(0)
|
||||
.into();
|
||||
let positions_size: ByteCount = self
|
||||
.positions_composite
|
||||
.open_read(field)
|
||||
.get()
|
||||
.and_then(|composite| composite.open_read(field))
|
||||
.map(|positions_fileslice| positions_fileslice.len())
|
||||
.unwrap_or(0)
|
||||
.into();
|
||||
let term_dictionary_size: ByteCount = self
|
||||
.termdict_composite
|
||||
.open_read(field)
|
||||
.get()
|
||||
.and_then(|composite| composite.open_read(field))
|
||||
.map(|term_dictionary_fileslice| term_dictionary_fileslice.len())
|
||||
.unwrap_or(0)
|
||||
.into();
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
use std::any::Any;
|
||||
use std::borrow::BorrowMut;
|
||||
use std::collections::HashSet;
|
||||
use std::io::Write;
|
||||
|
||||
@@ -101,7 +101,7 @@ where
|
||||
scorers.push(scorer);
|
||||
}
|
||||
|
||||
let scorer = BufferedUnionScorer::build(scorers, SumCombiner::default);
|
||||
let scorer = BufferedUnionScorer::build(scorers, SumCombiner::default, reader.max_doc());
|
||||
Ok(Box::new(scorer))
|
||||
}
|
||||
|
||||
|
||||
@@ -90,7 +90,11 @@ impl<TDocSet: DocSet> Intersection<TDocSet, TDocSet> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_two_sets(left: TDocSet, right: TDocSet) -> Intersection<TDocSet, TDocSet> {
|
||||
pub fn with_two_sets(
|
||||
left: TDocSet,
|
||||
right: TDocSet,
|
||||
num_docs: u32,
|
||||
) -> Intersection<TDocSet, TDocSet> {
|
||||
let mut docsets = vec![left, right];
|
||||
go_to_first_doc(&mut docsets);
|
||||
let left = docsets.remove(0);
|
||||
@@ -99,6 +103,7 @@ impl<TDocSet: DocSet> Intersection<TDocSet, TDocSet> {
|
||||
left,
|
||||
right,
|
||||
others: docsets,
|
||||
num_docs,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,6 +22,7 @@ mod range_query;
|
||||
mod regex_query;
|
||||
mod reqopt_scorer;
|
||||
mod scorer;
|
||||
mod size_hint;
|
||||
mod term_query;
|
||||
mod term_set_query;
|
||||
mod union;
|
||||
@@ -49,9 +50,7 @@ pub use self::explanation::{does_not_match, Explanation};
|
||||
pub(crate) use self::fuzzy_query::DfaWrapper;
|
||||
pub use self::fuzzy_query::FuzzyTermQuery;
|
||||
pub use self::intersection::{intersect_scorers, Intersection};
|
||||
pub use self::more_like_this::{
|
||||
MoreLikeThis, MoreLikeThisQuery, MoreLikeThisQueryBuilder, ScoreTerm,
|
||||
};
|
||||
pub use self::more_like_this::{MoreLikeThis, MoreLikeThisQuery, MoreLikeThisQueryBuilder};
|
||||
pub use self::phrase_prefix_query::PhrasePrefixQuery;
|
||||
pub use self::phrase_query::regex_phrase_query::{wildcard_query_to_regex_str, RegexPhraseQuery};
|
||||
pub use self::phrase_query::regex_phrase_weight::RegexPhraseWeight;
|
||||
@@ -123,7 +122,9 @@ mod tests {
|
||||
query.query_terms(text_field, &segment_reader, &mut |term, pos| {
|
||||
terms.push((term.clone(), pos))
|
||||
});
|
||||
assert_eq!(vec![(term_a.clone(), false); 5], terms);
|
||||
// With the new query_terms signature that includes segment_reader,
|
||||
// duplicate terms are deduplicated
|
||||
assert_eq!(vec![(term_a.clone(), false)], terms);
|
||||
}
|
||||
{
|
||||
let query = query_parser.parse_query("a -b").unwrap();
|
||||
|
||||
@@ -3,5 +3,5 @@ mod more_like_this;
|
||||
/// Module containing the different query implementations.
|
||||
mod query;
|
||||
|
||||
pub use self::more_like_this::{MoreLikeThis, ScoreTerm};
|
||||
pub use self::more_like_this::MoreLikeThis;
|
||||
pub use self::query::{MoreLikeThisQuery, MoreLikeThisQueryBuilder};
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
use std::fmt;
|
||||
use std::ops::Bound;
|
||||
|
||||
use super::term_weight::TermWeight;
|
||||
use crate::query::bm25::Bm25Weight;
|
||||
use crate::query::{EnableScoring, Explanation, Query, Weight};
|
||||
use crate::query::range_query::is_type_valid_for_fastfield_range_query;
|
||||
use crate::query::{EnableScoring, Explanation, Query, RangeQuery, Weight};
|
||||
use crate::schema::{Field, IndexRecordOption};
|
||||
use crate::{SegmentReader, Term};
|
||||
|
||||
@@ -122,6 +124,24 @@ impl TermQuery {
|
||||
|
||||
impl Query for TermQuery {
|
||||
fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
|
||||
// If the field is not indexed but is a suitable fast field, fall back to a range query
|
||||
// on the fast field matching exactly this term.
|
||||
//
|
||||
// Note: This is considerably slower since it requires scanning the entire fast field.
|
||||
// TODO: The range query would gain from having a single-value optimization
|
||||
let schema = enable_scoring.schema();
|
||||
let field_entry = schema.get_field_entry(self.term.field());
|
||||
if !field_entry.is_indexed()
|
||||
&& field_entry.is_fast()
|
||||
&& is_type_valid_for_fastfield_range_query(self.term.typ())
|
||||
&& !enable_scoring.is_scoring_enabled()
|
||||
{
|
||||
let range_query = RangeQuery::new(
|
||||
Bound::Included(self.term.clone()),
|
||||
Bound::Included(self.term.clone()),
|
||||
);
|
||||
return range_query.weight(enable_scoring);
|
||||
}
|
||||
Ok(Box::new(self.specialized_weight(enable_scoring)?))
|
||||
}
|
||||
fn query_terms(
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use std::net::Ipv6Addr;
|
||||
|
||||
use columnar::{Column, ColumnType, MonotonicallyMappableToU64};
|
||||
use columnar::{Column, ColumnType};
|
||||
use rustc_hash::{FxHashMap, FxHashSet};
|
||||
|
||||
use crate::query::score_combiner::DoNothingCombiner;
|
||||
|
||||
@@ -14,7 +14,7 @@ common = {version= "0.10", path="../common", package="tantivy-common"}
|
||||
futures-util = "0.3.30"
|
||||
itertools = "0.14.0"
|
||||
tantivy-bitpacker = { version= "0.9", path="../bitpacker" }
|
||||
tantivy-fst = "0.5"
|
||||
tantivy-fst = { git = "https://github.com/paradedb/fst.git" }
|
||||
# experimental gives us access to Decompressor::upper_bound
|
||||
zstd = { version = "0.13", optional = true, features = ["experimental"] }
|
||||
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
//!
|
||||
//! // Open the sstable.
|
||||
//! let sstable =
|
||||
//! Dictionary::<MonotonicU64SSTable>::from_bytes(OwnedBytes::new(sstable_bytes)).unwrap();
|
||||
//! Dictionary::<MonotonicU64SSTable>::from_bytes_for_tests(OwnedBytes::new(sstable_bytes)).unwrap();
|
||||
//!
|
||||
//! // Search for a key.
|
||||
//! let value = sstable.get(b"banana").unwrap();
|
||||
|
||||
@@ -12,7 +12,8 @@ fn test_create_and_search_sstable() {
|
||||
|
||||
// Open the sstable.
|
||||
let sstable =
|
||||
Dictionary::<MonotonicU64SSTable>::from_bytes(OwnedBytes::new(sstable_bytes)).unwrap();
|
||||
Dictionary::<MonotonicU64SSTable>::from_bytes_for_tests(OwnedBytes::new(sstable_bytes))
|
||||
.unwrap();
|
||||
|
||||
// Search for a key.
|
||||
let value = sstable.get(b"banana").unwrap();
|
||||
@@ -34,7 +35,8 @@ fn test_custom_value_sstable() {
|
||||
|
||||
// Open the sstable.
|
||||
let sstable =
|
||||
Dictionary::<VecU32ValueSSTable>::from_bytes(OwnedBytes::new(sstable_bytes)).unwrap();
|
||||
Dictionary::<VecU32ValueSSTable>::from_bytes_for_tests(OwnedBytes::new(sstable_bytes))
|
||||
.unwrap();
|
||||
|
||||
let mut stream = sstable.stream().unwrap();
|
||||
assert!(stream.advance());
|
||||
|
||||
Reference in New Issue
Block a user