mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-03 09:00:42 +00:00
field norm reader is not an option anymore.
This commit is contained in:
@@ -85,6 +85,7 @@ impl SegmentReader {
|
||||
.unwrap_or(0u32)
|
||||
}
|
||||
|
||||
/// Returns true iff some of the documents of the segment have been deleted.
|
||||
pub fn has_deletes(&self) -> bool {
|
||||
self.delete_bitset().is_some()
|
||||
}
|
||||
@@ -105,12 +106,12 @@ impl SegmentReader {
|
||||
) -> fastfield::Result<FastFieldReader<Item>> {
|
||||
let field_entry = self.schema.get_field_entry(field);
|
||||
if Item::fast_field_cardinality(field_entry.field_type()) == Some(Cardinality::SingleValue)
|
||||
{
|
||||
self.fast_fields_composite
|
||||
.open_read(field)
|
||||
.ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
|
||||
.map(FastFieldReader::open)
|
||||
} else {
|
||||
{
|
||||
self.fast_fields_composite
|
||||
.open_read(field)
|
||||
.ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
|
||||
.map(FastFieldReader::open)
|
||||
} else {
|
||||
Err(FastFieldNotAvailableError::new(field_entry))
|
||||
}
|
||||
}
|
||||
@@ -123,17 +124,17 @@ impl SegmentReader {
|
||||
) -> fastfield::Result<MultiValueIntFastFieldReader<Item>> {
|
||||
let field_entry = self.schema.get_field_entry(field);
|
||||
if Item::fast_field_cardinality(field_entry.field_type()) == Some(Cardinality::MultiValues)
|
||||
{
|
||||
let idx_reader = self.fast_fields_composite
|
||||
.open_read_with_idx(field, 0)
|
||||
.ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
|
||||
.map(FastFieldReader::open)?;
|
||||
let vals_reader = self.fast_fields_composite
|
||||
.open_read_with_idx(field, 1)
|
||||
.ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
|
||||
.map(FastFieldReader::open)?;
|
||||
Ok(MultiValueIntFastFieldReader::open(idx_reader, vals_reader))
|
||||
} else {
|
||||
{
|
||||
let idx_reader = self.fast_fields_composite
|
||||
.open_read_with_idx(field, 0)
|
||||
.ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
|
||||
.map(FastFieldReader::open)?;
|
||||
let vals_reader = self.fast_fields_composite
|
||||
.open_read_with_idx(field, 1)
|
||||
.ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
|
||||
.map(FastFieldReader::open)?;
|
||||
Ok(MultiValueIntFastFieldReader::open(idx_reader, vals_reader))
|
||||
} else {
|
||||
Err(FastFieldNotAvailableError::new(field_entry))
|
||||
}
|
||||
}
|
||||
@@ -170,10 +171,15 @@ impl SegmentReader {
|
||||
///
|
||||
/// They are simply stored as a fast field, serialized in
|
||||
/// the `.fieldnorm` file of the segment.
|
||||
pub fn get_fieldnorms_reader(&self, field: Field) -> Option<FieldNormReader> {
|
||||
self.fieldnorms_composite
|
||||
.open_read(field)
|
||||
.map(FieldNormReader::open)
|
||||
pub fn get_fieldnorms_reader(&self, field: Field) -> FieldNormReader {
|
||||
if let Some(fieldnorm_source) = self.fieldnorms_composite
|
||||
.open_read(field) {
|
||||
FieldNormReader::open(fieldnorm_source)
|
||||
} else {
|
||||
let field_name = self.schema.get_field_name(field);
|
||||
let err_msg= format!("Field norm not found for field {:?}. Was it market as indexed during indexing.", field_name);
|
||||
panic!(err_msg);
|
||||
}
|
||||
}
|
||||
|
||||
/// Accessor to the segment's `StoreReader`.
|
||||
|
||||
@@ -30,12 +30,11 @@ fn compute_total_num_tokens(readers: &[SegmentReader], field: Field) -> u64 {
|
||||
if reader.has_deletes() {
|
||||
// if there are deletes, then we use an approximation
|
||||
// using the fieldnorm
|
||||
if let Some(fieldnorms_reader) = reader.get_fieldnorms_reader(field) {
|
||||
for doc in 0..reader.max_doc() {
|
||||
if !reader.is_deleted(doc) {
|
||||
let fieldnorm_id = fieldnorms_reader.fieldnorm_id(doc);
|
||||
count[fieldnorm_id as usize] += 1;
|
||||
}
|
||||
let fieldnorms_reader = reader.get_fieldnorms_reader(field);
|
||||
for doc in 0..reader.max_doc() {
|
||||
if !reader.is_deleted(doc) {
|
||||
let fieldnorm_id = fieldnorms_reader.fieldnorm_id(doc);
|
||||
count[fieldnorm_id as usize] += 1;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@@ -133,13 +132,10 @@ impl IndexMerger {
|
||||
for field in fields {
|
||||
fieldnorms_data.clear();
|
||||
for reader in &self.readers {
|
||||
let fieldnorms_reader_opt = reader.get_fieldnorms_reader(field);
|
||||
let fieldnorms_reader = reader.get_fieldnorms_reader(field);
|
||||
for doc_id in 0..reader.max_doc() {
|
||||
if !reader.is_deleted(doc_id) {
|
||||
let fieldnorm_id = fieldnorms_reader_opt
|
||||
.as_ref()
|
||||
.map(|reader| reader.fieldnorm_id(doc_id))
|
||||
.unwrap_or(0u8);
|
||||
let fieldnorm_id = fieldnorms_reader.fieldnorm_id(doc_id);
|
||||
fieldnorms_data.push(fieldnorm_id);
|
||||
}
|
||||
}
|
||||
|
||||
31
src/lib.rs
31
src/lib.rs
@@ -399,6 +399,35 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
// Indexes a single document that only populates `text`, then checks that a
// fieldnorm reader can be obtained for both `text` and the never-populated
// `title` field.
#[test]
fn test_fieldnorm_no_docs_with_field() {
    let mut schema_builder = SchemaBuilder::default();
    let title_field = schema_builder.add_text_field("title", TEXT);
    let text_field = schema_builder.add_text_field("text", TEXT);
    let schema = schema_builder.build();
    let index = Index::create_in_ram(schema);
    {
        // The writer lives in its own scope so that it is dropped before
        // the searchers are loaded, as in the original layout of this test.
        let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
        index_writer.add_document(doc!(text_field=>"a b c"));
        index_writer.commit().unwrap();
    }
    index.load_searchers().unwrap();
    let searcher = index.searcher();
    let segment_reader = searcher.segment_reader(0);

    // "a b c" holds three tokens.
    let text_fieldnorms = segment_reader.get_fieldnorms_reader(text_field);
    assert_eq!(text_fieldnorms.fieldnorm(0), 3);

    // No document has a `title` value; the fieldnorm id is expected to be 0.
    let title_fieldnorms = segment_reader.get_fieldnorms_reader(title_field);
    assert_eq!(title_fieldnorms.fieldnorm_id(0), 0);
}
|
||||
|
||||
#[test]
|
||||
fn test_fieldnorm() {
|
||||
let mut schema_builder = SchemaBuilder::default();
|
||||
@@ -424,7 +453,7 @@ mod tests {
|
||||
index.load_searchers().unwrap();
|
||||
let searcher = index.searcher();
|
||||
let segment_reader: &SegmentReader = searcher.segment_reader(0);
|
||||
let fieldnorms_reader = segment_reader.get_fieldnorms_reader(text_field).unwrap();
|
||||
let fieldnorms_reader = segment_reader.get_fieldnorms_reader(text_field);
|
||||
assert_eq!(fieldnorms_reader.fieldnorm(0), 3);
|
||||
assert_eq!(fieldnorms_reader.fieldnorm(1), 0);
|
||||
assert_eq!(fieldnorms_reader.fieldnorm(2), 2);
|
||||
|
||||
@@ -208,7 +208,7 @@ pub mod tests {
|
||||
{
|
||||
let segment_reader = SegmentReader::open(&segment).unwrap();
|
||||
{
|
||||
let fieldnorm_reader = segment_reader.get_fieldnorms_reader(text_field).unwrap();
|
||||
let fieldnorm_reader = segment_reader.get_fieldnorms_reader(text_field) ;
|
||||
assert_eq!(fieldnorm_reader.fieldnorm(0), 8 + 5);
|
||||
assert_eq!(fieldnorm_reader.fieldnorm(1), 2);
|
||||
for i in 2..1000 {
|
||||
|
||||
@@ -7,12 +7,20 @@ use std::borrow::Borrow;
|
||||
use Score;
|
||||
use query::term_query::{TermScorerNoDeletes, TermScorerWithDeletes};
|
||||
|
||||
pub fn intersect_scorers(mut docsets: Vec<Box<Scorer>>) -> Box<Scorer> {
|
||||
let num_docsets = docsets.len();
|
||||
docsets.sort_by(|left, right| right.size_hint().cmp(&left.size_hint()));
|
||||
let rarest_opt = docsets.pop();
|
||||
let second_rarest_opt = docsets.pop();
|
||||
docsets.reverse();
|
||||
/// Returns the intersection scorer.
|
||||
///
|
||||
/// The score associated with each document is the sum of the
|
||||
/// scores of the `Scorer`s given as arguments.
|
||||
///
|
||||
/// For better performance, the function uses a
|
||||
/// specialized implementation if the two
|
||||
/// shortest scorers are `TermScorer`s.
|
||||
pub fn intersect_scorers(mut scorers: Vec<Box<Scorer>>) -> Box<Scorer> {
|
||||
let num_docsets = scorers.len();
|
||||
scorers.sort_by(|left, right| right.size_hint().cmp(&left.size_hint()));
|
||||
let rarest_opt = scorers.pop();
|
||||
let second_rarest_opt = scorers.pop();
|
||||
scorers.reverse();
|
||||
match (rarest_opt, second_rarest_opt) {
|
||||
(None, None) => box EmptyScorer,
|
||||
(Some(single_docset), None) => single_docset,
|
||||
@@ -27,7 +35,7 @@ pub fn intersect_scorers(mut docsets: Vec<Box<Scorer>>) -> Box<Scorer> {
|
||||
return box Intersection {
|
||||
left,
|
||||
right,
|
||||
others: docsets,
|
||||
others: scorers,
|
||||
num_docsets
|
||||
}
|
||||
}
|
||||
@@ -43,7 +51,7 @@ pub fn intersect_scorers(mut docsets: Vec<Box<Scorer>>) -> Box<Scorer> {
|
||||
return box Intersection {
|
||||
left,
|
||||
right,
|
||||
others: docsets,
|
||||
others: scorers,
|
||||
num_docsets
|
||||
}
|
||||
}
|
||||
@@ -52,7 +60,7 @@ pub fn intersect_scorers(mut docsets: Vec<Box<Scorer>>) -> Box<Scorer> {
|
||||
return box Intersection {
|
||||
left,
|
||||
right,
|
||||
others: docsets,
|
||||
others: scorers,
|
||||
num_docsets
|
||||
}
|
||||
}
|
||||
|
||||
@@ -31,7 +31,7 @@ impl Weight for PhraseWeight {
|
||||
fn scorer(&self, reader: &SegmentReader) -> Result<Box<Scorer>> {
|
||||
let similarity_weight = self.similarity_weight.clone();
|
||||
let field = self.phrase_terms[0].field();
|
||||
let fieldnorm_reader = reader.get_fieldnorms_reader(field).expect("Failed to find fieldnorm for field");
|
||||
let fieldnorm_reader = reader.get_fieldnorms_reader(field);
|
||||
if reader.has_deletes() {
|
||||
let mut term_postings_list = Vec::new();
|
||||
for term in &self.phrase_terms {
|
||||
|
||||
@@ -8,9 +8,22 @@ use fieldnorm::FieldNormReader;
|
||||
use query::bm25::BM25Weight;
|
||||
|
||||
pub struct TermScorer<TPostings: Postings> {
|
||||
pub fieldnorm_reader: FieldNormReader,
|
||||
pub postings: TPostings,
|
||||
pub similarity_weight: BM25Weight,
|
||||
postings: TPostings,
|
||||
fieldnorm_reader: FieldNormReader,
|
||||
similarity_weight: BM25Weight,
|
||||
}
|
||||
|
||||
|
||||
impl<TPostings: Postings> TermScorer<TPostings> {
|
||||
pub fn new(postings: TPostings,
|
||||
fieldnorm_reader: FieldNormReader,
|
||||
similarity_weight: BM25Weight) -> TermScorer<TPostings> {
|
||||
TermScorer {
|
||||
postings,
|
||||
fieldnorm_reader,
|
||||
similarity_weight,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<TPostings: Postings> DocSet for TermScorer<TPostings> {
|
||||
|
||||
@@ -22,44 +22,35 @@ impl Weight for TermWeight {
|
||||
fn scorer(&self, reader: &SegmentReader) -> Result<Box<Scorer>> {
|
||||
let field = self.term.field();
|
||||
let inverted_index = reader.inverted_index(field);
|
||||
let fieldnorm_reader = reader.get_fieldnorms_reader(field).expect("Failed to find fieldnorm reader for field.");
|
||||
let scorer: Box<Scorer>;
|
||||
let fieldnorm_reader = reader.get_fieldnorms_reader(field);
|
||||
let similarity_weight = self.similarity_weight.clone();
|
||||
if reader.has_deletes() {
|
||||
let postings_opt: Option<SegmentPostings<DeleteBitSet>> =
|
||||
inverted_index.read_postings(&self.term, self.index_record_option);
|
||||
scorer =
|
||||
if let Some(segment_postings) = postings_opt {
|
||||
box TermScorer {
|
||||
fieldnorm_reader,
|
||||
postings: segment_postings,
|
||||
similarity_weight: self.similarity_weight.clone()
|
||||
}
|
||||
Ok(box TermScorer::new(segment_postings,
|
||||
fieldnorm_reader,
|
||||
similarity_weight))
|
||||
} else {
|
||||
box TermScorer {
|
||||
Ok(box TermScorer::new(
|
||||
SegmentPostings::<NoDelete>::empty(),
|
||||
fieldnorm_reader,
|
||||
postings: SegmentPostings::<NoDelete>::empty(),
|
||||
similarity_weight: self.similarity_weight.clone()
|
||||
}
|
||||
};
|
||||
similarity_weight))
|
||||
}
|
||||
} else {
|
||||
let postings_opt: Option<SegmentPostings<NoDelete>> =
|
||||
inverted_index.read_postings_no_deletes(&self.term, self.index_record_option);
|
||||
scorer =
|
||||
if let Some(segment_postings) = postings_opt {
|
||||
box TermScorer {
|
||||
fieldnorm_reader,
|
||||
postings: segment_postings,
|
||||
similarity_weight: self.similarity_weight.clone()
|
||||
}
|
||||
} else {
|
||||
box TermScorer {
|
||||
fieldnorm_reader,
|
||||
postings: SegmentPostings::<NoDelete>::empty(),
|
||||
similarity_weight: self.similarity_weight.clone()
|
||||
}
|
||||
};
|
||||
inverted_index.read_postings_no_deletes(&self.term, self.index_record_option);
|
||||
if let Some(segment_postings) = postings_opt {
|
||||
Ok(box TermScorer::new(segment_postings,
|
||||
fieldnorm_reader,
|
||||
similarity_weight))
|
||||
} else {
|
||||
Ok(box TermScorer::new(
|
||||
SegmentPostings::<NoDelete>::empty(),
|
||||
fieldnorm_reader,
|
||||
similarity_weight))
|
||||
}
|
||||
}
|
||||
Ok(scorer)
|
||||
}
|
||||
|
||||
fn count(&self, reader: &SegmentReader) -> Result<u32> {
|
||||
|
||||
Reference in New Issue
Block a user