Field norm reader is no longer an `Option`.

This commit is contained in:
Paul Masurel
2018-03-26 13:25:29 +09:00
parent 9712a75399
commit 4d65771e04
8 changed files with 119 additions and 76 deletions

View File

@@ -85,6 +85,7 @@ impl SegmentReader {
.unwrap_or(0u32)
}
/// Returns true iff some of the documents of the segment have been deleted.
///
/// This is determined by checking whether `delete_bitset()` returns `Some`.
pub fn has_deletes(&self) -> bool {
self.delete_bitset().is_some()
}
@@ -105,12 +106,12 @@ impl SegmentReader {
) -> fastfield::Result<FastFieldReader<Item>> {
let field_entry = self.schema.get_field_entry(field);
if Item::fast_field_cardinality(field_entry.field_type()) == Some(Cardinality::SingleValue)
{
self.fast_fields_composite
.open_read(field)
.ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
.map(FastFieldReader::open)
} else {
{
self.fast_fields_composite
.open_read(field)
.ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
.map(FastFieldReader::open)
} else {
Err(FastFieldNotAvailableError::new(field_entry))
}
}
@@ -123,17 +124,17 @@ impl SegmentReader {
) -> fastfield::Result<MultiValueIntFastFieldReader<Item>> {
let field_entry = self.schema.get_field_entry(field);
if Item::fast_field_cardinality(field_entry.field_type()) == Some(Cardinality::MultiValues)
{
let idx_reader = self.fast_fields_composite
.open_read_with_idx(field, 0)
.ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
.map(FastFieldReader::open)?;
let vals_reader = self.fast_fields_composite
.open_read_with_idx(field, 1)
.ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
.map(FastFieldReader::open)?;
Ok(MultiValueIntFastFieldReader::open(idx_reader, vals_reader))
} else {
{
let idx_reader = self.fast_fields_composite
.open_read_with_idx(field, 0)
.ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
.map(FastFieldReader::open)?;
let vals_reader = self.fast_fields_composite
.open_read_with_idx(field, 1)
.ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
.map(FastFieldReader::open)?;
Ok(MultiValueIntFastFieldReader::open(idx_reader, vals_reader))
} else {
Err(FastFieldNotAvailableError::new(field_entry))
}
}
@@ -170,10 +171,15 @@ impl SegmentReader {
///
/// They are simply stored as a fast field, serialized in
/// the `.fieldnorm` file of the segment.
pub fn get_fieldnorms_reader(&self, field: Field) -> Option<FieldNormReader> {
self.fieldnorms_composite
.open_read(field)
.map(FieldNormReader::open)
/// Returns the `FieldNormReader` of the given field.
///
/// # Panics
///
/// Panics if the segment's fieldnorm composite holds no entry for `field`.
pub fn get_fieldnorms_reader(&self, field: Field) -> FieldNormReader {
    self.fieldnorms_composite
        .open_read(field)
        .map(FieldNormReader::open)
        .unwrap_or_else(|| {
            // The message is only built on the failure path, and it is passed
            // to `panic!` as a format string rather than as a pre-built
            // `String` (non-literal panic messages are deprecated).
            let field_name = self.schema.get_field_name(field);
            panic!(
                "Field norm not found for field {:?}. Was it marked as indexed during indexing?",
                field_name
            )
        })
}
/// Accessor to the segment's `StoreReader`.

View File

@@ -30,12 +30,11 @@ fn compute_total_num_tokens(readers: &[SegmentReader], field: Field) -> u64 {
if reader.has_deletes() {
// if there are deletes, then we use an approximation
// using the fieldnorm
if let Some(fieldnorms_reader) = reader.get_fieldnorms_reader(field) {
for doc in 0..reader.max_doc() {
if !reader.is_deleted(doc) {
let fieldnorm_id = fieldnorms_reader.fieldnorm_id(doc);
count[fieldnorm_id as usize] += 1;
}
let fieldnorms_reader = reader.get_fieldnorms_reader(field);
for doc in 0..reader.max_doc() {
if !reader.is_deleted(doc) {
let fieldnorm_id = fieldnorms_reader.fieldnorm_id(doc);
count[fieldnorm_id as usize] += 1;
}
}
} else {
@@ -133,13 +132,10 @@ impl IndexMerger {
for field in fields {
fieldnorms_data.clear();
for reader in &self.readers {
let fieldnorms_reader_opt = reader.get_fieldnorms_reader(field);
let fieldnorms_reader = reader.get_fieldnorms_reader(field);
for doc_id in 0..reader.max_doc() {
if !reader.is_deleted(doc_id) {
let fieldnorm_id = fieldnorms_reader_opt
.as_ref()
.map(|reader| reader.fieldnorm_id(doc_id))
.unwrap_or(0u8);
let fieldnorm_id = fieldnorms_reader.fieldnorm_id(doc_id);
fieldnorms_data.push(fieldnorm_id);
}
}

View File

@@ -399,6 +399,35 @@ mod tests {
}
}
#[test]
fn test_fieldnorm_no_docs_with_field() {
    // Two text fields are declared, but the only indexed document
    // provides a value for `text` alone.
    let mut schema_builder = SchemaBuilder::default();
    let title_field = schema_builder.add_text_field("title", TEXT);
    let text_field = schema_builder.add_text_field("text", TEXT);
    let index = Index::create_in_ram(schema_builder.build());
    {
        // The writer is scoped so that it is dropped before searchers are loaded.
        let mut writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
        writer.add_document(doc!(text_field=>"a b c"));
        writer.commit().unwrap();
    }
    index.load_searchers().unwrap();
    let searcher = index.searcher();
    let segment_reader = searcher.segment_reader(0);

    // "a b c" has three tokens.
    let text_fieldnorms = segment_reader.get_fieldnorms_reader(text_field);
    assert_eq!(text_fieldnorms.fieldnorm(0), 3);

    // A reader must still be available for the field that no document
    // has a value for, and it must report a fieldnorm id of 0.
    let title_fieldnorms = segment_reader.get_fieldnorms_reader(title_field);
    assert_eq!(title_fieldnorms.fieldnorm_id(0), 0);
}
#[test]
fn test_fieldnorm() {
let mut schema_builder = SchemaBuilder::default();
@@ -424,7 +453,7 @@ mod tests {
index.load_searchers().unwrap();
let searcher = index.searcher();
let segment_reader: &SegmentReader = searcher.segment_reader(0);
let fieldnorms_reader = segment_reader.get_fieldnorms_reader(text_field).unwrap();
let fieldnorms_reader = segment_reader.get_fieldnorms_reader(text_field);
assert_eq!(fieldnorms_reader.fieldnorm(0), 3);
assert_eq!(fieldnorms_reader.fieldnorm(1), 0);
assert_eq!(fieldnorms_reader.fieldnorm(2), 2);

View File

@@ -208,7 +208,7 @@ pub mod tests {
{
let segment_reader = SegmentReader::open(&segment).unwrap();
{
let fieldnorm_reader = segment_reader.get_fieldnorms_reader(text_field).unwrap();
let fieldnorm_reader = segment_reader.get_fieldnorms_reader(text_field) ;
assert_eq!(fieldnorm_reader.fieldnorm(0), 8 + 5);
assert_eq!(fieldnorm_reader.fieldnorm(1), 2);
for i in 2..1000 {

View File

@@ -7,12 +7,20 @@ use std::borrow::Borrow;
use Score;
use query::term_query::{TermScorerNoDeletes, TermScorerWithDeletes};
pub fn intersect_scorers(mut docsets: Vec<Box<Scorer>>) -> Box<Scorer> {
let num_docsets = docsets.len();
docsets.sort_by(|left, right| right.size_hint().cmp(&left.size_hint()));
let rarest_opt = docsets.pop();
let second_rarest_opt = docsets.pop();
docsets.reverse();
/// Returns the intersection scorer.
///
/// The score associated with each document is the sum of the
/// scores of the `Scorer`s given as arguments.
///
/// For better performance, the function uses a
/// specialized implementation if the two
/// shortest scorers are `TermScorer`s.
pub fn intersect_scorers(mut scorers: Vec<Box<Scorer>>) -> Box<Scorer> {
let num_docsets = scorers.len();
scorers.sort_by(|left, right| right.size_hint().cmp(&left.size_hint()));
let rarest_opt = scorers.pop();
let second_rarest_opt = scorers.pop();
scorers.reverse();
match (rarest_opt, second_rarest_opt) {
(None, None) => box EmptyScorer,
(Some(single_docset), None) => single_docset,
@@ -27,7 +35,7 @@ pub fn intersect_scorers(mut docsets: Vec<Box<Scorer>>) -> Box<Scorer> {
return box Intersection {
left,
right,
others: docsets,
others: scorers,
num_docsets
}
}
@@ -43,7 +51,7 @@ pub fn intersect_scorers(mut docsets: Vec<Box<Scorer>>) -> Box<Scorer> {
return box Intersection {
left,
right,
others: docsets,
others: scorers,
num_docsets
}
}
@@ -52,7 +60,7 @@ pub fn intersect_scorers(mut docsets: Vec<Box<Scorer>>) -> Box<Scorer> {
return box Intersection {
left,
right,
others: docsets,
others: scorers,
num_docsets
}
}

View File

@@ -31,7 +31,7 @@ impl Weight for PhraseWeight {
fn scorer(&self, reader: &SegmentReader) -> Result<Box<Scorer>> {
let similarity_weight = self.similarity_weight.clone();
let field = self.phrase_terms[0].field();
let fieldnorm_reader = reader.get_fieldnorms_reader(field).expect("Failed to find fieldnorm for field");
let fieldnorm_reader = reader.get_fieldnorms_reader(field);
if reader.has_deletes() {
let mut term_postings_list = Vec::new();
for term in &self.phrase_terms {

View File

@@ -8,9 +8,22 @@ use fieldnorm::FieldNormReader;
use query::bm25::BM25Weight;
pub struct TermScorer<TPostings: Postings> {
pub fieldnorm_reader: FieldNormReader,
pub postings: TPostings,
pub similarity_weight: BM25Weight,
postings: TPostings,
fieldnorm_reader: FieldNormReader,
similarity_weight: BM25Weight,
}
impl<TPostings: Postings> TermScorer<TPostings> {
    /// Creates a `TermScorer` from the given postings, field norm reader
    /// and BM25 similarity weight.
    pub fn new(
        postings: TPostings,
        fieldnorm_reader: FieldNormReader,
        similarity_weight: BM25Weight,
    ) -> TermScorer<TPostings> {
        Self { postings, fieldnorm_reader, similarity_weight }
    }
}
impl<TPostings: Postings> DocSet for TermScorer<TPostings> {

View File

@@ -22,44 +22,35 @@ impl Weight for TermWeight {
fn scorer(&self, reader: &SegmentReader) -> Result<Box<Scorer>> {
let field = self.term.field();
let inverted_index = reader.inverted_index(field);
let fieldnorm_reader = reader.get_fieldnorms_reader(field).expect("Failed to find fieldnorm reader for field.");
let scorer: Box<Scorer>;
let fieldnorm_reader = reader.get_fieldnorms_reader(field);
let similarity_weight = self.similarity_weight.clone();
if reader.has_deletes() {
let postings_opt: Option<SegmentPostings<DeleteBitSet>> =
inverted_index.read_postings(&self.term, self.index_record_option);
scorer =
if let Some(segment_postings) = postings_opt {
box TermScorer {
fieldnorm_reader,
postings: segment_postings,
similarity_weight: self.similarity_weight.clone()
}
Ok(box TermScorer::new(segment_postings,
fieldnorm_reader,
similarity_weight))
} else {
box TermScorer {
Ok(box TermScorer::new(
SegmentPostings::<NoDelete>::empty(),
fieldnorm_reader,
postings: SegmentPostings::<NoDelete>::empty(),
similarity_weight: self.similarity_weight.clone()
}
};
similarity_weight))
}
} else {
let postings_opt: Option<SegmentPostings<NoDelete>> =
inverted_index.read_postings_no_deletes(&self.term, self.index_record_option);
scorer =
if let Some(segment_postings) = postings_opt {
box TermScorer {
fieldnorm_reader,
postings: segment_postings,
similarity_weight: self.similarity_weight.clone()
}
} else {
box TermScorer {
fieldnorm_reader,
postings: SegmentPostings::<NoDelete>::empty(),
similarity_weight: self.similarity_weight.clone()
}
};
inverted_index.read_postings_no_deletes(&self.term, self.index_record_option);
if let Some(segment_postings) = postings_opt {
Ok(box TermScorer::new(segment_postings,
fieldnorm_reader,
similarity_weight))
} else {
Ok(box TermScorer::new(
SegmentPostings::<NoDelete>::empty(),
fieldnorm_reader,
similarity_weight))
}
}
Ok(scorer)
}
fn count(&self, reader: &SegmentReader) -> Result<u32> {