mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-06 01:02:55 +00:00
* remove searcher pool and make Searcher cloneable closes #1410 * use SearcherInner in InnerIndexReader
155 lines
6.4 KiB
Rust
155 lines
6.4 KiB
Rust
//! The fieldnorm represents the length associated to
|
|
//! a given Field of a given document.
|
|
//!
|
|
//! This metric is important to compute the score of a
|
|
//! document: a document having a query word in one of its short fields
|
|
//! (e.g. title) is likely to be more relevant than in one of its longer field
|
|
//! (e.g. body).
|
|
//!
|
|
//! It encodes `fieldnorm` on one byte with some precision loss,
|
|
//! using the exact same scheme as Lucene. Each value is placed on a log-scale
|
|
//! that takes values from `0` to `255`.
|
|
//!
|
|
//! A value on this scale is identified by a `fieldnorm_id`.
|
|
//! Apart from compression, this scale also makes it possible to
|
|
//! precompute computationally expensive functions of the fieldnorm
|
|
//! in a very short array.
|
|
//!
|
|
//! This trick is used by the Bm25 similarity.
|
|
mod code;
|
|
mod reader;
|
|
mod serializer;
|
|
mod writer;
|
|
|
|
use self::code::{fieldnorm_to_id, id_to_fieldnorm};
|
|
pub use self::reader::{FieldNormReader, FieldNormReaders};
|
|
pub use self::serializer::FieldNormsSerializer;
|
|
pub use self::writer::FieldNormsWriter;
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use std::path::Path;
|
|
|
|
use once_cell::sync::Lazy;
|
|
|
|
use crate::directory::{CompositeFile, Directory, RamDirectory, WritePtr};
|
|
use crate::fieldnorm::{FieldNormReader, FieldNormsSerializer, FieldNormsWriter};
|
|
use crate::query::{Query, TermQuery};
|
|
use crate::schema::{
|
|
Field, IndexRecordOption, Schema, TextFieldIndexing, TextOptions, STORED, TEXT,
|
|
};
|
|
use crate::{Index, Term, TERMINATED};
|
|
|
|
pub static SCHEMA: Lazy<Schema> = Lazy::new(|| {
|
|
let mut schema_builder = Schema::builder();
|
|
schema_builder.add_text_field("field", STORED);
|
|
schema_builder.add_text_field("txt_field", TEXT);
|
|
schema_builder.add_text_field(
|
|
"str_field",
|
|
TextOptions::default().set_indexing_options(
|
|
TextFieldIndexing::default()
|
|
.set_index_option(IndexRecordOption::Basic)
|
|
.set_fieldnorms(false),
|
|
),
|
|
);
|
|
schema_builder.build()
|
|
});
|
|
|
|
pub static FIELD: Lazy<Field> = Lazy::new(|| SCHEMA.get_field("field").unwrap());
|
|
pub static TXT_FIELD: Lazy<Field> = Lazy::new(|| SCHEMA.get_field("txt_field").unwrap());
|
|
pub static STR_FIELD: Lazy<Field> = Lazy::new(|| SCHEMA.get_field("str_field").unwrap());
|
|
|
|
#[test]
|
|
#[should_panic(expected = "Cannot register a given fieldnorm twice")]
|
|
pub fn test_should_panic_when_recording_fieldnorm_twice_for_same_doc() {
|
|
let mut fieldnorm_writers = FieldNormsWriter::for_schema(&SCHEMA);
|
|
fieldnorm_writers.record(0u32, *TXT_FIELD, 5);
|
|
fieldnorm_writers.record(0u32, *TXT_FIELD, 3);
|
|
}
|
|
|
|
#[test]
|
|
pub fn test_fieldnorm() -> crate::Result<()> {
|
|
let path = Path::new("test");
|
|
let directory: RamDirectory = RamDirectory::create();
|
|
{
|
|
let write: WritePtr = directory.open_write(Path::new("test"))?;
|
|
let serializer = FieldNormsSerializer::from_write(write)?;
|
|
let mut fieldnorm_writers = FieldNormsWriter::for_schema(&SCHEMA);
|
|
fieldnorm_writers.record(2u32, *TXT_FIELD, 5);
|
|
fieldnorm_writers.record(3u32, *TXT_FIELD, 3);
|
|
fieldnorm_writers.serialize(serializer, None)?;
|
|
}
|
|
let file = directory.open_read(path)?;
|
|
{
|
|
let fields_composite = CompositeFile::open(&file)?;
|
|
assert!(fields_composite.open_read(*FIELD).is_none());
|
|
assert!(fields_composite.open_read(*STR_FIELD).is_none());
|
|
let data = fields_composite.open_read(*TXT_FIELD).unwrap();
|
|
let fieldnorm_reader = FieldNormReader::open(data)?;
|
|
assert_eq!(fieldnorm_reader.fieldnorm(0u32), 0u32);
|
|
assert_eq!(fieldnorm_reader.fieldnorm(1u32), 0u32);
|
|
assert_eq!(fieldnorm_reader.fieldnorm(2u32), 5u32);
|
|
assert_eq!(fieldnorm_reader.fieldnorm(3u32), 3u32);
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
#[test]
|
|
fn test_fieldnorm_disabled() -> crate::Result<()> {
|
|
let mut schema_builder = Schema::builder();
|
|
let text_options = TextOptions::default()
|
|
.set_indexing_options(TextFieldIndexing::default().set_fieldnorms(false));
|
|
let text = schema_builder.add_text_field("text", text_options);
|
|
let schema = schema_builder.build();
|
|
let index = Index::create_in_ram(schema);
|
|
let mut writer = index.writer_for_tests()?;
|
|
writer.add_document(doc!(text=>"hello"))?;
|
|
writer.add_document(doc!(text=>"hello hello hello"))?;
|
|
writer.commit()?;
|
|
let reader = index.reader()?;
|
|
let searcher = reader.searcher();
|
|
let query = TermQuery::new(
|
|
Term::from_field_text(text, "hello"),
|
|
IndexRecordOption::WithFreqs,
|
|
);
|
|
let weight = query.weight(&searcher, true)?;
|
|
let mut scorer = weight.scorer(searcher.segment_reader(0), 1.0f32)?;
|
|
assert_eq!(scorer.doc(), 0);
|
|
assert!((scorer.score() - 0.22920431).abs() < 0.001f32);
|
|
assert_eq!(scorer.advance(), 1);
|
|
assert_eq!(scorer.doc(), 1);
|
|
assert!((scorer.score() - 0.22920431).abs() < 0.001f32);
|
|
assert_eq!(scorer.advance(), TERMINATED);
|
|
Ok(())
|
|
}
|
|
|
|
#[test]
|
|
fn test_fieldnorm_enabled() -> crate::Result<()> {
|
|
let mut schema_builder = Schema::builder();
|
|
let text_options = TextOptions::default()
|
|
.set_indexing_options(TextFieldIndexing::default().set_fieldnorms(true));
|
|
let text = schema_builder.add_text_field("text", text_options);
|
|
let schema = schema_builder.build();
|
|
let index = Index::create_in_ram(schema);
|
|
let mut writer = index.writer_for_tests()?;
|
|
writer.add_document(doc!(text=>"hello"))?;
|
|
writer.add_document(doc!(text=>"hello hello hello"))?;
|
|
writer.commit()?;
|
|
let reader = index.reader()?;
|
|
let searcher = reader.searcher();
|
|
let query = TermQuery::new(
|
|
Term::from_field_text(text, "hello"),
|
|
IndexRecordOption::WithFreqs,
|
|
);
|
|
let weight = query.weight(&searcher, true)?;
|
|
let mut scorer = weight.scorer(searcher.segment_reader(0), 1.0f32)?;
|
|
assert_eq!(scorer.doc(), 0);
|
|
assert!((scorer.score() - 0.22920431).abs() < 0.001f32);
|
|
assert_eq!(scorer.advance(), 1);
|
|
assert_eq!(scorer.doc(), 1);
|
|
assert!((scorer.score() - 0.15136132).abs() < 0.001f32);
|
|
assert_eq!(scorer.advance(), TERMINATED);
|
|
Ok(())
|
|
}
|
|
}
|