Mirror of https://github.com/quickwit-oss/tantivy.git (synced 2026-01-12 20:12:54 +00:00)

Compare commits: blockwand...fieldnorm_ (2 commits)

Commits:
063ed30f66
6db8bb49d6
@@ -8,7 +8,7 @@ use crate::directory::ReadOnlySource;
 use crate::fastfield::DeleteBitSet;
 use crate::fastfield::FacetReader;
 use crate::fastfield::FastFieldReaders;
-use crate::fieldnorm::FieldNormReader;
+use crate::fieldnorm::{FieldNormReader, FieldNormReaders};
 use crate::schema::Field;
 use crate::schema::FieldType;
 use crate::schema::Schema;
@@ -48,7 +48,7 @@ pub struct SegmentReader {
 positions_composite: CompositeFile,
 positions_idx_composite: CompositeFile,
 fast_fields_readers: Arc<FastFieldReaders>,
-fieldnorms_composite: CompositeFile,
+fieldnorm_readers: FieldNormReaders,

 store_source: ReadOnlySource,
 delete_bitset_opt: Option<DeleteBitSet>,
@@ -126,8 +126,8 @@ impl SegmentReader {
 /// They are simply stored as a fast field, serialized in
 /// the `.fieldnorm` file of the segment.
 pub fn get_fieldnorms_reader(&self, field: Field) -> FieldNormReader {
-if let Some(fieldnorm_source) = self.fieldnorms_composite.open_read(field) {
-FieldNormReader::open(fieldnorm_source)
+if let Some(fieldnorm_reader) = self.fieldnorm_readers.get_field(field) {
+fieldnorm_reader
 } else {
 let field_name = self.schema.get_field_name(field);
 let err_msg = format!(
@@ -178,8 +178,8 @@ impl SegmentReader {
 let fast_field_readers =
 Arc::new(FastFieldReaders::load_all(&schema, &fast_fields_composite)?);

-let fieldnorms_data = segment.open_read(SegmentComponent::FIELDNORMS)?;
-let fieldnorms_composite = CompositeFile::open(&fieldnorms_data)?;
+let fieldnorm_data = segment.open_read(SegmentComponent::FIELDNORMS)?;
+let fieldnorm_readers = FieldNormReaders::new(fieldnorm_data)?;

 let delete_bitset_opt = if segment.meta().has_deletes() {
 let delete_data = segment.open_read(SegmentComponent::DELETE)?;
@@ -195,7 +195,7 @@ impl SegmentReader {
 termdict_composite,
 postings_composite,
 fast_fields_readers: fast_field_readers,
-fieldnorms_composite,
+fieldnorm_readers,
 segment_id: segment.id(),
 store_source,
 delete_bitset_opt,
@@ -308,7 +308,7 @@ impl SegmentReader {
 self.positions_composite.space_usage(),
 self.positions_idx_composite.space_usage(),
 self.fast_fields_readers.space_usage(),
-self.fieldnorms_composite.space_usage(),
+self.fieldnorm_readers.space_usage(),
 self.get_store_reader().space_usage(),
 self.delete_bitset_opt
 .as_ref()
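The six hunks above swap SegmentReader's raw `fieldnorms_composite` file for the new `FieldNormReaders` registry while leaving the public accessor signature untouched. A minimal caller-side sketch under that assumption (the helper below is hypothetical and not part of this diff; the `fieldnorm(doc)` accessor is the one documented in the reader changes further down):

    // Hypothetical helper showing the unchanged call site.
    fn print_fieldnorms(segment_reader: &tantivy::SegmentReader, field: tantivy::schema::Field) {
        // Same call as before this change; the lookup now goes through FieldNormReaders.
        let fieldnorm_reader = segment_reader.get_fieldnorms_reader(field);
        for doc in 0..fieldnorm_reader.num_docs() {
            // Approximate number of tokens indexed in `field` for this document.
            println!("doc {}: fieldnorm {}", doc, fieldnorm_reader.fieldnorm(doc));
        }
    }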
@@ -21,7 +21,7 @@ mod reader;
 mod serializer;
 mod writer;

-pub use self::reader::FieldNormReader;
+pub use self::reader::{FieldNormReader, FieldNormReaders};
 pub use self::serializer::FieldNormsSerializer;
 pub use self::writer::FieldNormsWriter;

@@ -1,6 +1,41 @@
 use super::{fieldnorm_to_id, id_to_fieldnorm};
+use crate::common::CompositeFile;
 use crate::directory::ReadOnlySource;
+use crate::schema::Field;
+use crate::space_usage::PerFieldSpaceUsage;
 use crate::DocId;
+use std::sync::Arc;

+/// Reader for the fieldnorm (for each document, the number of tokens indexed in the
+/// field) of all indexed fields in the index.
+///
+/// Each fieldnorm is approximately compressed over one byte. We refer to this byte as
+/// `fieldnorm_id`.
+/// The mapping from `fieldnorm` to `fieldnorm_id` is given by monotonic.
+#[derive(Clone)]
+pub struct FieldNormReaders {
+data: Arc<CompositeFile>,
+}
+
+impl FieldNormReaders {
+/// Creates a field norm reader.
+pub fn new(source: ReadOnlySource) -> crate::Result<FieldNormReaders> {
+let data = CompositeFile::open(&source)?;
+Ok(FieldNormReaders {
+data: Arc::new(data),
+})
+}
+
+/// Returns the FieldNormReader for a specific field.
+pub fn get_field(&self, field: Field) -> Option<FieldNormReader> {
+self.data.open_read(field).map(FieldNormReader::open)
+}
+
+/// Return a break down of the space usage per field.
+pub fn space_usage(&self) -> PerFieldSpaceUsage {
+self.data.space_usage()
+}
+}
+
 /// Reads the fieldnorm associated to a document.
 /// The fieldnorm represents the length associated to
@@ -19,6 +54,7 @@ use crate::DocId;
 /// Apart from compression, this scale also makes it possible to
 /// precompute computationally expensive functions of the fieldnorm
 /// in a very short array.
+#[derive(Clone)]
 pub struct FieldNormReader {
 data: ReadOnlySource,
 }
@@ -29,6 +65,11 @@ impl FieldNormReader {
 FieldNormReader { data }
 }

+/// Returns the number of documents in this segment.
+pub fn num_docs(&self) -> u32 {
+self.data.len() as u32
+}
+
 /// Returns the `fieldnorm` associated to a doc id.
 /// The fieldnorm is a value approximating the number
 /// of tokens in a given field of the `doc_id`.
@@ -65,10 +106,11 @@ impl FieldNormReader {
 }

 #[cfg(test)]
-impl From<Vec<u32>> for FieldNormReader {
-fn from(field_norms: Vec<u32>) -> FieldNormReader {
+impl From<&[u32]> for FieldNormReader {
+fn from(field_norms: &[u32]) -> FieldNormReader {
 let field_norms_id = field_norms
-.into_iter()
+.iter()
+.cloned()
 .map(FieldNormReader::fieldnorm_to_id)
 .collect::<Vec<u8>>();
 let field_norms_data = ReadOnlySource::from(field_norms_id);
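The last hunk turns the test-only `From<Vec<u32>>` conversion into one from a borrowed slice. A hedged sketch of an in-crate unit test using it (it only compiles inside tantivy's own test build, since the From impl is #[cfg(test)]; the values read back go through the one-byte `fieldnorm_id` table, so only the document count is asserted):

    #[cfg(test)]
    mod from_slice_sketch {
        use crate::fieldnorm::FieldNormReader;

        #[test]
        fn builds_reader_from_borrowed_fieldnorms() {
            let fieldnorms: &[u32] = &[1, 2, 4, 8];
            let reader = FieldNormReader::from(fieldnorms);
            // One serialized byte per document.
            assert_eq!(reader.num_docs(), 4);
            // Reading back goes through the id table and may round values.
            let _first_norm = reader.fieldnorm(0);
        }
    }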
@@ -78,11 +78,12 @@ impl FieldNormsWriter {
 }

 /// Serialize the seen fieldnorm values to the serializer for all fields.
-pub fn serialize(&self, fieldnorms_serializer: &mut FieldNormsSerializer) -> io::Result<()> {
+pub fn serialize(&self, mut fieldnorms_serializer: FieldNormsSerializer) -> io::Result<()> {
 for &field in self.fields.iter() {
 let fieldnorm_values: &[u8] = &self.fieldnorms_buffer[field.field_id() as usize][..];
 fieldnorms_serializer.serialize_field(field, fieldnorm_values)?;
 }
+fieldnorms_serializer.close()?;
 Ok(())
 }
 }
@@ -167,7 +167,7 @@ impl IndexMerger {

 fn write_fieldnorms(
 &self,
-fieldnorms_serializer: &mut FieldNormsSerializer,
+mut fieldnorms_serializer: FieldNormsSerializer,
 ) -> crate::Result<()> {
 let fields = FieldNormsWriter::fields_with_fieldnorm(&self.schema);
 let mut fieldnorms_data = Vec::with_capacity(self.max_doc as usize);
@@ -182,6 +182,7 @@ impl IndexMerger {
 }
 fieldnorms_serializer.serialize_field(field, &fieldnorms_data[..])?;
 }
+fieldnorms_serializer.close()?;
 Ok(())
 }

@@ -668,8 +669,10 @@ impl IndexMerger {

 impl SerializableSegment for IndexMerger {
 fn write(&self, mut serializer: SegmentSerializer) -> crate::Result<u32> {
+if let Some(fieldnorms_serializer) = serializer.extract_fieldnorms_serializer() {
+self.write_fieldnorms(fieldnorms_serializer)?;
+}
 let term_ord_mappings = self.write_postings(serializer.get_postings_serializer())?;
-self.write_fieldnorms(serializer.get_fieldnorms_serializer())?;
 self.write_fast_fields(serializer.get_fast_field_serializer(), term_ord_mappings)?;
 self.write_storable_fields(serializer.get_store_writer())?;
 serializer.close()?;
@@ -1504,12 +1507,9 @@ mod tests {
 for i in 0..100 {
 let mut doc = Document::new();
 doc.add_f64(field, 42.0);
-
 doc.add_f64(multi_field, 0.24);
 doc.add_f64(multi_field, 0.27);
-
 writer.add_document(doc);
-
 if i % 5 == 0 {
 writer.commit()?;
 }
@@ -1521,7 +1521,6 @@ mod tests {
 // If a merging thread fails, we should end up with more
 // than one segment here
 assert_eq!(1, index.searchable_segments()?.len());
-
 Ok(())
 }
 }
@@ -8,15 +8,16 @@ use crate::store::StoreWriter;
 /// Segment serializer is in charge of laying out on disk
 /// the data accumulated and sorted by the `SegmentWriter`.
 pub struct SegmentSerializer {
+segment: Segment,
 store_writer: StoreWriter,
 fast_field_serializer: FastFieldSerializer,
-fieldnorms_serializer: FieldNormsSerializer,
+fieldnorms_serializer: Option<FieldNormsSerializer>,
 postings_serializer: InvertedIndexSerializer,
 }

 impl SegmentSerializer {
 /// Creates a new `SegmentSerializer`.
-pub fn for_segment(segment: &mut Segment) -> crate::Result<SegmentSerializer> {
+pub fn for_segment(mut segment: Segment) -> crate::Result<SegmentSerializer> {
 let store_write = segment.open_write(SegmentComponent::STORE)?;

 let fast_field_write = segment.open_write(SegmentComponent::FASTFIELDS)?;
@@ -25,15 +26,21 @@ impl SegmentSerializer {
 let fieldnorms_write = segment.open_write(SegmentComponent::FIELDNORMS)?;
 let fieldnorms_serializer = FieldNormsSerializer::from_write(fieldnorms_write)?;

-let postings_serializer = InvertedIndexSerializer::open(segment)?;
+let postings_serializer = InvertedIndexSerializer::open(&mut segment)?;
 Ok(SegmentSerializer {
+segment,
 store_writer: StoreWriter::new(store_write),
 fast_field_serializer,
-fieldnorms_serializer,
+fieldnorms_serializer: Some(fieldnorms_serializer),
 postings_serializer,
 })
 }

+#[allow(dead_code)]
+pub fn segment(&self) -> &Segment {
+&self.segment
+}
+
 /// Accessor to the `PostingsSerializer`.
 pub fn get_postings_serializer(&mut self) -> &mut InvertedIndexSerializer {
 &mut self.postings_serializer
@@ -44,9 +51,11 @@ impl SegmentSerializer {
 &mut self.fast_field_serializer
 }

-/// Accessor to the field norm serializer.
-pub fn get_fieldnorms_serializer(&mut self) -> &mut FieldNormsSerializer {
-&mut self.fieldnorms_serializer
+/// Extract the field norm serializer.
+///
+/// Note the fieldnorms serializer can only be extracted once.
+pub fn extract_fieldnorms_serializer(&mut self) -> Option<FieldNormsSerializer> {
+self.fieldnorms_serializer.take()
 }

 /// Accessor to the `StoreWriter`.
@@ -55,11 +64,13 @@ impl SegmentSerializer {
 }

 /// Finalize the segment serialization.
-pub fn close(self) -> crate::Result<()> {
+pub fn close(mut self) -> crate::Result<()> {
+if let Some(fieldnorms_serializer) = self.extract_fieldnorms_serializer() {
+fieldnorms_serializer.close()?;
+}
 self.fast_field_serializer.close()?;
 self.postings_serializer.close()?;
 self.store_writer.close()?;
-self.fieldnorms_serializer.close()?;
 Ok(())
 }
 }
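SegmentSerializer now holds the fieldnorms serializer in an `Option` and hands it out at most once through `take()`, so the fieldnorms file gets closed exactly once whether a caller extracted it (as the merger and segment writer below do) or left it for `close()`. A standalone sketch of that extract-once pattern, using toy types rather than tantivy's:

    struct Serializer {
        // Held as an Option so ownership can be moved out at most once.
        fieldnorms: Option<Vec<u8>>,
    }

    impl Serializer {
        fn extract_fieldnorms(&mut self) -> Option<Vec<u8>> {
            // After the first call this returns None.
            self.fieldnorms.take()
        }

        fn close(mut self) {
            // If nobody extracted it, finish it here; otherwise it was already consumed.
            if let Some(fieldnorms) = self.extract_fieldnorms() {
                println!("flushing {} fieldnorm bytes", fieldnorms.len());
            }
        }
    }

    fn main() {
        let mut serializer = Serializer { fieldnorms: Some(vec![1, 2, 3]) };
        assert!(serializer.extract_fieldnorms().is_some());
        assert!(serializer.extract_fieldnorms().is_none()); // only extractable once
        serializer.close(); // nothing left to flush
    }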
@@ -112,7 +112,7 @@ fn merge(
 target_opstamp: Opstamp,
 ) -> crate::Result<SegmentEntry> {
 // first we need to apply deletes to our segment.
-let mut merged_segment = index.new_segment();
+let merged_segment = index.new_segment();

 // First we apply all of the delet to the merged segment, up to the target opstamp.
 for segment_entry in &mut segment_entries {
@@ -131,12 +131,13 @@ fn merge(
 let merger: IndexMerger = IndexMerger::open(index.schema(), &segments[..])?;

 // ... we just serialize this index merger in our new segment to merge the two segments.
-let segment_serializer = SegmentSerializer::for_segment(&mut merged_segment)?;
+let segment_serializer = SegmentSerializer::for_segment(merged_segment.clone())?;

 let num_docs = merger.write(segment_serializer)?;

-let segment_meta = index.new_segment_meta(merged_segment.id(), num_docs);
+let merged_segment_id = merged_segment.id();
+
+let segment_meta = index.new_segment_meta(merged_segment_id, num_docs);
 Ok(SegmentEntry::new(segment_meta, delete_cursor, None))
 }
@@ -62,11 +62,12 @@ impl SegmentWriter {
 /// - schema
 pub fn for_segment(
 memory_budget: usize,
-mut segment: Segment,
+segment: Segment,
 schema: &Schema,
 ) -> crate::Result<SegmentWriter> {
+let tokenizer_manager = segment.index().tokenizers().clone();
 let table_num_bits = initial_table_size(memory_budget)?;
-let segment_serializer = SegmentSerializer::for_segment(&mut segment)?;
+let segment_serializer = SegmentSerializer::for_segment(segment)?;
 let multifield_postings = MultiFieldPostingsWriter::new(schema, table_num_bits);
 let tokenizers = schema
 .fields()
@@ -76,7 +77,7 @@ impl SegmentWriter {
 .get_indexing_options()
 .and_then(|text_index_option| {
 let tokenizer_name = &text_index_option.tokenizer();
-segment.index().tokenizers().get(tokenizer_name)
+tokenizer_manager.get(tokenizer_name)
 }),
 _ => None,
 },
@@ -280,9 +281,11 @@ fn write(
 fieldnorms_writer: &FieldNormsWriter,
 mut serializer: SegmentSerializer,
 ) -> crate::Result<()> {
+if let Some(fieldnorms_serializer) = serializer.extract_fieldnorms_serializer() {
+fieldnorms_writer.serialize(fieldnorms_serializer)?;
+}
 let term_ord_map = multifield_postings.serialize(serializer.get_postings_serializer())?;
 fast_field_writers.serialize(serializer.get_fast_field_serializer(), &term_ord_map)?;
-fieldnorms_writer.serialize(serializer.get_fieldnorms_serializer())?;
 serializer.close()?;
 Ok(())
 }
src/lib.rs | 31

@@ -298,17 +298,26 @@ mod tests {
 use rand::rngs::StdRng;
 use rand::{Rng, SeedableRng};

-pub fn assert_nearly_equals(expected: f32, val: f32) {
-assert!(
-nearly_equals(val, expected),
-"Got {}, expected {}.",
-val,
-expected
-);
-}
-
-pub fn nearly_equals(a: f32, b: f32) -> bool {
-(a - b).abs() < 0.0005 * (a + b).abs()
+/// Checks if left and right are close one to each other.
+/// Panics if the two values are more than 0.5% apart.
+#[macro_export]
+macro_rules! assert_nearly_equals {
+($left:expr, $right:expr) => {{
+match (&$left, &$right) {
+(left_val, right_val) => {
+let diff = (left_val - right_val).abs();
+let add = left_val.abs() + right_val.abs();
+if diff > 0.0005 * add {
+panic!(
+r#"assertion failed: `(left ~= right)`
+left: `{:?}`,
+right: `{:?}`"#,
+&*left_val, &*right_val
+)
+}
+}
+}
+}};
 }

 pub fn generate_nonunique_unsorted(max_value: u32, n_elems: usize) -> Vec<u32> {
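The free functions are replaced by an exported macro so test modules outside `crate::tests` (including the query tests below) can pull it in with `use crate::assert_nearly_equals;`. A standalone sketch of the tolerance check the macro body performs, with the panic replaced by a boolean:

    // Mirrors the macro's test: values are nearly equal unless their difference
    // exceeds 0.0005 times the sum of their magnitudes.
    fn nearly_equals(left: f32, right: f32) -> bool {
        let diff = (left - right).abs();
        let add = left.abs() + right.abs();
        diff <= 0.0005 * add
    }

    fn main() {
        assert!(nearly_equals(0.6931472, 0.6931470)); // well within tolerance
        assert!(!nearly_equals(0.6931472, 0.70));     // ~1% apart: the macro would panic
    }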
@@ -139,10 +139,10 @@ impl BM25Weight {
 mod tests {

 use super::idf;
-use crate::tests::assert_nearly_equals;
+use crate::assert_nearly_equals;

 #[test]
 fn test_idf() {
-assert_nearly_equals(idf(1, 2), 0.6931472);
+assert_nearly_equals!(idf(1, 2), 0.6931472);
 }
 }
@@ -7,6 +7,7 @@ pub use self::boolean_query::BooleanQuery;
 mod tests {

 use super::*;
+use crate::assert_nearly_equals;
 use crate::collector::tests::TEST_COLLECTOR_WITH_SCORE;
 use crate::collector::TopDocs;
 use crate::query::score_combiner::SumWithCoordsCombiner;
@@ -19,7 +20,6 @@ mod tests {
 use crate::query::Scorer;
 use crate::query::TermQuery;
 use crate::schema::*;
-use crate::tests::assert_nearly_equals;
 use crate::Index;
 use crate::{DocAddress, DocId, Score};

@@ -256,14 +256,14 @@ mod tests {
 .scorer(searcher.segment_reader(0u32), 1.0f32)
 .unwrap();
 assert_eq!(boolean_scorer.doc(), 0u32);
-assert_nearly_equals(boolean_scorer.score(), 0.84163445f32);
+assert_nearly_equals!(boolean_scorer.score(), 0.84163445f32);
 }
 {
 let mut boolean_scorer = boolean_weight
 .scorer(searcher.segment_reader(0u32), 2.0f32)
 .unwrap();
 assert_eq!(boolean_scorer.doc(), 0u32);
-assert_nearly_equals(boolean_scorer.score(), 1.6832689f32);
+assert_nearly_equals!(boolean_scorer.score(), 1.6832689f32);
 }
 }

@@ -163,10 +163,10 @@ impl Query for FuzzyTermQuery {
 #[cfg(test)]
 mod test {
 use super::FuzzyTermQuery;
+use crate::assert_nearly_equals;
 use crate::collector::TopDocs;
 use crate::schema::Schema;
 use crate::schema::TEXT;
-use crate::tests::assert_nearly_equals;
 use crate::Index;
 use crate::Term;

@@ -199,7 +199,7 @@ mod test {
 .unwrap();
 assert_eq!(top_docs.len(), 1, "Expected only 1 document");
 let (score, _) = top_docs[0];
-assert_nearly_equals(1f32, score);
+assert_nearly_equals!(1f32, score);
 }

 // fails because non-prefix Levenshtein distance is more than 1 (add 'a' and 'n')
@@ -223,7 +223,7 @@ mod test {
 .unwrap();
 assert_eq!(top_docs.len(), 1, "Expected only 1 document");
 let (score, _) = top_docs[0];
-assert_nearly_equals(1f32, score);
+assert_nearly_equals!(1f32, score);
 }
 }
 }
@@ -10,11 +10,11 @@ pub use self::phrase_weight::PhraseWeight;
 pub mod tests {

 use super::*;
+use crate::assert_nearly_equals;
 use crate::collector::tests::{TEST_COLLECTOR_WITHOUT_SCORE, TEST_COLLECTOR_WITH_SCORE};
 use crate::core::Index;
 use crate::query::Weight;
 use crate::schema::{Schema, Term, TEXT};
-use crate::tests::assert_nearly_equals;
 use crate::DocId;
 use crate::{DocAddress, TERMINATED};

@@ -175,8 +175,8 @@ pub mod tests {
 .to_vec()
 };
 let scores = test_query(vec!["a", "b"]);
-assert_nearly_equals(scores[0], 0.40618482);
-assert_nearly_equals(scores[1], 0.46844664);
+assert_nearly_equals!(scores[0], 0.40618482);
+assert_nearly_equals!(scores[1], 0.46844664);
 }

 #[test] // motivated by #234
@@ -89,10 +89,10 @@ impl Query for RegexQuery {
 #[cfg(test)]
 mod test {
 use super::RegexQuery;
+use crate::assert_nearly_equals;
 use crate::collector::TopDocs;
 use crate::schema::TEXT;
 use crate::schema::{Field, Schema};
-use crate::tests::assert_nearly_equals;
 use crate::{Index, IndexReader};
 use std::sync::Arc;
 use tantivy_fst::Regex;
@@ -129,7 +129,7 @@ mod test {
 .unwrap();
 assert_eq!(scored_docs.len(), 1, "Expected only 1 document");
 let (score, _) = scored_docs[0];
-assert_nearly_equals(1f32, score);
+assert_nearly_equals!(1f32, score);
 }
 let top_docs = searcher
 .search(&query_matching_zero, &TopDocs::with_limit(2))
@@ -9,12 +9,12 @@ pub use self::term_weight::TermWeight;
 #[cfg(test)]
 mod tests {

+use crate::assert_nearly_equals;
 use crate::collector::TopDocs;
 use crate::docset::DocSet;
 use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
 use crate::query::{Query, QueryParser, Scorer, TermQuery};
 use crate::schema::{Field, IndexRecordOption, Schema, STRING, TEXT};
-use crate::tests::assert_nearly_equals;
 use crate::Term;
 use crate::{Index, TERMINATED};

@@ -105,7 +105,7 @@ mod tests {
 .unwrap();
 assert_eq!(topdocs.len(), 1);
 let (score, _) = topdocs[0];
-assert_nearly_equals(0.77802235, score);
+assert_nearly_equals!(0.77802235, score);
 }
 {
 let term = Term::from_field_text(left_field, "left1");
@@ -115,9 +115,9 @@ mod tests {
 .unwrap();
 assert_eq!(top_docs.len(), 2);
 let (score1, _) = top_docs[0];
-assert_nearly_equals(0.27101856, score1);
+assert_nearly_equals!(0.27101856, score1);
 let (score2, _) = top_docs[1];
-assert_nearly_equals(0.13736556, score2);
+assert_nearly_equals!(0.13736556, score2);
 }
 {
 let query_parser = QueryParser::for_index(&index, vec![]);
@@ -125,9 +125,9 @@ mod tests {
 let top_docs = searcher.search(&query, &TopDocs::with_limit(2)).unwrap();
 assert_eq!(top_docs.len(), 2);
 let (score1, _) = top_docs[0];
-assert_nearly_equals(0.9153879, score1);
+assert_nearly_equals!(0.9153879, score1);
 let (score2, _) = top_docs[1];
-assert_nearly_equals(0.27101856, score2);
+assert_nearly_equals!(0.27101856, score2);
 }
 }

@@ -20,12 +20,12 @@ pub struct TermWeight {

 impl Weight for TermWeight {
 fn scorer(&self, reader: &SegmentReader, boost: f32) -> Result<Box<dyn Scorer>> {
-let term_scorer = self.scorer_specialized(reader, boost)?;
+let term_scorer = self.specialized_scorer(reader, boost)?;
 Ok(Box::new(term_scorer))
 }

 fn explain(&self, reader: &SegmentReader, doc: DocId) -> Result<Explanation> {
-let mut scorer = self.scorer_specialized(reader, 1.0f32)?;
+let mut scorer = self.specialized_scorer(reader, 1.0f32)?;
 if scorer.seek(doc) != doc {
 return Err(does_not_match(doc));
 }
@@ -52,7 +52,7 @@ impl Weight for TermWeight {
 reader: &SegmentReader,
 callback: &mut dyn FnMut(DocId, Score),
 ) -> crate::Result<()> {
-let mut scorer = self.scorer_specialized(reader, 1.0f32)?;
+let mut scorer = self.specialized_scorer(reader, 1.0f32)?;
 for_each_scorer(&mut scorer, callback);
 Ok(())
 }
@@ -92,7 +92,7 @@ impl TermWeight {
 }
 }

-fn scorer_specialized(&self, reader: &SegmentReader, boost: f32) -> Result<TermScorer> {
+fn specialized_scorer(&self, reader: &SegmentReader, boost: f32) -> Result<TermScorer> {
 let field = self.term.field();
 let inverted_index = reader.inverted_index(field);
 let fieldnorm_reader = reader.get_fieldnorms_reader(field);
@@ -14,7 +14,7 @@ use std::fmt;
 /// - a field name
 /// - a field type, itself wrapping up options describing
 /// how the field should be indexed.
-#[derive(Clone, Debug, Eq, PartialEq)]
+#[derive(Clone, Debug, PartialEq)]
 pub struct FieldEntry {
 name: String,
 field_type: FieldType,
@@ -48,7 +48,7 @@ pub enum Type {

 /// A `FieldType` describes the type (text, u64) of a field as well as
 /// how it should be handled by tantivy.
-#[derive(Clone, Debug, Eq, PartialEq)]
+#[derive(Clone, Debug, PartialEq)]
 pub enum FieldType {
 /// String field type configuration
 Str(TextOptions),
@@ -6,7 +6,7 @@ use std::borrow::Cow;
 use std::ops::BitOr;

 /// Define how a text field should be handled by tantivy.
-#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
 pub struct TextOptions {
 indexing: Option<TextFieldIndexing>,
 stored: bool,
@@ -51,7 +51,7 @@ impl Default for TextOptions {
 /// - the amount of information that should be stored about the presence of a term in a document.
 /// Essentially, should we store the term frequency and/or the positions (See [`IndexRecordOption`](./enum.IndexRecordOption.html)).
 /// - the name of the `Tokenizer` that should be used to process the field.
-#[derive(Clone, PartialEq, Eq, Debug, Serialize, Deserialize)]
+#[derive(Clone, PartialEq, Debug, Serialize, Deserialize)]
 pub struct TextFieldIndexing {
 record: IndexRecordOption,
 tokenizer: Cow<'static, str>,
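The four schema hunks above drop the `Eq` derive while keeping `PartialEq`. The diff does not show which member forced this, but a common cause is a member that only implements `PartialEq`, such as an `f64` value (note the `add_f64` calls in the merger test earlier in this comparison). A standalone illustration of that constraint, not taken from the tantivy sources:

    // `f64` implements PartialEq but not Eq (NaN != NaN), so a struct holding one
    // can only derive PartialEq; adding `Eq` to this derive would not compile.
    #[derive(Clone, Debug, PartialEq)]
    struct NumericOptions {
        stored: bool,
        default_value: Option<f64>, // hypothetical member, not an actual tantivy field
    }

    fn main() {
        let a = NumericOptions { stored: true, default_value: Some(42.0) };
        assert_eq!(a, a.clone()); // PartialEq comparisons still work
    }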