mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-06 09:12:55 +00:00
334 lines
12 KiB
Rust
334 lines
12 KiB
Rust
use core::fmt;
|
|
use std::ops::{Range, RangeInclusive};
|
|
use std::sync::Arc;
|
|
|
|
use fastfield_codecs::Column;
|
|
|
|
use super::MultiValueIndex;
|
|
use crate::fastfield::MakeZero;
|
|
use crate::DocId;
|
|
|
|
/// Reader for a multivalued fast field.
|
|
///
|
|
/// The reader is implemented as two fast fields, one u64 fast field for the index and one for the
|
|
/// values.
|
|
///
|
|
/// The `vals_reader` will access the concatenated list of all values.
|
|
/// The `idx_reader` associates, for each document, the index of its first value.
|
|
#[derive(Clone)]
|
|
pub struct MultiValuedFastFieldReader<T> {
|
|
idx_reader: MultiValueIndex,
|
|
vals_reader: Arc<dyn Column<T>>,
|
|
}
|
|
|
|
impl<T: PartialOrd + MakeZero + Copy + fmt::Debug> MultiValuedFastFieldReader<T> {
|
|
pub(crate) fn open(
|
|
idx_reader: Arc<dyn Column<u64>>,
|
|
vals_reader: Arc<dyn Column<T>>,
|
|
) -> MultiValuedFastFieldReader<T> {
|
|
Self {
|
|
idx_reader: MultiValueIndex::new(idx_reader),
|
|
vals_reader,
|
|
}
|
|
}
|
|
|
|
/// Returns the array of values associated to the given `doc`.
|
|
#[inline]
|
|
pub fn get_first_val(&self, doc: DocId) -> Option<T> {
|
|
let range = self.idx_reader.range(doc);
|
|
if range.is_empty() {
|
|
return None;
|
|
}
|
|
Some(self.vals_reader.get_val(range.start))
|
|
}
|
|
|
|
/// Returns the array of values associated to the given `doc`.
|
|
#[inline]
|
|
fn get_vals_for_range(&self, range: Range<u32>, vals: &mut Vec<T>) {
|
|
let len = (range.end - range.start) as usize;
|
|
vals.resize(len, T::make_zero());
|
|
self.vals_reader
|
|
.get_range(range.start as u64, &mut vals[..]);
|
|
}
|
|
|
|
/// Returns the index reader
|
|
pub fn get_index_reader(&self) -> &MultiValueIndex {
|
|
&self.idx_reader
|
|
}
|
|
|
|
/// Returns the array of values associated to the given `doc`.
|
|
#[inline]
|
|
pub fn get_vals(&self, doc: DocId, vals: &mut Vec<T>) {
|
|
let range = self.idx_reader.range(doc);
|
|
self.get_vals_for_range(range, vals);
|
|
}
|
|
|
|
/// Iterates over all elements in the fast field
|
|
pub fn iter(&self) -> impl Iterator<Item = T> + '_ {
|
|
self.vals_reader.iter()
|
|
}
|
|
|
|
/// Returns the minimum value for this fast field.
|
|
///
|
|
/// The min value does not take in account of possible
|
|
/// deleted document, and should be considered as a lower bound
|
|
/// of the actual mimimum value.
|
|
pub fn min_value(&self) -> T {
|
|
self.vals_reader.min_value()
|
|
}
|
|
|
|
/// Returns the maximum value for this fast field.
|
|
///
|
|
/// The max value does not take in account of possible
|
|
/// deleted document, and should be considered as an upper bound
|
|
/// of the actual maximum value.
|
|
pub fn max_value(&self) -> T {
|
|
self.vals_reader.max_value()
|
|
}
|
|
|
|
/// Returns the number of values associated with the document `DocId`.
|
|
#[inline]
|
|
pub fn num_vals(&self, doc: DocId) -> u32 {
|
|
self.idx_reader.num_vals_for_doc(doc)
|
|
}
|
|
|
|
/// Returns the overall number of values in this field. It does not include deletes.
|
|
#[inline]
|
|
pub fn total_num_vals(&self) -> u32 {
|
|
assert_eq!(
|
|
self.vals_reader.num_vals(),
|
|
self.get_index_reader().total_num_vals()
|
|
);
|
|
self.idx_reader.total_num_vals()
|
|
}
|
|
|
|
/// Returns the docids matching given doc_id_range and value_range.
|
|
#[inline]
|
|
pub fn get_docids_for_value_range(
|
|
&self,
|
|
value_range: RangeInclusive<T>,
|
|
doc_id_range: Range<u32>,
|
|
positions: &mut Vec<u32>,
|
|
) {
|
|
let position_range = self
|
|
.get_index_reader()
|
|
.docid_range_to_position_range(doc_id_range.clone());
|
|
self.vals_reader
|
|
.get_docids_for_value_range(value_range, position_range, positions);
|
|
|
|
self.idx_reader.positions_to_docids(doc_id_range, positions);
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
|
|
use time::{Duration, OffsetDateTime};
|
|
|
|
use crate::collector::Count;
|
|
use crate::core::Index;
|
|
use crate::query::RangeQuery;
|
|
use crate::schema::{Cardinality, Facet, FacetOptions, NumericOptions, Schema};
|
|
use crate::{DateOptions, DatePrecision, DateTime};
|
|
|
|
#[test]
|
|
fn test_multivalued_date_docids_for_value_range_1() -> crate::Result<()> {
|
|
let mut schema_builder = Schema::builder();
|
|
let date_field = schema_builder.add_date_field(
|
|
"multi_date_field",
|
|
DateOptions::default()
|
|
.set_fast(Cardinality::MultiValues)
|
|
.set_indexed()
|
|
.set_fieldnorm()
|
|
.set_precision(DatePrecision::Microseconds)
|
|
.set_stored(),
|
|
);
|
|
let schema = schema_builder.build();
|
|
let index = Index::create_in_ram(schema);
|
|
let mut index_writer = index.writer_for_tests()?;
|
|
let first_time_stamp = OffsetDateTime::now_utc();
|
|
index_writer.add_document(doc!(
|
|
date_field => DateTime::from_utc(first_time_stamp),
|
|
date_field => DateTime::from_utc(first_time_stamp),
|
|
))?;
|
|
// add another second
|
|
let two_secs_ahead = first_time_stamp + Duration::seconds(2);
|
|
index_writer.commit()?;
|
|
|
|
let reader = index.reader()?;
|
|
let searcher = reader.searcher();
|
|
let reader = searcher.segment_reader(0);
|
|
|
|
let date_ff_reader = reader.fast_fields().dates("multi_date_field").unwrap();
|
|
let mut docids = vec![];
|
|
date_ff_reader.get_docids_for_value_range(
|
|
DateTime::from_utc(first_time_stamp)..=DateTime::from_utc(two_secs_ahead),
|
|
0..5,
|
|
&mut docids,
|
|
);
|
|
assert_eq!(docids, vec![0]);
|
|
|
|
let count_multiples =
|
|
|range_query: RangeQuery| searcher.search(&range_query, &Count).unwrap();
|
|
|
|
assert_eq!(
|
|
count_multiples(RangeQuery::new_date(
|
|
"multi_date_field".to_string(),
|
|
DateTime::from_utc(first_time_stamp)..DateTime::from_utc(two_secs_ahead)
|
|
)),
|
|
1
|
|
);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[test]
|
|
fn test_multivalued_date_docids_for_value_range_2() -> crate::Result<()> {
|
|
let mut schema_builder = Schema::builder();
|
|
let date_field = schema_builder.add_date_field(
|
|
"multi_date_field",
|
|
DateOptions::default()
|
|
.set_fast(Cardinality::MultiValues)
|
|
// TODO: Test different precision after fixing https://github.com/quickwit-oss/tantivy/issues/1783
|
|
.set_precision(DatePrecision::Microseconds)
|
|
.set_indexed()
|
|
.set_fieldnorm()
|
|
.set_stored(),
|
|
);
|
|
let schema = schema_builder.build();
|
|
let index = Index::create_in_ram(schema);
|
|
let mut index_writer = index.writer_for_tests()?;
|
|
let first_time_stamp = OffsetDateTime::now_utc();
|
|
index_writer.add_document(doc!(
|
|
date_field => DateTime::from_utc(first_time_stamp),
|
|
date_field => DateTime::from_utc(first_time_stamp),
|
|
))?;
|
|
index_writer.add_document(doc!())?;
|
|
// add one second
|
|
index_writer.add_document(doc!(
|
|
date_field => DateTime::from_utc(first_time_stamp + Duration::seconds(1)),
|
|
))?;
|
|
// add another second
|
|
let two_secs_ahead = first_time_stamp + Duration::seconds(2);
|
|
index_writer.add_document(doc!(
|
|
date_field => DateTime::from_utc(two_secs_ahead),
|
|
date_field => DateTime::from_utc(two_secs_ahead),
|
|
date_field => DateTime::from_utc(two_secs_ahead),
|
|
))?;
|
|
// add three seconds
|
|
index_writer.add_document(doc!(
|
|
date_field => DateTime::from_utc(first_time_stamp + Duration::seconds(3)),
|
|
))?;
|
|
index_writer.commit()?;
|
|
|
|
let reader = index.reader()?;
|
|
let searcher = reader.searcher();
|
|
let reader = searcher.segment_reader(0);
|
|
assert_eq!(reader.num_docs(), 5);
|
|
|
|
let date_ff_reader = reader.fast_fields().dates("multi_date_field").unwrap();
|
|
let mut docids = vec![];
|
|
date_ff_reader.get_docids_for_value_range(
|
|
DateTime::from_utc(first_time_stamp)..=DateTime::from_utc(two_secs_ahead),
|
|
0..5,
|
|
&mut docids,
|
|
);
|
|
assert_eq!(docids, vec![0, 2, 3]);
|
|
|
|
let count_multiples =
|
|
|range_query: RangeQuery| searcher.search(&range_query, &Count).unwrap();
|
|
|
|
assert_eq!(
|
|
count_multiples(RangeQuery::new_date(
|
|
"multi_date_field".to_string(),
|
|
DateTime::from_utc(first_time_stamp)..DateTime::from_utc(two_secs_ahead)
|
|
)),
|
|
2
|
|
);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[test]
|
|
fn test_multifastfield_reader() -> crate::Result<()> {
|
|
let mut schema_builder = Schema::builder();
|
|
let facet_field = schema_builder.add_facet_field("facets", FacetOptions::default());
|
|
let schema = schema_builder.build();
|
|
let index = Index::create_in_ram(schema);
|
|
let mut index_writer = index.writer_for_tests()?;
|
|
index_writer.add_document(doc!(
|
|
facet_field => Facet::from("/category/cat2"),
|
|
facet_field => Facet::from("/category/cat1"),
|
|
))?;
|
|
index_writer.add_document(doc!(facet_field => Facet::from("/category/cat2")))?;
|
|
index_writer.add_document(doc!(facet_field => Facet::from("/category/cat3")))?;
|
|
index_writer.commit()?;
|
|
let searcher = index.reader()?.searcher();
|
|
let segment_reader = searcher.segment_reader(0);
|
|
let mut facet_reader = segment_reader.facet_reader(facet_field)?;
|
|
|
|
let mut facet = Facet::root();
|
|
{
|
|
facet_reader.facet_from_ord(1, &mut facet).unwrap();
|
|
assert_eq!(facet, Facet::from("/category"));
|
|
}
|
|
{
|
|
facet_reader.facet_from_ord(2, &mut facet).unwrap();
|
|
assert_eq!(facet, Facet::from("/category/cat1"));
|
|
}
|
|
{
|
|
facet_reader.facet_from_ord(3, &mut facet).unwrap();
|
|
assert_eq!(format!("{}", facet), "/category/cat2");
|
|
assert_eq!(facet, Facet::from("/category/cat2"));
|
|
}
|
|
{
|
|
facet_reader.facet_from_ord(4, &mut facet).unwrap();
|
|
assert_eq!(facet, Facet::from("/category/cat3"));
|
|
}
|
|
|
|
let mut vals = Vec::new();
|
|
{
|
|
facet_reader.facet_ords(0, &mut vals);
|
|
assert_eq!(&vals[..], &[2, 3]);
|
|
}
|
|
{
|
|
facet_reader.facet_ords(1, &mut vals);
|
|
assert_eq!(&vals[..], &[3]);
|
|
}
|
|
{
|
|
facet_reader.facet_ords(2, &mut vals);
|
|
assert_eq!(&vals[..], &[4]);
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
#[test]
|
|
fn test_multifastfield_reader_min_max() -> crate::Result<()> {
|
|
let mut schema_builder = Schema::builder();
|
|
let field_options = NumericOptions::default()
|
|
.set_indexed()
|
|
.set_fast(Cardinality::MultiValues);
|
|
let item_field = schema_builder.add_i64_field("items", field_options);
|
|
let schema = schema_builder.build();
|
|
let index = Index::create_in_ram(schema);
|
|
let mut index_writer = index
|
|
.writer_for_tests()
|
|
.expect("Failed to create index writer.");
|
|
index_writer.add_document(doc!(
|
|
item_field => 2i64,
|
|
item_field => 3i64,
|
|
item_field => -2i64,
|
|
))?;
|
|
index_writer.add_document(doc!(item_field => 6i64, item_field => 3i64))?;
|
|
index_writer.add_document(doc!(item_field => 4i64))?;
|
|
index_writer.commit()?;
|
|
let searcher = index.reader()?.searcher();
|
|
let segment_reader = searcher.segment_reader(0);
|
|
let field_reader = segment_reader.fast_fields().i64s("items")?;
|
|
|
|
assert_eq!(field_reader.min_value(), -2);
|
|
assert_eq!(field_reader.max_value(), 6);
|
|
Ok(())
|
|
}
|
|
}
|