mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-07 09:32:54 +00:00
final edits
This commit is contained in:
@@ -11,9 +11,9 @@
|
||||
// Importing tantivy...
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use crate::fastfield::{FastValue, FastFieldReader};
|
||||
use crate::schema::Field;
|
||||
use crate::collector::{Collector, SegmentCollector};
|
||||
use crate::fastfield::{FastFieldReader, FastValue};
|
||||
use crate::schema::Field;
|
||||
use crate::{Score, SegmentReader, TantivyError};
|
||||
|
||||
/// The `FilterCollector` filters docs using a fast field value and a predicate.
|
||||
@@ -56,13 +56,13 @@ use crate::{Score, SegmentReader, TantivyError};
|
||||
/// let filtered_top_docs = searcher.search(&query, &filter_all_collector).unwrap();
|
||||
///
|
||||
/// assert_eq!(filtered_top_docs.len(), 0);
|
||||
///
|
||||
///
|
||||
/// fn date_debug(value: DateTime) -> bool {
|
||||
/// println!("date: {:?}", value);
|
||||
/// assert_eq!(value, DateTime::from_str("1000-04-09T00:00:00+00:00").unwrap());
|
||||
/// (value - DateTime::from_str("2019-04-09T00:00:00+00:00").unwrap()).num_weeks() > 0
|
||||
/// }
|
||||
///
|
||||
///
|
||||
/// let filter_dates_collector = FilterCollector::new(date, &date_debug, TopDocs::with_limit(2));
|
||||
/// let filtered_date_docs = searcher.search(&query, &filter_all_collector).unwrap();
|
||||
///
|
||||
@@ -125,38 +125,33 @@ where
|
||||
field_entry.name()
|
||||
)));
|
||||
}
|
||||
let schema_type = TPredicateValue::to_type();
|
||||
let requested_type = field_entry.field_type().value_type();
|
||||
if schema_type != requested_type {
|
||||
let requested_type = TPredicateValue::to_type();
|
||||
let field_schema_type = field_entry.field_type().value_type();
|
||||
if requested_type != field_schema_type {
|
||||
return Err(TantivyError::SchemaError(format!(
|
||||
"Field {:?} is of type {:?}!={:?}",
|
||||
field_entry.name(),
|
||||
schema_type,
|
||||
requested_type
|
||||
requested_type,
|
||||
field_schema_type
|
||||
)));
|
||||
}
|
||||
|
||||
let err_closure = || {
|
||||
let field_name = segment_reader.schema().get_field_name(self.field);
|
||||
TantivyError::SchemaError(format!(
|
||||
"Field {:?} is not a u64 fast field.",
|
||||
field_name
|
||||
))
|
||||
};
|
||||
let fast_fields = segment_reader.fast_fields();
|
||||
let fast_field_reader = segment_reader
|
||||
.fast_fields()
|
||||
.typed_fast_field_reader(self.field)
|
||||
.ok_or_else(|| {
|
||||
TantivyError::SchemaError(format!(
|
||||
"{:?} is not declared as a fast field in the schema.",
|
||||
self.field
|
||||
))
|
||||
})?;
|
||||
|
||||
let fast_value_type = TPredicateValue::to_type();
|
||||
// TODO do a runtime check of `fast_value_type` against the schema.
|
||||
|
||||
let fast_field_reader_opt = fast_fields.typed_fast_field_reader(self.field);
|
||||
let fast_field_reader = fast_field_reader_opt
|
||||
.ok_or_else(|| TantivyError::SchemaError(format!("{:?} is not declared as a fast field in the schema.", self.field)))?;
|
||||
let segment_collector = self
|
||||
.collector
|
||||
.for_segment(segment_local_id, segment_reader)?;
|
||||
|
||||
Ok(FilterSegmentCollector {
|
||||
fast_field_reader ,
|
||||
fast_field_reader,
|
||||
segment_collector: segment_collector,
|
||||
predicate: self.predicate,
|
||||
t_predicate_value: PhantomData,
|
||||
|
||||
@@ -8,12 +8,12 @@ use crate::DocId;
|
||||
use crate::Score;
|
||||
use crate::SegmentLocalId;
|
||||
|
||||
use crate::collector::{TopDocs, FilterCollector};
|
||||
use crate::collector::{FilterCollector, TopDocs};
|
||||
use crate::query::QueryParser;
|
||||
use crate::schema::{Schema, FAST, TEXT};
|
||||
use crate::DateTime;
|
||||
use std::str::FromStr;
|
||||
use crate::{doc, Index};
|
||||
use std::str::FromStr;
|
||||
|
||||
pub const TEST_COLLECTOR_WITH_SCORE: TestCollector = TestCollector {
|
||||
compute_score: true,
|
||||
@@ -25,7 +25,6 @@ pub const TEST_COLLECTOR_WITHOUT_SCORE: TestCollector = TestCollector {
|
||||
|
||||
#[test]
|
||||
pub fn test_filter_collector() {
|
||||
|
||||
let mut schema_builder = Schema::builder();
|
||||
let title = schema_builder.add_text_field("title", TEXT);
|
||||
let price = schema_builder.add_u64_field("price", FAST);
|
||||
@@ -36,6 +35,7 @@ pub fn test_filter_collector() {
|
||||
let mut index_writer = index.writer_with_num_threads(1, 10_000_000).unwrap();
|
||||
index_writer.add_document(doc!(title => "The Name of the Wind", price => 30_200u64, date => DateTime::from_str("1898-04-09T00:00:00+00:00").unwrap()));
|
||||
index_writer.add_document(doc!(title => "The Diary of Muadib", price => 29_240u64, date => DateTime::from_str("2020-04-09T00:00:00+00:00").unwrap()));
|
||||
index_writer.add_document(doc!(title => "The Diary of Anne Frank", price => 18_240u64, date => DateTime::from_str("2019-04-20T00:00:00+00:00").unwrap()));
|
||||
index_writer.add_document(doc!(title => "A Dairy Cow", price => 21_240u64, date => DateTime::from_str("2019-04-09T00:00:00+00:00").unwrap()));
|
||||
index_writer.add_document(doc!(title => "The Diary of a Young Girl", price => 20_120u64, date => DateTime::from_str("2018-04-09T00:00:00+00:00").unwrap()));
|
||||
assert!(index_writer.commit().is_ok());
|
||||
@@ -45,7 +45,11 @@ pub fn test_filter_collector() {
|
||||
|
||||
let query_parser = QueryParser::for_index(&index, vec![title]);
|
||||
let query = query_parser.parse_query("diary").unwrap();
|
||||
let filter_some_collector = FilterCollector::new(price, &|value: u64| value > 20_120u64, TopDocs::with_limit(2));
|
||||
let filter_some_collector = FilterCollector::new(
|
||||
price,
|
||||
&|value: u64| value > 20_120u64,
|
||||
TopDocs::with_limit(2),
|
||||
);
|
||||
let top_docs = searcher.search(&query, &filter_some_collector).unwrap();
|
||||
|
||||
assert_eq!(top_docs.len(), 1);
|
||||
@@ -56,16 +60,14 @@ pub fn test_filter_collector() {
|
||||
|
||||
assert_eq!(filtered_top_docs.len(), 0);
|
||||
|
||||
fn date_debug(value: DateTime) -> bool {
|
||||
println!("date: {:?}", value);
|
||||
assert_eq!(value, DateTime::from_str("1000-04-09T00:00:00+00:00").unwrap());
|
||||
(value - DateTime::from_str("2019-04-09T00:00:00+00:00").unwrap()).num_weeks() > 0
|
||||
fn date_filter(value: DateTime) -> bool {
|
||||
(value - DateTime::from_str("2019-04-09T00:00:00+00:00").unwrap()).num_weeks() > 0
|
||||
}
|
||||
|
||||
let filter_dates_collector = FilterCollector::new(date, &date_debug, TopDocs::with_limit(2));
|
||||
let filter_dates_collector = FilterCollector::new(date, &date_filter, TopDocs::with_limit(5));
|
||||
let filtered_date_docs = searcher.search(&query, &filter_dates_collector).unwrap();
|
||||
|
||||
assert_eq!(filtered_date_docs.len(), 5);
|
||||
assert_eq!(filtered_date_docs.len(), 2);
|
||||
}
|
||||
|
||||
/// Stores all of the doc ids.
|
||||
|
||||
Reference in New Issue
Block a user