diff --git a/src/query/fastfield_filter/fastfield_filter_query.rs b/src/query/fastfield_filter/fastfield_filter_query.rs
new file mode 100644
--- /dev/null
+++ b/src/query/fastfield_filter/fastfield_filter_query.rs
@@ -0,0 +1,114 @@
+use super::FastFieldFilterWeight;
+use query::Query;
+use query::Weight;
+use Result;
+use Searcher;
+use schema::Field;
+use super::RangeU64;
+use std::collections::Bound;
+use common::i64_to_u64;
+use schema::Schema;
+use schema::FieldEntry;
+use TantivyError;
+use schema::Type;
+
+/// Integer flavor of the fast field targeted by a `FastFieldFilterQuery`.
+#[derive(Debug, Copy, Clone)]
+enum TypeInt {
+    U64,
+    I64,
+}
+
+impl TypeInt {
+    /// Returns the schema value type matching this integer flavor.
+    fn value_type(self) -> Type {
+        match self {
+            TypeInt::I64 => Type::I64,
+            TypeInt::U64 => Type::U64,
+        }
+    }
+}
+
+/// Query matching the documents whose int fast field value lies within a range.
+///
+/// The range is stored as `u64` bounds; the bounds of an `i64` query are
+/// converted with `i64_to_u64` when the query is built.
+//
+// TODO i64 range Debug string will not look good in the
+// current implementation. Defer conversion to the scorer, or
+// back convert values for Debug.
+#[derive(Debug, Clone)]
+pub struct FastFieldFilterQuery {
+    field: Field,
+    range: RangeU64,
+    // Just here to check the schema at runtime, as we call `.weight`.
+    int_type: TypeInt,
+}
+
+/// Converts an `i64` bound into a `u64` bound of the same kind.
+///
+/// An inclusive bound stays inclusive and an exclusive bound stays exclusive.
+/// NOTE(review): this assumes `i64_to_u64` preserves ordering -- confirm.
+fn convert_bound_to_u64(bound: Bound<i64>) -> Bound<u64> {
+    match bound {
+        Bound::Included(val) => Bound::Included(i64_to_u64(val)),
+        Bound::Excluded(val) => Bound::Excluded(i64_to_u64(val)),
+        Bound::Unbounded => Bound::Unbounded,
+    }
+}
+
+impl FastFieldFilterQuery {
+    /// Builds a filter on a `u64` fast field, bounded by `low` and `high`.
+    pub fn new_u64(field: Field, low: Bound<u64>, high: Bound<u64>) -> FastFieldFilterQuery {
+        FastFieldFilterQuery {
+            field,
+            range: RangeU64 { low, high },
+            int_type: TypeInt::U64,
+        }
+    }
+
+    /// Builds a filter on an `i64` fast field, bounded by `low` and `high`.
+    pub fn new_i64(field: Field, low: Bound<i64>, high: Bound<i64>) -> FastFieldFilterQuery {
+        FastFieldFilterQuery {
+            field,
+            range: RangeU64 {
+                low: convert_bound_to_u64(low),
+                high: convert_bound_to_u64(high),
+            },
+            int_type: TypeInt::I64,
+        }
+    }
+
+    /// Checks that the field is an int fast field of the type the query was built for.
+    ///
+    /// Returns a `TantivyError::SchemaError` otherwise.
+    fn validate_schema(&self, schema: &Schema) -> Result<()> {
+        let field_entry: &FieldEntry = schema.get_field_entry(self.field);
+        if !field_entry.is_int_fast() {
+            return Err(TantivyError::SchemaError(format!(
+                "Field {:?} is not an int fast field",
+                field_entry.name()
+            )));
+        }
+        let expected_value_type = self.int_type.value_type();
+        if field_entry.field_type().value_type() != expected_value_type {
+            return Err(TantivyError::SchemaError(format!(
+                "Field {:?} is not a {:?}",
+                field_entry.name(),
+                expected_value_type
+            )));
+        }
+        Ok(())
+    }
+}
+
+impl Query for FastFieldFilterQuery {
+    /// Validates the field against the searcher's schema, then builds the weight.
+    fn weight(&self, searcher: &Searcher, _scoring_enabled: bool) -> Result<Box<Weight>> {
+        self.validate_schema(searcher.schema())?;
+        Ok(Box::new(FastFieldFilterWeight::new(
+            self.field,
+            self.range.clone(),
+        )))
+    }
+}
diff --git a/src/query/fastfield_filter/fastfield_filter_scorer.rs b/src/query/fastfield_filter/fastfield_filter_scorer.rs
new file mode 100644
--- /dev/null
+++ b/src/query/fastfield_filter/fastfield_filter_scorer.rs
@@ -0,0 +1,74 @@
+use query::Scorer;
+use fastfield::FastFieldReader;
+use DocId;
+use DocSet;
+use query::fastfield_filter::RangeU64;
+
+/// Scorer visiting the docs of a segment whose fast field value is within a range.
+///
+/// Every doc id below `max_doc` is tested in turn; there is no skipping.
+pub(crate) struct FastFieldFilterScorer {
+    fastfield_reader: FastFieldReader<u64>,
+    range: RangeU64,
+    max_doc: DocId,
+    doc: DocId,
+    // First doc id that has not been tested yet.
+    next_doc: DocId,
+}
+
+impl FastFieldFilterScorer {
+    /// Creates a scorer that has not been advanced yet.
+    pub fn new(
+        fastfield_reader: FastFieldReader<u64>,
+        range: RangeU64,
+        max_doc: DocId,
+    ) -> FastFieldFilterScorer {
+        FastFieldFilterScorer {
+            fastfield_reader,
+            range,
+            max_doc,
+            doc: 0u32,
+            next_doc: 0u32,
+        }
+    }
+
+    /// Returns true iff the fast field value of `doc` is within the range.
+    fn within_range(&self, doc: DocId) -> bool {
+        let val = self.fastfield_reader.get(doc);
+        self.range.contains(val)
+    }
+}
+
+impl DocSet for FastFieldFilterScorer {
+    /// Moves to the next doc within the range, returning false once exhausted.
+    fn advance(&mut self) -> bool {
+        // Scanning from `next_doc` rather than `doc + 1` lets the first call
+        // test doc 0.
+        for doc in self.next_doc..self.max_doc {
+            if self.within_range(doc) {
+                self.doc = doc;
+                self.next_doc = doc + 1;
+                return true;
+            }
+        }
+        self.doc = self.max_doc;
+        self.next_doc = self.max_doc;
+        false
+    }
+
+    fn doc(&self) -> u32 {
+        self.doc
+    }
+
+    /// Upper bound on the number of matches: every doc of the segment.
+    fn size_hint(&self) -> u32 {
+        self.max_doc
+    }
+}
+
+impl Scorer for FastFieldFilterScorer {
+    /// All matching docs get the same score.
+    fn score(&mut self) -> f32 {
+        1f32
+    }
+}
diff --git a/src/query/fastfield_filter/fastfield_filter_weight.rs b/src/query/fastfield_filter/fastfield_filter_weight.rs
new file mode 100644
--- /dev/null
+++ b/src/query/fastfield_filter/fastfield_filter_weight.rs
@@ -0,0 +1,32 @@
+use query::Weight;
+use schema::Field;
+use query::fastfield_filter::RangeU64;
+use query::fastfield_filter::FastFieldFilterScorer;
+use SegmentReader;
+use query::Scorer;
+use TantivyError;
+use fastfield::FastFieldReader;
+
+/// Weight of a `FastFieldFilterQuery`, creating one scorer per segment.
+pub struct FastFieldFilterWeight {
+    field: Field,
+    range: RangeU64,
+}
+
+impl FastFieldFilterWeight {
+    pub(crate) fn new(field: Field, range: RangeU64) -> FastFieldFilterWeight {
+        FastFieldFilterWeight { field, range }
+    }
+}
+
+impl Weight for FastFieldFilterWeight {
+    /// Opens the fast field of the segment and wraps it in a filtering scorer.
+    fn scorer(&self, reader: &SegmentReader) -> Result<Box<Scorer>, TantivyError> {
+        let fastfield_reader: FastFieldReader<u64> = reader.fast_field_reader(self.field)?;
+        Ok(Box::new(FastFieldFilterScorer::new(
+            fastfield_reader,
+            self.range.clone(),
+            reader.max_doc(),
+        )))
+    }
+}
diff --git a/src/query/fastfield_filter/mod.rs b/src/query/fastfield_filter/mod.rs
new file mode 100644
--- /dev/null
+++ b/src/query/fastfield_filter/mod.rs
@@ -0,0 +1,70 @@
+use std::collections::Bound;
+
+mod fastfield_filter_query;
+mod fastfield_filter_weight;
+mod fastfield_filter_scorer;
+
+pub use self::fastfield_filter_query::FastFieldFilterQuery;
+use self::fastfield_filter_weight::FastFieldFilterWeight;
+use self::fastfield_filter_scorer::FastFieldFilterScorer;
+
+/// Range of `u64` values delimited by a lower and an upper bound.
+#[derive(Debug, Clone)]
+pub(crate) struct RangeU64 {
+    pub low: Bound<u64>,
+    pub high: Bound<u64>,
+}
+
+impl RangeU64 {
+    /// Returns true iff `val` satisfies the upper bound.
+    fn match_high(&self, val: u64) -> bool {
+        match self.high {
+            Bound::Excluded(bound) => val < bound,
+            Bound::Included(bound) => val <= bound,
+            Bound::Unbounded => true,
+        }
+    }
+
+    /// Returns true iff `val` satisfies the lower bound.
+    fn match_low(&self, val: u64) -> bool {
+        match self.low {
+            Bound::Excluded(bound) => bound < val,
+            Bound::Included(bound) => bound <= val,
+            Bound::Unbounded => true,
+        }
+    }
+
+    /// Returns true iff `val` satisfies both bounds.
+    pub fn contains(&self, val: u64) -> bool {
+        self.match_low(val) && self.match_high(val)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::RangeU64;
+    use std::collections::Bound;
+
+    #[test]
+    fn test_range_u64_contains() {
+        let range = RangeU64 {
+            low: Bound::Included(2u64),
+            high: Bound::Excluded(5u64),
+        };
+        assert!(!range.contains(1u64));
+        assert!(range.contains(2u64));
+        assert!(range.contains(4u64));
+        assert!(!range.contains(5u64));
+    }
+
+    #[test]
+    fn test_range_u64_unbounded_low() {
+        let range = RangeU64 {
+            low: Bound::Unbounded,
+            high: Bound::Included(5u64),
+        };
+        assert!(range.contains(0u64));
+        assert!(range.contains(5u64));
+        assert!(!range.contains(6u64));
+    }
+}
diff --git a/src/query/mod.rs b/src/query/mod.rs
index 7546465fb..7c0a73f33 100644
--- a/src/query/mod.rs
+++ b/src/query/mod.rs
@@ -22,6 +22,7 @@ mod scorer;
 mod term_query;
 mod union;
 mod weight;
+mod fastfield_filter;
 
 #[cfg(test)]
 mod vec_docset;
@@ -54,3 +55,4 @@ pub use self::scorer::ConstScorer;
 pub use self::scorer::Scorer;
 pub use self::term_query::TermQuery;
 pub use self::weight::Weight;
+pub use self::fastfield_filter::FastFieldFilterQuery;
\ No newline at end of file