diff --git a/src/query/query_parser/logical_ast.rs b/src/query/query_parser/logical_ast.rs index 9130cfb56..1897880ac 100644 --- a/src/query/query_parser/logical_ast.rs +++ b/src/query/query_parser/logical_ast.rs @@ -1,11 +1,16 @@ use query::Occur; +use schema::Field; use schema::Term; use std::fmt; +use std::ops::Bound; +use schema::Type; #[derive(Clone)] pub enum LogicalLiteral { Term(Term), Phrase(Vec), + Range { field: Field, value_type: Type, lower: Bound, upper: Bound }, + All, } #[derive(Clone)] @@ -54,6 +59,8 @@ impl fmt::Debug for LogicalLiteral { match *self { LogicalLiteral::Term(ref term) => write!(formatter, "{:?}", term), LogicalLiteral::Phrase(ref terms) => write!(formatter, "\"{:?}\"", terms), + LogicalLiteral::Range { ref lower, ref upper, .. } => write!(formatter, "({:?} TO {:?})", lower, upper), + LogicalLiteral::All => write!(formatter, "*"), } } } diff --git a/src/query/query_parser/query_grammar.rs b/src/query/query_parser/query_grammar.rs index 6584abd4e..1eceece1c 100644 --- a/src/query/query_parser/query_grammar.rs +++ b/src/query/query_parser/query_grammar.rs @@ -1,29 +1,36 @@ use super::user_input_ast::*; use combine::char::*; use combine::*; +use query::query_parser::user_input_ast::UserInputBound; + +fn field>() -> impl Parser { + (letter(), many(satisfy(|c: char| c.is_alphanumeric() || c == '_'))) + .map(|(s1, s2): (char, String)| format!("{}{}", s1, s2)) +} + +fn word>() -> impl Parser { + many1(satisfy(|c: char| c.is_alphanumeric())) +} + + +fn negative_number>() -> impl Parser { + (char('-'), many1(satisfy(|c: char| c.is_numeric()))) + .map(|(s1, s2): (char, String)| format!("{}{}", s1, s2)) +} fn literal(input: I) -> ParseResult where I: Stream, { let term_val = || { - let word = many1(satisfy(|c: char| c.is_alphanumeric())); let phrase = (char('"'), many1(satisfy(|c| c != '"')), char('"')).map(|(_, s, _)| s); - phrase.or(word) + phrase.or(word()) }; - let negative_numbers = (char('-'), many1(satisfy(|c: char| c.is_numeric()))) - .map(|(s1, s2): (char, String)| format!("{}{}", s1, s2)); - - let field = ( - letter(), - many(satisfy(|c: char| c.is_alphanumeric() || c == '_')), - ).map(|(s1, s2): (char, String)| format!("{}{}", s1, s2)); - - let term_val_with_field = negative_numbers.or(term_val()); + let term_val_with_field = negative_number().or(term_val()); let term_query = - (field, char(':'), term_val_with_field).map(|(field_name, _, phrase)| UserInputLiteral { + (field(), char(':'), term_val_with_field).map(|(field_name, _, phrase)| UserInputLiteral { field_name: Some(field_name), phrase, }); @@ -37,6 +44,26 @@ where .parse_stream(input) } +fn range>(input: I) -> ParseResult { + let term_val = || { + word().or(negative_number()) + }; + let lower_bound = { + let excl = (char('{'), term_val()).map(|(_, w)| UserInputBound::Exclusive(w)); + let incl = (char('['), term_val()).map(|(_, w)| UserInputBound::Inclusive(w)); + excl.or(incl) + }; + let upper_bound = { + let excl = (term_val(), char('}')).map(|(w, _)| UserInputBound::Exclusive(w)); + let incl = (term_val(), char(']')).map(|(w, _)| UserInputBound::Inclusive(w)); + // TODO: this backtracking should be unnecessary + try(excl).or(incl) + }; + (optional((field(), char(':')).map(|x| x.0)), lower_bound, spaces(), string("TO"), spaces(), upper_bound) + .map(|(field, lower, _, _, _, upper)| UserInputAST::Range { field, lower, upper }) + .parse_stream(input) +} + fn leaf(input: I) -> ParseResult where I: Stream, @@ -45,6 +72,8 @@ where .map(|(_, expr)| UserInputAST::Not(Box::new(expr))) .or((char('+'), parser(leaf)).map(|(_, expr)| UserInputAST::Must(Box::new(expr)))) .or((char('('), parser(parse_to_ast), char(')')).map(|(_, expr, _)| expr)) + .or(char('*').map(|_| UserInputAST::All)) + .or(try(parser(range))) .or(parser(literal)) .parse_stream(input) } @@ -91,6 +120,10 @@ mod test { test_parse_query_to_ast_helper("-abc:toto", "-(abc:\"toto\")"); test_parse_query_to_ast_helper("abc:a b", "(abc:\"a\" \"b\")"); test_parse_query_to_ast_helper("abc:\"a b\"", "abc:\"a b\""); + test_parse_query_to_ast_helper("foo:[1 TO 5]", "foo:[\"1\" TO \"5\"]"); + test_parse_query_to_ast_helper("[1 TO 5]", "[\"1\" TO \"5\"]"); + test_parse_query_to_ast_helper("foo:{a TO z}", "foo:{\"a\" TO \"z\"}"); + test_parse_query_to_ast_helper("foo:[1 TO toto}", "foo:[\"1\" TO \"toto\"}"); test_is_parse_err("abc + "); } } diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs index e7c461c92..55e0845c1 100644 --- a/src/query/query_parser/query_parser.rs +++ b/src/query/query_parser/query_parser.rs @@ -13,6 +13,10 @@ use schema::{FieldType, Term}; use std::num::ParseIntError; use std::str::FromStr; use tokenizer::TokenizerManager; +use std::ops::Bound; +use query::RangeQuery; +use query::AllQuery; +use std::borrow::Cow; /// Possible error that may happen when parsing a query. #[derive(Debug, PartialEq, Eq)] @@ -39,6 +43,9 @@ pub enum QueryParserError { /// The tokenizer for the given field is unknown /// The two argument strings are the name of the field, the name of the tokenizer UnknownTokenizer(String, String), + /// The query contains a range query with a phrase as one of the bounds. + /// Only terms can be used as bounds. + RangeMustNotHavePhrase, } impl From for QueryParserError { @@ -66,8 +73,8 @@ impl From for QueryParserError { /// by relevance : The user typically just scans through the first few /// documents in order of decreasing relevance and will stop when the documents /// are not relevant anymore. -/// Making it possible to make this behavior customizable is tracked in -/// [issue #27](https://github.com/fulmicoton/tantivy/issues/27). +/// +/// Switching to a default of `AND` can be done by calling `.set_conjunction_by_default()`. /// /// * negative terms: By prepending a term by a `-`, a term can be excluded /// from the search. This is useful for disambiguating a query. @@ -75,6 +82,17 @@ impl From for QueryParserError { /// /// * must terms: By prepending a term by a `+`, a term can be made required for the search. /// +/// * phrase terms: Quoted terms become phrase searches on fields that have positions indexed. +/// e.g., `title:"Barack Obama"` will only find documents that have "barack" immediately followed +/// by "obama". +/// +/// * range terms: Range searches can be done by specifying the start and end bound. These can be +/// inclusive or exclusive. e.g., `title:[a TO c}` will find all documents whose title contains +/// a word lexicographically between `a` and `c` (inclusive lower bound, exclusive upper bound). +/// Inclusive bounds are `[]`, exclusive are `{}`. +/// +/// * all docs query: A plain `*` will match all documents in the index. +/// pub struct QueryParser { schema: Schema, default_fields: Vec, @@ -155,11 +173,12 @@ impl QueryParser { } Ok(ast) } - fn compute_logical_ast_for_leaf( + + fn compute_terms_for_string( &self, field: Field, - phrase: &str, - ) -> Result, QueryParserError> { + phrase: &str + ) -> Result, QueryParserError> { let field_entry = self.schema.get_field_entry(field); let field_type = field_entry.field_type(); if !field_type.is_indexed() { @@ -170,12 +189,12 @@ impl QueryParser { FieldType::I64(_) => { let val: i64 = i64::from_str(phrase)?; let term = Term::from_field_i64(field, val); - Ok(Some(LogicalLiteral::Term(term))) + Ok(vec![term]) } FieldType::U64(_) => { let val: u64 = u64::from_str(phrase)?; let term = Term::from_field_u64(field, val); - Ok(Some(LogicalLiteral::Term(term))) + Ok(vec![term]) } FieldType::Str(ref str_options) => { if let Some(option) = str_options.get_indexing_options() { @@ -194,17 +213,15 @@ impl QueryParser { terms.push(term); }); if terms.is_empty() { - Ok(None) + Ok(vec![]) } else if terms.len() == 1 { - Ok(Some(LogicalLiteral::Term( - terms.into_iter().next().unwrap(), - ))) + Ok(terms) } else { let field_entry = self.schema.get_field_entry(field); let field_type = field_entry.field_type(); if let Some(index_record_option) = field_type.get_index_record_option() { if index_record_option.has_positions() { - Ok(Some(LogicalLiteral::Phrase(terms))) + Ok(terms) } else { let fieldname = self.schema.get_field_name(field).to_string(); Err(QueryParserError::FieldDoesNotHavePositionsIndexed( @@ -224,8 +241,7 @@ impl QueryParser { } } FieldType::HierarchicalFacet => { - let term = Term::from_field_text(field, phrase); - Ok(Some(LogicalLiteral::Term(term))) + Ok(vec![Term::from_field_text(field, phrase)]) } FieldType::Bytes => { let field_name = self.schema.get_field_name(field).to_string(); @@ -234,6 +250,19 @@ impl QueryParser { } } + fn compute_logical_ast_for_leaf( + &self, + field: Field, + phrase: &str, + ) -> Result, QueryParserError> { + let terms = self.compute_terms_for_string(field, phrase)?; + match terms.len() { + 0 => Ok(None), + 1 => Ok(Some(LogicalLiteral::Term(terms.into_iter().next().unwrap()))), + _ => Ok(Some(LogicalLiteral::Phrase(terms))), + } + } + fn default_occur(&self) -> Occur { if self.conjunction_by_default { Occur::Must @@ -242,6 +271,31 @@ impl QueryParser { } } + fn resolve_bound(&self, field: Field, bound: &UserInputBound) -> Result, QueryParserError> { + let terms = self.compute_terms_for_string(field, bound.term_str())?; + if terms.len() != 1 { + return Err(QueryParserError::RangeMustNotHavePhrase) + } + let term = terms.into_iter().next().unwrap(); + match *bound { + UserInputBound::Inclusive(_) => Ok(Bound::Included(term)), + UserInputBound::Exclusive(_) => Ok(Bound::Excluded(term)), + } + } + + fn resolved_fields(&self, given_field: &Option) -> Result, QueryParserError> { + match *given_field { + None => { + if self.default_fields.is_empty() { + Err(QueryParserError::NoDefaultFieldDeclared) + } else { + Ok(Cow::from(&self.default_fields[..])) + } + }, + Some(ref field) => Ok(Cow::from(vec![self.resolve_field_name(&*field)?])), + } + } + fn compute_logical_ast_with_occur( &self, user_input_ast: UserInputAST, @@ -265,6 +319,28 @@ impl QueryParser { let (occur, logical_sub_queries) = self.compute_logical_ast_with_occur(*subquery)?; Ok((compose_occur(Occur::Must, occur), logical_sub_queries)) } + UserInputAST::Range { field, lower, upper } => { + let fields = self.resolved_fields(&field)?; + let mut clauses = fields.iter().map(|&field| { + let field_entry = self.schema.get_field_entry(field); + let value_type = field_entry.field_type().value_type(); + Ok(LogicalAST::Leaf(Box::new(LogicalLiteral::Range { + field, + value_type, + lower: self.resolve_bound(field, &lower)?, + upper: self.resolve_bound(field, &upper)?, + }))) + }).collect::, QueryParserError>>()?; + let result_ast = if clauses.len() == 1 { + clauses.pop().unwrap() + } else { + LogicalAST::Clause(clauses.into_iter().map(|clause| (Occur::Should, clause)).collect()) + }; + Ok((Occur::Should, result_ast)) + } + UserInputAST::All => { + Ok((Occur::Should, LogicalAST::Leaf(Box::new(LogicalLiteral::All)))) + } UserInputAST::Leaf(literal) => { let term_phrases: Vec<(Field, String)> = match literal.field_name { Some(ref field_name) => { @@ -327,6 +403,10 @@ fn convert_literal_to_query(logical_literal: LogicalLiteral) -> Box { match logical_literal { LogicalLiteral::Term(term) => Box::new(TermQuery::new(term, IndexRecordOption::WithFreqs)), LogicalLiteral::Phrase(terms) => Box::new(PhraseQuery::new(terms)), + LogicalLiteral::Range { field, value_type, lower, upper } => { + Box::new(RangeQuery::new_term_bounds(field, value_type, lower, upper)) + }, + LogicalLiteral::All => Box::new(AllQuery), } } @@ -511,6 +591,31 @@ mod test { Term([0, 0, 0, 0, 98])]\"", false, ); + test_parse_query_to_logical_ast_helper( + "title:[a TO b]", + "(Included(Term([0, 0, 0, 0, 97])) TO \ + Included(Term([0, 0, 0, 0, 98])))", + false, + ); + test_parse_query_to_logical_ast_helper( + "[a TO b]", + "((Included(Term([0, 0, 0, 0, 97])) TO \ + Included(Term([0, 0, 0, 0, 98]))) \ + (Included(Term([0, 0, 0, 1, 97])) TO \ + Included(Term([0, 0, 0, 1, 98]))))", + false, + ); + test_parse_query_to_logical_ast_helper( + "title:{titi TO toto}", + "(Excluded(Term([0, 0, 0, 0, 116, 105, 116, 105])) TO \ + Excluded(Term([0, 0, 0, 0, 116, 111, 116, 111])))", + false, + ); + test_parse_query_to_logical_ast_helper( + "*", + "*", + false, + ); } #[test] diff --git a/src/query/query_parser/user_input_ast.rs b/src/query/query_parser/user_input_ast.rs index 12b419f02..21138e978 100644 --- a/src/query/query_parser/user_input_ast.rs +++ b/src/query/query_parser/user_input_ast.rs @@ -14,10 +14,40 @@ impl fmt::Debug for UserInputLiteral { } } +pub enum UserInputBound { + Inclusive(String), + Exclusive(String), +} + +impl UserInputBound { + fn display_lower(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> { + match *self { + UserInputBound::Inclusive(ref word) => write!(formatter, "[\"{}\"", word), + UserInputBound::Exclusive(ref word) => write!(formatter, "{{\"{}\"", word), + } + } + + fn display_upper(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> { + match *self { + UserInputBound::Inclusive(ref word) => write!(formatter, "\"{}\"]", word), + UserInputBound::Exclusive(ref word) => write!(formatter, "\"{}\"}}", word), + } + } + + pub fn term_str(&self) -> &str { + match *self { + UserInputBound::Inclusive(ref contents) => contents, + UserInputBound::Exclusive(ref contents) => contents, + } + } +} + pub enum UserInputAST { Clause(Vec>), Not(Box), Must(Box), + Range { field: Option, lower: UserInputBound, upper: UserInputBound }, + All, Leaf(Box), } @@ -45,6 +75,16 @@ impl fmt::Debug for UserInputAST { Ok(()) } UserInputAST::Not(ref subquery) => write!(formatter, "-({:?})", subquery), + UserInputAST::Range { ref field, ref lower, ref upper } => { + if let &Some(ref field) = field { + write!(formatter, "{}:", field)?; + } + lower.display_lower(formatter)?; + write!(formatter, " TO ")?; + upper.display_upper(formatter)?; + Ok(()) + }, + UserInputAST::All => write!(formatter, "*"), UserInputAST::Leaf(ref subquery) => write!(formatter, "{:?}", subquery), } } diff --git a/src/query/range_query.rs b/src/query/range_query.rs index 5cc371745..90453e4e5 100644 --- a/src/query/range_query.rs +++ b/src/query/range_query.rs @@ -89,6 +89,28 @@ pub struct RangeQuery { } impl RangeQuery { + /// Creates a new `RangeQuery` from bounded start and end terms. + /// + /// If the value type is not correct, something may go terribly wrong when + /// the `Weight` object is created. + pub fn new_term_bounds( + field: Field, + value_type: Type, + left_bound: Bound, + right_bound: Bound + ) -> RangeQuery { + let verify_and_unwrap_term = |val: &Term| { + assert_eq!(field, val.field()); + val.value_bytes().to_owned() + }; + RangeQuery { + field, + value_type, + left_bound: map_bound(&left_bound, &verify_and_unwrap_term), + right_bound: map_bound(&right_bound, &verify_and_unwrap_term), + } + } + /// Creates a new `RangeQuery` over a `i64` field. /// /// If the field is not of the type `i64`, tantivy