diff --git a/src/analyzer/mod.rs b/src/analyzer/mod.rs index f34e4b8d9..cf29d8187 100644 --- a/src/analyzer/mod.rs +++ b/src/analyzer/mod.rs @@ -56,9 +56,6 @@ pub struct SimpleTokenizer; impl SimpleTokenizer { - pub fn new() -> SimpleTokenizer { - SimpleTokenizer - } pub fn tokenize<'a>(&self, text: &'a str) -> TokenIter<'a> { TokenIter { @@ -71,7 +68,7 @@ impl SimpleTokenizer { #[test] fn test_tokenizer() { - let simple_tokenizer = SimpleTokenizer::new(); + let simple_tokenizer = SimpleTokenizer; let mut term_reader = simple_tokenizer.tokenize("hello, happy tax payer!"); assert_eq!(term_reader.next().unwrap(), "hello"); assert_eq!(term_reader.next().unwrap(), "happy"); @@ -83,7 +80,7 @@ fn test_tokenizer() { #[test] fn test_tokenizer_empty() { - let simple_tokenizer = SimpleTokenizer::new(); + let simple_tokenizer = SimpleTokenizer; let mut term_reader = simple_tokenizer.tokenize(""); assert_eq!(term_reader.next(), None); } diff --git a/src/error.rs b/src/error.rs index 0f7bf1358..6699c7134 100644 --- a/src/error.rs +++ b/src/error.rs @@ -45,8 +45,8 @@ impl From for Error { } } -impl From for Error { - fn from(parsing_error: query::ParsingError) -> Error { +impl From for Error { + fn from(parsing_error: query::QueryParserError) -> Error { Error::InvalidArgument(format!("Query is invalid. {:?}", parsing_error)) } } diff --git a/src/query/mod.rs b/src/query/mod.rs index 40bfd10db..b0c39c286 100644 --- a/src/query/mod.rs +++ b/src/query/mod.rs @@ -8,23 +8,22 @@ mod boolean_query; mod multi_term_query; mod phrase_query; mod scorer; -mod query_parser; mod occur; mod weight; mod occur_filter; mod term_query; -mod query_parser2; +mod query_parser; -pub use self::occur_filter::OccurFilter; pub use self::boolean_query::BooleanQuery; -pub use self::occur::Occur; -pub use self::query::Query; -pub use self::term_query::TermQuery; -pub use self::phrase_query::PhraseQuery; pub use self::multi_term_query::MultiTermQuery; pub use self::multi_term_query::MultiTermWeight; -pub use self::scorer::Scorer; -pub use self::scorer::EmptyScorer; +pub use self::occur_filter::OccurFilter; +pub use self::occur::Occur; +pub use self::phrase_query::PhraseQuery; +pub use self::query_parser::QueryParserError; pub use self::query_parser::QueryParser; -pub use self::query_parser::ParsingError; +pub use self::query::Query; +pub use self::scorer::EmptyScorer; +pub use self::scorer::Scorer; +pub use self::term_query::TermQuery; pub use self::weight::Weight; diff --git a/src/query/query_parser.rs b/src/query/query_parser.rs deleted file mode 100644 index 0b167ea1d..000000000 --- a/src/query/query_parser.rs +++ /dev/null @@ -1,104 +0,0 @@ -use combine::*; -use combine::char::*; -use query::{Query, MultiTermQuery}; -use schema::{Schema, FieldType, Term, Field}; -use analyzer::SimpleTokenizer; -use analyzer::StreamingIterator; -use query::Occur; - - -/// Possible error that may happen when parsing a query. -#[derive(Debug)] -pub enum ParsingError { - /// Error in the query syntax - SyntaxError, - /// `FieldDoesNotExist(field_name: String)` - /// The query references a field that is not in the schema - FieldDoesNotExist(String), - /// `ExpectedU32(field_name: String, field_value: String)` - /// The query contains a term for a `u32`-field, but the value - /// is not a u32. - ExpectedU32(String, String), -} - -/// Tantivy's Query parser -/// -/// The language covered by the current parser is extremely simple. -/// -/// * simple terms: "e.g.: `Barack Obama` are simply analyzed using -/// tantivy's `StandardTokenizer`, hence becoming `["barack", "obama"]`. -/// The terms are then searched within the default terms of the query parser. -/// -/// e.g. If `body` and `title` are default fields, our example terms are -/// `["title:barack", "body:barack", "title:obama", "body:obama"]`. -/// By default, all tokenized and indexed fields are default fields. -/// -/// Multiple terms are handled as an `OR` : any document containing at least -/// one of the term will go through the scoring. -/// -/// This behavior is slower, but is not a bad idea if the user is sorting -/// by relevance : The user typically just scans through the first few -/// documents in order of decreasing relevance and will stop when the documents -/// are not relevant anymore. -/// Making it possible to make this behavior customizable is tracked in -/// [issue #27](https://github.com/fulmicoton/tantivy/issues/27). -/// -/// * negative terms: By prepending a term by a `-`, a term can be excluded -/// from the search. This is useful for disambiguating a query. -/// e.g. `apple -fruit` -/// -/// * must terms: By prepending a term by a `+`, a term can be made required for the search. -/// -pub struct QueryParser { - schema: Schema, - default_fields: Vec, -} - - - -impl QueryParser { - /// Creates a `QueryParser` - /// * schema - index Schema - /// * default_fields - fields used to search if no field is specifically defined - /// in the query. - pub fn new(schema: Schema, - default_fields: Vec) -> QueryParser { - QueryParser { - schema: schema, - default_fields: default_fields, - } - } - - - - /// Parse a query - /// - /// Note that `parse_query` returns an error if the input - /// is not a valid query. - /// - /// There is currently no lenient mode for the query parser - /// which makes it a bad choice for a public/broad user search engine. - /// - /// Implementing a lenient mode for this query parser is tracked - /// in [Issue 5](https://github.com/fulmicoton/tantivy/issues/5) - pub fn parse_query(&self, query: I) -> Result, ParsingError> where I: Stream { - panic!("a"); - } -} - - - - -#[derive(Debug, Eq, PartialEq)] -pub enum Literal { - WithField(String, String), - DefaultField(String), -} - - -pub fn query_language(input: I) -> ParseResult, I> - where I: Stream -{ - panic!("a"); -} - diff --git a/src/query/query_parser2/logical_ast.rs b/src/query/query_parser/logical_ast.rs similarity index 85% rename from src/query/query_parser2/logical_ast.rs rename to src/query/query_parser/logical_ast.rs index e6527fa91..47d15d0e8 100644 --- a/src/query/query_parser2/logical_ast.rs +++ b/src/query/query_parser/logical_ast.rs @@ -16,7 +16,7 @@ pub enum LogicalAST{ fn occur_letter(occur: Occur) -> &'static str { match occur { - Occur::Must => "'+", + Occur::Must => "+", Occur::MustNot => "-", Occur::Should => "", } @@ -31,10 +31,11 @@ impl fmt::Debug for LogicalAST { } else { let (ref occur, ref subquery) = clause[0]; - try!(write!(formatter, "{}{:?}", occur_letter(*occur), subquery)); + try!(write!(formatter, "({}{:?}", occur_letter(*occur), subquery)); for &(ref occur, ref subquery) in &clause[1..] { - try!(write!(formatter, "{}{:?}", occur_letter(*occur), subquery)); + try!(write!(formatter, " {}{:?}", occur_letter(*occur), subquery)); } + try!(formatter.write_str(")")); } Ok(()) } @@ -58,7 +59,6 @@ impl fmt::Debug for LogicalLiteral { write!(formatter, "{:?}", term) }, LogicalLiteral::Phrase(ref terms) => { - // write!(formatter, "\"{}\"", literal) write!(formatter, "\"{:?}\"", terms) } } diff --git a/src/query/query_parser/mod.rs b/src/query/query_parser/mod.rs new file mode 100644 index 000000000..83251158b --- /dev/null +++ b/src/query/query_parser/mod.rs @@ -0,0 +1,7 @@ +mod query_parser; +mod query_grammar; +mod user_input_ast; +mod logical_ast; + +pub use self::query_parser::QueryParser; +pub use self::query_parser::QueryParserError; \ No newline at end of file diff --git a/src/query/query_parser2/query_grammar.rs b/src/query/query_parser/query_grammar.rs similarity index 98% rename from src/query/query_parser2/query_grammar.rs rename to src/query/query_parser/query_grammar.rs index b75849963..5a01828c0 100644 --- a/src/query/query_parser2/query_grammar.rs +++ b/src/query/query_parser/query_grammar.rs @@ -1,7 +1,6 @@ use combine::*; use combine::char::*; use super::user_input_ast::*; -use schema::{Schema, Field}; fn literal(input: I) -> ParseResult where I: Stream { @@ -61,7 +60,6 @@ pub fn parse_to_ast(input: I) -> ParseResult #[cfg(test)] mod test { - use combine::*; use super::*; fn test_parse_query_to_ast_helper(query: &str, expected: &str) { diff --git a/src/query/query_parser2/query_parser.rs b/src/query/query_parser/query_parser.rs similarity index 69% rename from src/query/query_parser2/query_parser.rs rename to src/query/query_parser/query_parser.rs index 938f00349..ff8c4d788 100644 --- a/src/query/query_parser2/query_parser.rs +++ b/src/query/query_parser/query_parser.rs @@ -4,11 +4,9 @@ use query::BooleanQuery; use super::logical_ast::*; use super::user_input_ast::*; use super::query_grammar::parse_to_ast; -use super::boolean_operator::BooleanOperator; use query::Occur; use query::TermQuery; use query::PhraseQuery; -use combine::ParseError; use analyzer::SimpleTokenizer; use analyzer::StreamingIterator; use schema::Term; @@ -16,7 +14,7 @@ use schema::Term; /// Possible error that may happen when parsing a query. -#[derive(Debug)] +#[derive(Debug, PartialEq, Eq)] pub enum QueryParserError { /// Error in the query syntax SyntaxError, @@ -66,7 +64,7 @@ pub enum QueryParserError { pub struct QueryParser { schema: Schema, default_fields: Vec, - default_operator: BooleanOperator, + conjunction_by_default: bool, analyzer: Box, } @@ -80,11 +78,15 @@ impl QueryParser { QueryParser { schema: schema, default_fields: default_fields, - default_operator: BooleanOperator::And, + conjunction_by_default: false, analyzer: box SimpleTokenizer, } } + pub fn set_conjunction_by_default(&mut self) { + self.conjunction_by_default = true; + } + /// Parse a query /// /// Note that `parse_query` returns an error if the input @@ -96,10 +98,14 @@ impl QueryParser { /// Implementing a lenient mode for this query parser is tracked /// in [Issue 5](https://github.com/fulmicoton/tantivy/issues/5) pub fn parse_query(&self, query: &str) -> Result, QueryParserError> { - let (user_input_ast, remaining) = try!(parse_to_ast(query).map_err(|e| QueryParserError::SyntaxError)); - let logical_ast = try!(self.compute_logical_ast(user_input_ast)); + let logical_ast = self.parse_query_to_logical_ast(query)?; Ok(convert_to_query(logical_ast)) } + + pub fn parse_query_to_logical_ast(&self, query: &str) -> Result { + let (user_input_ast, remaining) = parse_to_ast(query).map_err(|_| QueryParserError::SyntaxError)?; + self.compute_logical_ast(user_input_ast) + } fn resolve_field_name(&self, field_name: &str) -> Result { self.schema.get_field(field_name) @@ -139,12 +145,28 @@ impl QueryParser { } } + fn default_occur(&self) -> Occur { + if self.conjunction_by_default { + Occur::Must + } + else { + Occur::Should + } + } + + pub fn compute_logical_ast_with_occur(&self, user_input_ast: UserInputAST) -> Result<(Occur, LogicalAST), QueryParserError> { match user_input_ast { UserInputAST::Clause(sub_queries) => { + let default_occur = self.default_occur(); let logical_sub_queries: Vec<(Occur, LogicalAST)> = try!(sub_queries .into_iter() .map(|sub_query| self.compute_logical_ast_with_occur(*sub_query)) + .map(|res| + res.map( + |(occur, sub_ast)| (default_occur.compose(occur), sub_ast) + ) + ) .collect()); Ok((Occur::Should, LogicalAST::Clause(logical_sub_queries))) } @@ -230,3 +252,50 @@ fn convert_to_query(logical_ast: LogicalAST) -> Box { } } + +#[cfg(test)] +mod test { + use schema::{SchemaBuilder, TEXT}; + use super::QueryParser; + use super::QueryParserError; + use super::super::logical_ast::*; + + fn parse_query_to_logical_ast(query: &str, default_conjunction: bool) -> Result { + let mut schema_builder = SchemaBuilder::default(); + let title = schema_builder.add_text_field("title", TEXT); + let text = schema_builder.add_text_field("text", TEXT); + let schema = schema_builder.build(); + let default_fields = vec!(title, text); + let mut query_parser = QueryParser::new(schema, default_fields); + if default_conjunction { + query_parser.set_conjunction_by_default(); + } + query_parser.parse_query_to_logical_ast(query) + } + + fn test_parse_query_to_logical_ast_helper(query: &str, expected: &str, default_conjunction: bool) { + let query = parse_query_to_logical_ast(query, default_conjunction).unwrap(); + let query_str = format!("{:?}", query); + assert_eq!(query_str, expected); + } + + #[test] + pub fn test_parse_query_to_ast_disjunction() { + test_parse_query_to_logical_ast_helper("title:toto", "Term([0, 116, 111, 116, 111])", false); + test_parse_query_to_logical_ast_helper("+title:toto", "Term([0, 116, 111, 116, 111])", false); + test_parse_query_to_logical_ast_helper("+title:toto -titi", "(+Term([0, 116, 111, 116, 111]) -(Term([0, 116, 105, 116, 105]) Term([1, 116, 105, 116, 105])))", false); + assert_eq!(parse_query_to_logical_ast("-title:toto", false).err().unwrap(), QueryParserError::AllButQueryForbidden); + test_parse_query_to_logical_ast_helper("title:a b", "(Term([0, 97]) (Term([0, 98]) Term([1, 98])))", false); + test_parse_query_to_logical_ast_helper("title:\"a b\"", "\"[Term([0, 97]), Term([0, 98])]\"", false); + } + + #[test] + pub fn test_parse_query_to_ast_conjunction() { + test_parse_query_to_logical_ast_helper("title:toto", "Term([0, 116, 111, 116, 111])", true); + test_parse_query_to_logical_ast_helper("+title:toto", "Term([0, 116, 111, 116, 111])", true); + test_parse_query_to_logical_ast_helper("+title:toto -titi", "(+Term([0, 116, 111, 116, 111]) -(Term([0, 116, 105, 116, 105]) Term([1, 116, 105, 116, 105])))", true); + assert_eq!(parse_query_to_logical_ast("-title:toto", true).err().unwrap(), QueryParserError::AllButQueryForbidden); + test_parse_query_to_logical_ast_helper("title:a b", "(+Term([0, 97]) +(Term([0, 98]) Term([1, 98])))", true); + test_parse_query_to_logical_ast_helper("title:\"a b\"", "\"[Term([0, 97]), Term([0, 98])]\"", true); + } +} diff --git a/src/query/query_parser2/user_input_ast.rs b/src/query/query_parser/user_input_ast.rs similarity index 100% rename from src/query/query_parser2/user_input_ast.rs rename to src/query/query_parser/user_input_ast.rs diff --git a/src/query/query_parser2/boolean_operator.rs b/src/query/query_parser2/boolean_operator.rs deleted file mode 100644 index 2ce934c51..000000000 --- a/src/query/query_parser2/boolean_operator.rs +++ /dev/null @@ -1,4 +0,0 @@ -pub enum BooleanOperator { - And, - Or, -} \ No newline at end of file diff --git a/src/query/query_parser2/mod.rs b/src/query/query_parser2/mod.rs deleted file mode 100644 index 3261eeb2c..000000000 --- a/src/query/query_parser2/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -mod query_parser; -mod query_grammar; -mod user_input_ast; -mod logical_ast; -mod boolean_operator; \ No newline at end of file