Compare commits

...

4 Commits

Author SHA1 Message Date
Paul Masurel
488bceda10 Merge branch 'master' of github.com:tantivy-search/tantivy into dds/lenient 2018-10-30 09:59:48 +09:00
Dru Sellers
f2b8755e10 Update test name and method comments 2018-08-18 10:26:06 -05:00
Dru Sellers
fa269f1f34 Santize known poor queries 2018-08-18 10:19:27 -05:00
Dru Sellers
e23a9303ce Add parse_query_lenient to QueryParser 2018-08-18 09:30:45 -05:00
2 changed files with 76 additions and 43 deletions

View File

@@ -177,9 +177,6 @@ impl QueryParser {
///
/// This method is strict: malformed input is reported as an error,
/// which makes it a bad choice for a public/broad user search engine.
/// Use `parse_query_lenient` when the input must never be rejected.
///
/// Lenient parsing for this query parser is tracked
/// in [Issue 5](https://github.com/fulmicoton/tantivy/issues/5)
pub fn parse_query(&self, query: &str) -> Result<Box<Query>, QueryParserError> {
let logical_ast = self.parse_query_to_logical_ast(query)?;
Ok(convert_to_query(logical_ast))
@@ -193,6 +190,61 @@ impl QueryParser {
self.compute_logical_ast(user_input_ast)
}
/// Parses `query` without ever reporting an error.
///
/// The query is first parsed as-is. If that fails, it is sanitized by
/// `parse_lenient_query_to_logical_ast` and parsed a second time. If the
/// second attempt fails as well, an `EmptyQuery` is returned.
pub fn parse_query_lenient(&self, query: &str) -> Box<Query> {
    // Strict attempt first; the sanitizing attempt only runs on failure.
    let parsed = self
        .parse_query_to_logical_ast(query)
        .or_else(|_| self.parse_lenient_query_to_logical_ast(query));
    match parsed {
        Ok(logical_ast) => convert_to_query(logical_ast),
        // Neither attempt produced an AST: fall back to the empty query.
        Err(_) => Box::new(EmptyQuery),
    }
}
/// Sanitizes a query that failed strict parsing, then parses it again.
///
/// The clean-up steps, in order:
///
/// 1. every character of `\+-&|!(){}[]^~*?:/` is removed;
/// 2. whitespace-delimited `AND`, `OR` and `NOT` words are lower-cased so
///    that they are read as plain terms instead of boolean operators;
/// 3. if the number of double quotes is odd, every `"` is prefixed with a
///    backslash.
///
/// # Errors
///
/// Returns `QueryParserError::SyntaxError` when the sanitized string is
/// still rejected by the grammar, and forwards any error returned by
/// `compute_logical_ast`.
fn parse_lenient_query_to_logical_ast(
    &self,
    query: &str,
) -> Result<LogicalAST, QueryParserError> {
    // Characters that carry meaning in the query syntax. They are dropped
    // (not escaped) from the input.
    const SPECIAL_CHARS: &str = "\\+-&|!(){}[]^~*?:/";

    /// Appends `word` to `out`, lower-casing it when it is exactly one of
    /// the boolean operator keywords.
    fn push_neutralized(out: &mut String, word: &str) {
        match word {
            "AND" | "OR" | "NOT" => out.push_str(&word.to_lowercase()),
            _ => out.push_str(word),
        }
    }

    let stripped: String = query
        .chars()
        .filter(|c| !SPECIAL_CHARS.contains(*c))
        .collect();

    // The previous code replaced each operator word with itself, which left
    // the query untouched. Operators are now neutralized on whole-word
    // matches only, so words such as `ANDROID` are not affected, and the
    // original whitespace is preserved.
    // NOTE(review): assumes the grammar only recognises the upper-case
    // spellings as operators; confirm against the query grammar.
    let mut scrubbed_query = String::with_capacity(stripped.len());
    let mut word_start = 0;
    for (idx, c) in stripped.char_indices() {
        if c.is_whitespace() {
            push_neutralized(&mut scrubbed_query, &stripped[word_start..idx]);
            scrubbed_query.push(c);
            word_start = idx + c.len_utf8();
        }
    }
    push_neutralized(&mut scrubbed_query, &stripped[word_start..]);

    // An odd number of quotes means at least one phrase is left open:
    // backslash-escape every quote.
    // NOTE(review): backslashes were stripped above as special characters;
    // confirm that the grammar understands `\"` as an escaped quote.
    let quote_count = scrubbed_query.chars().filter(|&c| c == '"').count();
    if quote_count % 2 == 1 {
        scrubbed_query = scrubbed_query.replace("\"", "\\\"");
    }

    // Whatever input the grammar leaves unconsumed is discarded here.
    let (user_input_ast, _remaining) = parse_to_ast()
        .parse(scrubbed_query.as_str())
        .map_err(|_| QueryParserError::SyntaxError)?;
    self.compute_logical_ast(user_input_ast)
}
fn resolve_field_name(&self, field_name: &str) -> Result<Field, QueryParserError> {
self.schema
.get_field(field_name)
@@ -544,6 +596,26 @@ mod test {
assert!(query_parser.parse_query("toto").is_ok());
}
#[test]
pub fn test_parse_query_lenient_no_panics() {
    // A valid query, an empty query and a malformed query: none may panic.
    let query_parser = make_query_parser();
    for input in &["toto", "", "+(happy"] {
        query_parser.parse_query_lenient(input);
    }
}
#[test]
pub fn test_parse_query_lenient_escapes_bad_queries() {
    // `+(happy` is malformed; once sanitized it must parse as the single
    // word `happy`, which yields one term per entry of the debug output.
    let parser = make_query_parser();
    let logical_ast = parser
        .parse_lenient_query_to_logical_ast("+(happy")
        .unwrap();
    let rendered = format!("{:?}", logical_ast);
    let expected =
        "(Term([0, 0, 0, 0, 104, 97, 112, 112, 121]) Term([0, 0, 0, 1, 104, 97, 112, 112, 121]))";
    assert_eq!(rendered, expected);
}
#[test]
pub fn test_parse_nonindexed_field_yields_error() {
let query_parser = make_query_parser();

View File

@@ -80,9 +80,6 @@ impl UserInputBound {
/// Syntax tree of a user query.
pub enum UserInputAST {
    /// A group of sub-trees.
    Clause(Vec<UserInputAST>),
    /// A single sub-tree tagged with an `Occur`.
    Unary(Occur, Box<UserInputAST>),
    /// A terminal node wrapping a `UserInputLeaf`.
    Leaf(Box<UserInputLeaf>),
}
@@ -92,7 +89,7 @@ impl UserInputAST {
}
fn compose(occur: Occur, asts: Vec<UserInputAST>) -> UserInputAST {
assert!(occur != Occur::MustNot);
assert_ne!(occur, Occur::MustNot);
assert!(!asts.is_empty());
if asts.len() == 1 {
asts.into_iter().next().unwrap() //< safe
@@ -114,42 +111,6 @@ impl UserInputAST {
}
}
/*
impl UserInputAST {
fn compose_occur(self, occur: Occur) -> UserInputAST {
match self {
UserInputAST::Not(other) => {
let new_occur = compose_occur(Occur::MustNot, occur);
other.simplify()
}
_ => {
self
}
}
}
pub fn simplify(self) -> UserInputAST {
match self {
UserInputAST::Clause(els) => {
if els.len() == 1 {
return els.into_iter().next().unwrap();
} else {
return self;
}
}
UserInputAST::Not(els) => {
if els.len() == 1 {
return els.into_iter().next().unwrap();
} else {
return self;
}
}
}
}
}
*/
impl From<UserInputLiteral> for UserInputLeaf {
fn from(literal: UserInputLiteral) -> UserInputLeaf {
UserInputLeaf::Literal(literal)