mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2025-12-27 20:42:54 +00:00
Compare commits
4 Commits
missing-sp
...
dds/lenien
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
488bceda10 | ||
|
|
f2b8755e10 | ||
|
|
fa269f1f34 | ||
|
|
e23a9303ce |
@@ -177,9 +177,6 @@ impl QueryParser {
|
||||
///
|
||||
/// There is currently no lenient mode for the query parser
|
||||
/// which makes it a bad choice for a public/broad user search engine.
|
||||
///
|
||||
/// Implementing a lenient mode for this query parser is tracked
|
||||
/// in [Issue 5](https://github.com/fulmicoton/tantivy/issues/5)
|
||||
pub fn parse_query(&self, query: &str) -> Result<Box<Query>, QueryParserError> {
|
||||
let logical_ast = self.parse_query_to_logical_ast(query)?;
|
||||
Ok(convert_to_query(logical_ast))
|
||||
@@ -193,6 +190,61 @@ impl QueryParser {
|
||||
self.compute_logical_ast(user_input_ast)
|
||||
}
|
||||
|
||||
/// Parse a query leniently: this always yields a query, never an error.
|
||||
///
|
||||
/// Note that `parse_query_lenient` will NOT return an error
|
||||
/// if the input is not a valid query.
|
||||
///
|
||||
/// If the strict parse fails, it strips the special characters from the query body,
|
||||
/// retries on the scrubbed text, and if that still fails returns an `EmptyQuery`.
|
||||
pub fn parse_query_lenient(&self, query: &str) -> Box<Query> {
|
||||
if let Ok(logical_ast) = self.parse_query_to_logical_ast(query) {
|
||||
return convert_to_query(logical_ast);
|
||||
}
|
||||
|
||||
// strict parse failed: retry through the scrubbing parser
|
||||
if let Ok(logical_ast) = self.parse_lenient_query_to_logical_ast(query) {
|
||||
return convert_to_query(logical_ast);
|
||||
}
|
||||
|
||||
// both attempts failed; we have no idea what you want, so fall back to `EmptyQuery`
|
||||
Box::new(EmptyQuery)
|
||||
}
|
||||
|
||||
/// Scrub a user query of special syntax, then parse the scrubbed text into a logical AST.
|
||||
fn parse_lenient_query_to_logical_ast(
|
||||
&self,
|
||||
query: &str,
|
||||
) -> Result<LogicalAST, QueryParserError> {
|
||||
// `parse_query_lenient` only calls this after the strict parse has failed,
|
||||
// so the input is treated as poorly formed and scrubbed before re-parsing
|
||||
|
||||
// Drop (rather than escape) every special character: \ + - & | ! ( ) { } [ ] ^ ~ * ? : /
|
||||
let special_chars = "\\+-&|!(){}[]^~*?:/";
|
||||
let mut scrubbed_query = query
|
||||
.chars()
|
||||
.filter(|c| !special_chars.contains(*c))
|
||||
.collect::<String>();
|
||||
|
||||
// AND, OR and NOT are used by tantivy as logical operators and are meant to be escaped.
|
||||
// NOTE(review): `format!("{}", word)` reproduces `word` unchanged, so this loop is a no-op.
|
||||
let special_words = vec!["AND", "OR", "NOT"];
|
||||
for word in special_words.iter() {
|
||||
scrubbed_query = scrubbed_query.replace(word, &format!("{}", word));
|
||||
}
|
||||
|
||||
// If the number of double quotes is odd, backslash-escape every double quote
|
||||
let quote_count = scrubbed_query.chars().filter(|&c| c == '\"').count();
|
||||
if quote_count % 2 == 1 {
|
||||
scrubbed_query = scrubbed_query.replace("\"", "\\\"");
|
||||
}
|
||||
|
||||
let (user_input_ast, _remaining) = parse_to_ast()
|
||||
.parse(scrubbed_query.as_str())
|
||||
.map_err(|_| QueryParserError::SyntaxError)?;
|
||||
self.compute_logical_ast(user_input_ast)
|
||||
}
|
||||
|
||||
fn resolve_field_name(&self, field_name: &str) -> Result<Field, QueryParserError> {
|
||||
self.schema
|
||||
.get_field(field_name)
|
||||
@@ -544,6 +596,26 @@ mod test {
|
||||
assert!(query_parser.parse_query("toto").is_ok());
|
||||
}
|
||||
|
||||
// Smoke test: calls `parse_query_lenient` on a plain word, an empty string and a
// malformed query; it makes no assertions and only fails if one of the calls panics.
#[test]
|
||||
pub fn test_parse_query_lenient_no_panics() {
|
||||
let query_parser = make_query_parser();
|
||||
|
||||
query_parser.parse_query_lenient("toto");
|
||||
query_parser.parse_query_lenient("");
|
||||
query_parser.parse_query_lenient("+(happy");
|
||||
}
|
||||
|
||||
// Pins the `Debug` rendering of the logical AST that the lenient helper builds from the
// malformed input "+(happy". The byte run 104, 97, 112, 112, 121 in the expected string
// is ASCII "happy", i.e. the input with its special characters removed.
#[test]
|
||||
pub fn test_parse_query_lenient_escapes_bad_queries() {
|
||||
let query_parser = make_query_parser();
|
||||
|
||||
let query = query_parser
|
||||
.parse_lenient_query_to_logical_ast("+(happy")
|
||||
.unwrap();
|
||||
let query_str = format!("{:?}", query);
|
||||
assert_eq!(query_str, "(Term([0, 0, 0, 0, 104, 97, 112, 112, 121]) Term([0, 0, 0, 1, 104, 97, 112, 112, 121]))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_parse_nonindexed_field_yields_error() {
|
||||
let query_parser = make_query_parser();
|
||||
|
||||
@@ -80,9 +80,6 @@ impl UserInputBound {
|
||||
pub enum UserInputAST {
|
||||
Clause(Vec<UserInputAST>),
|
||||
Unary(Occur, Box<UserInputAST>),
|
||||
// Not(Box<UserInputAST>),
|
||||
// Should(Box<UserInputAST>),
|
||||
// Must(Box<UserInputAST>),
|
||||
Leaf(Box<UserInputLeaf>),
|
||||
}
|
||||
|
||||
@@ -92,7 +89,7 @@ impl UserInputAST {
|
||||
}
|
||||
|
||||
fn compose(occur: Occur, asts: Vec<UserInputAST>) -> UserInputAST {
|
||||
assert!(occur != Occur::MustNot);
|
||||
assert_ne!(occur, Occur::MustNot);
|
||||
assert!(!asts.is_empty());
|
||||
if asts.len() == 1 {
|
||||
asts.into_iter().next().unwrap() //< safe
|
||||
@@ -114,42 +111,6 @@ impl UserInputAST {
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
impl UserInputAST {
|
||||
|
||||
fn compose_occur(self, occur: Occur) -> UserInputAST {
|
||||
match self {
|
||||
UserInputAST::Not(other) => {
|
||||
let new_occur = compose_occur(Occur::MustNot, occur);
|
||||
other.simplify()
|
||||
}
|
||||
_ => {
|
||||
self
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn simplify(self) -> UserInputAST {
|
||||
match self {
|
||||
UserInputAST::Clause(els) => {
|
||||
if els.len() == 1 {
|
||||
return els.into_iter().next().unwrap();
|
||||
} else {
|
||||
return self;
|
||||
}
|
||||
}
|
||||
UserInputAST::Not(els) => {
|
||||
if els.len() == 1 {
|
||||
return els.into_iter().next().unwrap();
|
||||
} else {
|
||||
return self;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
impl From<UserInputLiteral> for UserInputLeaf {
|
||||
fn from(literal: UserInputLiteral) -> UserInputLeaf {
|
||||
UserInputLeaf::Literal(literal)
|
||||
|
||||
Reference in New Issue
Block a user