mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-03 15:52:55 +00:00
Compare commits
4 Commits
commit-cha
...
dds/lenien
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
488bceda10 | ||
|
|
f2b8755e10 | ||
|
|
fa269f1f34 | ||
|
|
e23a9303ce |
@@ -177,9 +177,6 @@ impl QueryParser {
|
|||||||
///
|
///
|
||||||
/// There is currently no lenient mode for the query parser
|
/// There is currently no lenient mode for the query parser
|
||||||
/// which makes it a bad choice for a public/broad user search engine.
|
/// which makes it a bad choice for a public/broad user search engine.
|
||||||
///
|
|
||||||
/// Implementing a lenient mode for this query parser is tracked
|
|
||||||
/// in [Issue 5](https://github.com/fulmicoton/tantivy/issues/5)
|
|
||||||
pub fn parse_query(&self, query: &str) -> Result<Box<Query>, QueryParserError> {
|
pub fn parse_query(&self, query: &str) -> Result<Box<Query>, QueryParserError> {
|
||||||
let logical_ast = self.parse_query_to_logical_ast(query)?;
|
let logical_ast = self.parse_query_to_logical_ast(query)?;
|
||||||
Ok(convert_to_query(logical_ast))
|
Ok(convert_to_query(logical_ast))
|
||||||
@@ -193,6 +190,61 @@ impl QueryParser {
|
|||||||
self.compute_logical_ast(user_input_ast)
|
self.compute_logical_ast(user_input_ast)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parse a query
|
||||||
|
///
|
||||||
|
/// Note that `parse_query_lenient` will NOT return an error
|
||||||
|
/// if the input is not a valid query.
|
||||||
|
///
|
||||||
|
/// It will instead strip all special characters from the query body,
|
||||||
|
/// and retry to process the query; if that still fails, it returns an `EmptyQuery`.
|
||||||
|
pub fn parse_query_lenient(&self, query: &str) -> Box<Query> {
|
||||||
|
if let Ok(logical_ast) = self.parse_query_to_logical_ast(query) {
|
||||||
|
return convert_to_query(logical_ast);
|
||||||
|
}
|
||||||
|
|
||||||
|
// try to clean up the query
|
||||||
|
if let Ok(logical_ast) = self.parse_lenient_query_to_logical_ast(query) {
|
||||||
|
return convert_to_query(logical_ast);
|
||||||
|
}
|
||||||
|
|
||||||
|
// we have no idea what you want, so here's nothing
|
||||||
|
Box::new(EmptyQuery)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Scrub a malformed user query, then parse the result into a logical AST.
|
||||||
|
fn parse_lenient_query_to_logical_ast(
|
||||||
|
&self,
|
||||||
|
query: &str,
|
||||||
|
) -> Result<LogicalAST, QueryParserError> {
|
||||||
|
// if we are here, we know we have a poorly formed
|
||||||
|
// query input
|
||||||
|
|
||||||
|
// Strip special characters: \+-&|!(){}[]^~*?:/
|
||||||
|
let special_chars = "\\+-&|!(){}[]^~*?:/";
|
||||||
|
let mut scrubbed_query = query
|
||||||
|
.chars()
|
||||||
|
.filter(|c| !special_chars.contains(*c))
|
||||||
|
.collect::<String>();
|
||||||
|
|
||||||
|
// AND, OR and NOT are used by tantivy as logical operators. We need
|
||||||
|
// to escape them. NOTE(review): the replacement below swaps each word for itself, so nothing is escaped yet.
|
||||||
|
let special_words = vec!["AND", "OR", "NOT"];
|
||||||
|
for word in special_words.iter() {
|
||||||
|
scrubbed_query = scrubbed_query.replace(word, &format!("{}", word));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Escape every double quote when the query contains an odd number of them
|
||||||
|
let quote_count = scrubbed_query.chars().filter(|&c| c == '\"').count();
|
||||||
|
if quote_count % 2 == 1 {
|
||||||
|
scrubbed_query = scrubbed_query.replace("\"", "\\\"");
|
||||||
|
}
|
||||||
|
|
||||||
|
let (user_input_ast, _remaining) = parse_to_ast()
|
||||||
|
.parse(scrubbed_query.as_str())
|
||||||
|
.map_err(|_| QueryParserError::SyntaxError)?;
|
||||||
|
self.compute_logical_ast(user_input_ast)
|
||||||
|
}
|
||||||
|
|
||||||
fn resolve_field_name(&self, field_name: &str) -> Result<Field, QueryParserError> {
|
fn resolve_field_name(&self, field_name: &str) -> Result<Field, QueryParserError> {
|
||||||
self.schema
|
self.schema
|
||||||
.get_field(field_name)
|
.get_field(field_name)
|
||||||
@@ -544,6 +596,26 @@ mod test {
|
|||||||
assert!(query_parser.parse_query("toto").is_ok());
|
assert!(query_parser.parse_query("toto").is_ok());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
pub fn test_parse_query_lenient_no_panics() {
|
||||||
|
let query_parser = make_query_parser();
|
||||||
|
|
||||||
|
query_parser.parse_query_lenient("toto");
|
||||||
|
query_parser.parse_query_lenient("");
|
||||||
|
query_parser.parse_query_lenient("+(happy");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
pub fn test_parse_query_lenient_escapes_bad_queries() {
|
||||||
|
let query_parser = make_query_parser();
|
||||||
|
|
||||||
|
let query = query_parser
|
||||||
|
.parse_lenient_query_to_logical_ast("+(happy")
|
||||||
|
.unwrap();
|
||||||
|
let query_str = format!("{:?}", query);
|
||||||
|
assert_eq!(query_str, "(Term([0, 0, 0, 0, 104, 97, 112, 112, 121]) Term([0, 0, 0, 1, 104, 97, 112, 112, 121]))");
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
pub fn test_parse_nonindexed_field_yields_error() {
|
pub fn test_parse_nonindexed_field_yields_error() {
|
||||||
let query_parser = make_query_parser();
|
let query_parser = make_query_parser();
|
||||||
|
|||||||
@@ -80,9 +80,6 @@ impl UserInputBound {
|
|||||||
pub enum UserInputAST {
|
pub enum UserInputAST {
|
||||||
Clause(Vec<UserInputAST>),
|
Clause(Vec<UserInputAST>),
|
||||||
Unary(Occur, Box<UserInputAST>),
|
Unary(Occur, Box<UserInputAST>),
|
||||||
// Not(Box<UserInputAST>),
|
|
||||||
// Should(Box<UserInputAST>),
|
|
||||||
// Must(Box<UserInputAST>),
|
|
||||||
Leaf(Box<UserInputLeaf>),
|
Leaf(Box<UserInputLeaf>),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -92,7 +89,7 @@ impl UserInputAST {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn compose(occur: Occur, asts: Vec<UserInputAST>) -> UserInputAST {
|
fn compose(occur: Occur, asts: Vec<UserInputAST>) -> UserInputAST {
|
||||||
assert!(occur != Occur::MustNot);
|
assert_ne!(occur, Occur::MustNot);
|
||||||
assert!(!asts.is_empty());
|
assert!(!asts.is_empty());
|
||||||
if asts.len() == 1 {
|
if asts.len() == 1 {
|
||||||
asts.into_iter().next().unwrap() //< safe
|
asts.into_iter().next().unwrap() //< safe
|
||||||
@@ -114,42 +111,6 @@ impl UserInputAST {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
impl UserInputAST {
|
|
||||||
|
|
||||||
fn compose_occur(self, occur: Occur) -> UserInputAST {
|
|
||||||
match self {
|
|
||||||
UserInputAST::Not(other) => {
|
|
||||||
let new_occur = compose_occur(Occur::MustNot, occur);
|
|
||||||
other.simplify()
|
|
||||||
}
|
|
||||||
_ => {
|
|
||||||
self
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn simplify(self) -> UserInputAST {
|
|
||||||
match self {
|
|
||||||
UserInputAST::Clause(els) => {
|
|
||||||
if els.len() == 1 {
|
|
||||||
return els.into_iter().next().unwrap();
|
|
||||||
} else {
|
|
||||||
return self;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
UserInputAST::Not(els) => {
|
|
||||||
if els.len() == 1 {
|
|
||||||
return els.into_iter().next().unwrap();
|
|
||||||
} else {
|
|
||||||
return self;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
impl From<UserInputLiteral> for UserInputLeaf {
|
impl From<UserInputLiteral> for UserInputLeaf {
|
||||||
fn from(literal: UserInputLiteral) -> UserInputLeaf {
|
fn from(literal: UserInputLiteral) -> UserInputLeaf {
|
||||||
UserInputLeaf::Literal(literal)
|
UserInputLeaf::Literal(literal)
|
||||||
|
|||||||
Reference in New Issue
Block a user