mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-06 01:02:55 +00:00
Various bugfix in the query parser (#619)
This commit is contained in:
@@ -2,7 +2,10 @@ Tantivy 0.11.0
|
||||
=====================
|
||||
|
||||
- Added f64 field. Internally reuse u64 code the same way i64 does (@fdb-hiroshima)
|
||||
- Closes #609. Better handling of hyphens in query parser.
|
||||
- Various bugfixes in the query parser.
|
||||
- Better handling of hyphens in query parser. (#609)
|
||||
- Better handling of whitespaces.
|
||||
|
||||
|
||||
Tantivy 0.10.1
|
||||
=====================
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
#![doc(html_logo_url = "http://fulmicoton.com/tantivy-logo/tantivy-logo.png")]
|
||||
#![recursion_limit = "100"]
|
||||
#![cfg_attr(all(feature = "unstable", test), feature(test))]
|
||||
#![cfg_attr(feature = "cargo-clippy", allow(clippy::module_inception))]
|
||||
#![doc(test(attr(allow(unused_variables), deny(warnings))))]
|
||||
#![warn(missing_docs)]
|
||||
#![recursion_limit = "80"]
|
||||
|
||||
//! # `tantivy`
|
||||
//!
|
||||
|
||||
@@ -18,7 +18,6 @@ pub enum LogicalLiteral {
|
||||
All,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub enum LogicalAST {
|
||||
Clause(Vec<(Occur, LogicalAST)>),
|
||||
Leaf(Box<LogicalLiteral>),
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
use super::query_grammar;
|
||||
use super::user_input_ast::*;
|
||||
use crate::query::occur::Occur;
|
||||
use crate::query::query_parser::user_input_ast::UserInputBound;
|
||||
@@ -13,7 +12,7 @@ parser! {
|
||||
(
|
||||
letter(),
|
||||
many(satisfy(|c: char| c.is_alphanumeric() || c == '_')),
|
||||
).map(|(s1, s2): (char, String)| format!("{}{}", s1, s2))
|
||||
).skip(char(':')).map(|(s1, s2): (char, String)| format!("{}{}", s1, s2))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,18 +20,17 @@ parser! {
|
||||
fn word[I]()(I) -> String
|
||||
where [I: Stream<Item = char>] {
|
||||
(
|
||||
satisfy(|c: char| c.is_alphanumeric()),
|
||||
satisfy(|c: char| !c.is_whitespace() && !['-', '`', ':', '{', '}', '"', '[', ']', '(',')'].contains(&c) ),
|
||||
many(satisfy(|c: char| !c.is_whitespace() && ![':', '{', '}', '"', '[', ']', '(',')'].contains(&c)))
|
||||
)
|
||||
.map(|(s1, s2): (char, String)| format!("{}{}", s1, s2))
|
||||
.and_then(|s: String| {
|
||||
.and_then(|s: String|
|
||||
match s.as_str() {
|
||||
"OR" => Err(StreamErrorFor::<I>::unexpected_static_message("OR")),
|
||||
"AND" => Err(StreamErrorFor::<I>::unexpected_static_message("AND")),
|
||||
"NOT" => Err(StreamErrorFor::<I>::unexpected_static_message("NOT")),
|
||||
_ => Ok(s)
|
||||
}
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -41,12 +39,13 @@ parser! {
|
||||
where [I: Stream<Item = char>]
|
||||
{
|
||||
let term_val = || {
|
||||
let phrase = (char('"'), many1(satisfy(|c| c != '"')), char('"')).map(|(_, s, _)| s);
|
||||
let phrase = char('"').with(many1(satisfy(|c| c != '"'))).skip(char('"'));
|
||||
phrase.or(word())
|
||||
};
|
||||
let term_val_with_field = negative_number().or(term_val());
|
||||
let term_query =
|
||||
(field(), char(':'), term_val_with_field).map(|(field_name, _, phrase)| UserInputLiteral {
|
||||
(field(), term_val_with_field)
|
||||
.map(|(field_name, phrase)| UserInputLiteral {
|
||||
field_name: Some(field_name),
|
||||
phrase,
|
||||
});
|
||||
@@ -64,8 +63,8 @@ parser! {
|
||||
fn negative_number[I]()(I) -> String
|
||||
where [I: Stream<Item = char>]
|
||||
{
|
||||
(char('-'), many1(satisfy(char::is_numeric)))
|
||||
.map(|(s1, s2): (char, String)| format!("{}{}", s1, s2))
|
||||
(char('-'), many1(satisfy(char::is_numeric)))
|
||||
.map(|(s1, s2): (char, String)| format!("{}{}", s1, s2))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -79,27 +78,23 @@ parser! {
|
||||
parser! {
|
||||
fn range[I]()(I) -> UserInputLeaf
|
||||
where [I: Stream<Item = char>] {
|
||||
let term_val = || {
|
||||
word().or(negative_number()).or(char('*').map(|_| "*".to_string()))
|
||||
};
|
||||
let lower_bound = {
|
||||
let excl = (char('{'), term_val()).map(|(_, w)| UserInputBound::Exclusive(w));
|
||||
let incl = (char('['), term_val()).map(|(_, w)| UserInputBound::Inclusive(w));
|
||||
attempt(excl).or(incl)
|
||||
};
|
||||
let upper_bound = {
|
||||
let excl = (term_val(), char('}')).map(|(w, _)| UserInputBound::Exclusive(w));
|
||||
let incl = (term_val(), char(']')).map(|(w, _)| UserInputBound::Inclusive(w));
|
||||
attempt(excl).or(incl)
|
||||
let range_term_val = || {
|
||||
word().or(negative_number()).or(char('*').with(value("*".to_string())))
|
||||
};
|
||||
let lower_bound = (one_of("{[".chars()), range_term_val())
|
||||
.map(|(boundary_char, lower_bound): (char, String)|
|
||||
if boundary_char == '{' { UserInputBound::Exclusive(lower_bound) }
|
||||
else { UserInputBound::Inclusive(lower_bound) });
|
||||
let upper_bound = (range_term_val(), one_of("}]".chars()))
|
||||
.map(|(higher_bound, boundary_char): (String, char)|
|
||||
if boundary_char == '}' { UserInputBound::Exclusive(higher_bound) }
|
||||
else { UserInputBound::Inclusive(higher_bound) });
|
||||
(
|
||||
optional((field(), char(':')).map(|x| x.0)),
|
||||
lower_bound,
|
||||
spaces(),
|
||||
string("TO"),
|
||||
spaces(),
|
||||
optional(field()),
|
||||
lower_bound
|
||||
.skip((spaces(), string("TO"), spaces())),
|
||||
upper_bound,
|
||||
).map(|(field, lower, _, _, _, upper)| UserInputLeaf::Range {
|
||||
).map(|(field, lower, upper)| UserInputLeaf::Range {
|
||||
field,
|
||||
lower,
|
||||
upper
|
||||
@@ -107,23 +102,28 @@ parser! {
|
||||
}
|
||||
}
|
||||
|
||||
fn negate(expr: UserInputAST) -> UserInputAST {
|
||||
expr.unary(Occur::MustNot)
|
||||
}
|
||||
|
||||
fn must(expr: UserInputAST) -> UserInputAST {
|
||||
expr.unary(Occur::Must)
|
||||
}
|
||||
|
||||
parser! {
|
||||
fn leaf[I]()(I) -> UserInputAST
|
||||
where [I: Stream<Item = char>] {
|
||||
(char('-'), leaf()).map(|(_, expr)| expr.unary(Occur::MustNot) )
|
||||
.or((char('+'), leaf()).map(|(_, expr)| expr.unary(Occur::Must) ))
|
||||
.or((char('('), parse_to_ast(), char(')')).map(|(_, expr, _)| expr))
|
||||
.or(char('*').map(|_| UserInputAST::from(UserInputLeaf::All) ))
|
||||
.or(attempt(
|
||||
(string("NOT"), spaces1(), leaf()).map(|(_, _, expr)| expr.unary(Occur::MustNot))
|
||||
)
|
||||
)
|
||||
.or(attempt(
|
||||
range().map(UserInputAST::from)))
|
||||
.or(literal().map(|leaf| UserInputAST::Leaf(Box::new(leaf))))
|
||||
char('-').with(leaf()).map(negate)
|
||||
.or(char('+').with(leaf()).map(must))
|
||||
.or(char('(').with(ast()).skip(char(')')))
|
||||
.or(char('*').map(|_| UserInputAST::from(UserInputLeaf::All)))
|
||||
.or(attempt(string("NOT").skip(spaces1()).with(leaf()).map(negate)))
|
||||
.or(attempt(range().map(UserInputAST::from)))
|
||||
.or(literal().map(UserInputAST::from))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
enum BinaryOperand {
|
||||
Or,
|
||||
And,
|
||||
@@ -131,27 +131,54 @@ enum BinaryOperand {
|
||||
|
||||
parser! {
|
||||
fn binary_operand[I]()(I) -> BinaryOperand
|
||||
where [I: Stream<Item = char>] {
|
||||
(spaces1(),
|
||||
(
|
||||
string("AND").map(|_| BinaryOperand::And)
|
||||
.or(string("OR").map(|_| BinaryOperand::Or))
|
||||
),
|
||||
spaces1()).map(|(_, op,_)| op)
|
||||
where [I: Stream<Item = char>]
|
||||
{
|
||||
string("AND").with(value(BinaryOperand::And))
|
||||
.or(string("OR").with(value(BinaryOperand::Or)))
|
||||
}
|
||||
}
|
||||
|
||||
enum Element {
|
||||
SingleEl(UserInputAST),
|
||||
NormalDisjunctive(Vec<Vec<UserInputAST>>),
|
||||
fn aggregate_binary_expressions(
|
||||
left: UserInputAST,
|
||||
others: Vec<(BinaryOperand, UserInputAST)>,
|
||||
) -> UserInputAST {
|
||||
let mut dnf: Vec<Vec<UserInputAST>> = vec![vec![left]];
|
||||
for (operator, operand_ast) in others {
|
||||
match operator {
|
||||
BinaryOperand::And => {
|
||||
if let Some(last) = dnf.last_mut() {
|
||||
last.push(operand_ast);
|
||||
}
|
||||
}
|
||||
BinaryOperand::Or => {
|
||||
dnf.push(vec![operand_ast]);
|
||||
}
|
||||
}
|
||||
}
|
||||
if dnf.len() == 1 {
|
||||
UserInputAST::and(dnf.into_iter().next().unwrap()) //< safe
|
||||
} else {
|
||||
let conjunctions = dnf.into_iter().map(UserInputAST::and).collect();
|
||||
UserInputAST::or(conjunctions)
|
||||
}
|
||||
}
|
||||
|
||||
impl Element {
|
||||
pub fn into_dnf(self) -> Vec<Vec<UserInputAST>> {
|
||||
match self {
|
||||
Element::NormalDisjunctive(conjunctions) => conjunctions,
|
||||
Element::SingleEl(el) => vec![vec![el]],
|
||||
}
|
||||
parser! {
|
||||
pub fn ast[I]()(I) -> UserInputAST
|
||||
where [I: Stream<Item = char>]
|
||||
{
|
||||
let operand_leaf = (binary_operand().skip(spaces()), leaf().skip(spaces()));
|
||||
let boolean_expr = (leaf().skip(spaces().silent()), many1(operand_leaf)).map(
|
||||
|(left, right)| aggregate_binary_expressions(left,right));
|
||||
let whitespace_separated_leaves = many1(leaf().skip(spaces().silent()))
|
||||
.map(|subqueries: Vec<UserInputAST>|
|
||||
if subqueries.len() == 1 {
|
||||
subqueries.into_iter().next().unwrap()
|
||||
} else {
|
||||
UserInputAST::Clause(subqueries.into_iter().collect())
|
||||
});
|
||||
let expr = attempt(boolean_expr).or(whitespace_separated_leaves);
|
||||
spaces().with(expr).skip(spaces())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -159,56 +186,7 @@ parser! {
|
||||
pub fn parse_to_ast[I]()(I) -> UserInputAST
|
||||
where [I: Stream<Item = char>]
|
||||
{
|
||||
(
|
||||
attempt(
|
||||
chainl1(
|
||||
leaf().map(Element::SingleEl),
|
||||
binary_operand().map(|op: BinaryOperand|
|
||||
move |left: Element, right: Element| {
|
||||
let mut dnf = left.into_dnf();
|
||||
if let Element::SingleEl(el) = right {
|
||||
match op {
|
||||
BinaryOperand::And => {
|
||||
if let Some(last) = dnf.last_mut() {
|
||||
last.push(el);
|
||||
}
|
||||
}
|
||||
BinaryOperand::Or => {
|
||||
dnf.push(vec!(el));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
unreachable!("Please report.")
|
||||
}
|
||||
Element::NormalDisjunctive(dnf)
|
||||
}
|
||||
)
|
||||
)
|
||||
.map(query_grammar::Element::into_dnf)
|
||||
.map(|fnd| {
|
||||
if fnd.len() == 1 {
|
||||
UserInputAST::and(fnd.into_iter().next().unwrap()) //< safe
|
||||
} else {
|
||||
let conjunctions = fnd
|
||||
.into_iter()
|
||||
.map(UserInputAST::and)
|
||||
.collect();
|
||||
UserInputAST::or(conjunctions)
|
||||
}
|
||||
})
|
||||
)
|
||||
.or(
|
||||
sep_by(leaf(), spaces())
|
||||
.map(|subqueries: Vec<UserInputAST>| {
|
||||
if subqueries.len() == 1 {
|
||||
subqueries.into_iter().next().unwrap()
|
||||
} else {
|
||||
UserInputAST::Clause(subqueries.into_iter().collect())
|
||||
}
|
||||
})
|
||||
)
|
||||
)
|
||||
|
||||
spaces().with(optional(ast()).skip(eof())).map(|opt_ast| opt_ast.unwrap_or(UserInputAST::empty_query()))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -227,6 +205,11 @@ mod test {
|
||||
assert!(parse_to_ast().parse(query).is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_empty_to_ast() {
|
||||
test_parse_query_to_ast_helper("", "<emptyclause>");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_query_to_ast_hyphen() {
|
||||
test_parse_query_to_ast_helper("\"www-form-encoded\"", "\"www-form-encoded\"");
|
||||
@@ -268,8 +251,24 @@ mod test {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_query_to_triming_spaces() {
|
||||
test_parse_query_to_ast_helper(" abc", "\"abc\"");
|
||||
test_parse_query_to_ast_helper("abc ", "\"abc\"");
|
||||
test_parse_query_to_ast_helper("( a OR abc)", "(?(\"a\") ?(\"abc\"))");
|
||||
test_parse_query_to_ast_helper("(a OR abc)", "(?(\"a\") ?(\"abc\"))");
|
||||
test_parse_query_to_ast_helper("(a OR abc)", "(?(\"a\") ?(\"abc\"))");
|
||||
test_parse_query_to_ast_helper("a OR abc ", "(?(\"a\") ?(\"abc\"))");
|
||||
test_parse_query_to_ast_helper("(a OR abc )", "(?(\"a\") ?(\"abc\"))");
|
||||
test_parse_query_to_ast_helper("(a OR abc) ", "(?(\"a\") ?(\"abc\"))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_query_to_ast() {
|
||||
test_parse_query_to_ast_helper("abc", "\"abc\"");
|
||||
test_parse_query_to_ast_helper("a b", "(\"a\" \"b\")");
|
||||
test_parse_query_to_ast_helper("+(a b)", "+((\"a\" \"b\"))");
|
||||
test_parse_query_to_ast_helper("+d", "+(\"d\")");
|
||||
test_parse_query_to_ast_helper("+(a b) +d", "(+((\"a\" \"b\")) +(\"d\"))");
|
||||
test_parse_query_to_ast_helper("(+a +b) d", "((+(\"a\") +(\"b\")) \"d\")");
|
||||
test_parse_query_to_ast_helper("(+a)", "+(\"a\")");
|
||||
@@ -281,11 +280,6 @@ mod test {
|
||||
test_parse_query_to_ast_helper("-abc:toto", "-(abc:\"toto\")");
|
||||
test_parse_query_to_ast_helper("abc:a b", "(abc:\"a\" \"b\")");
|
||||
test_parse_query_to_ast_helper("abc:\"a b\"", "abc:\"a b\"");
|
||||
test_is_parse_err("abc + ");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_query_to_ast_range() {
|
||||
test_parse_query_to_ast_helper("foo:[1 TO 5]", "foo:[\"1\" TO \"5\"]");
|
||||
test_parse_query_to_ast_helper("[1 TO 5]", "[\"1\" TO \"5\"]");
|
||||
test_parse_query_to_ast_helper("foo:{a TO z}", "foo:{\"a\" TO \"z\"}");
|
||||
@@ -293,5 +287,6 @@ mod test {
|
||||
test_parse_query_to_ast_helper("foo:[* TO toto}", "foo:[\"*\" TO \"toto\"}");
|
||||
test_parse_query_to_ast_helper("foo:[1 TO *}", "foo:[\"1\" TO \"*\"}");
|
||||
test_parse_query_to_ast_helper("foo:[1.1 TO *}", "foo:[\"1.1\" TO \"*\"}");
|
||||
test_is_parse_err("abc + ");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -80,9 +80,6 @@ impl UserInputBound {
|
||||
pub enum UserInputAST {
|
||||
Clause(Vec<UserInputAST>),
|
||||
Unary(Occur, Box<UserInputAST>),
|
||||
// Not(Box<UserInputAST>),
|
||||
// Should(Box<UserInputAST>),
|
||||
// Must(Box<UserInputAST>),
|
||||
Leaf(Box<UserInputLeaf>),
|
||||
}
|
||||
|
||||
@@ -92,7 +89,7 @@ impl UserInputAST {
|
||||
}
|
||||
|
||||
fn compose(occur: Occur, asts: Vec<UserInputAST>) -> UserInputAST {
|
||||
assert!(occur != Occur::MustNot);
|
||||
assert_ne!(occur, Occur::MustNot);
|
||||
assert!(!asts.is_empty());
|
||||
if asts.len() == 1 {
|
||||
asts.into_iter().next().unwrap() //< safe
|
||||
@@ -105,6 +102,10 @@ impl UserInputAST {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn empty_query() -> UserInputAST {
|
||||
UserInputAST::Clause(Vec::default())
|
||||
}
|
||||
|
||||
pub fn and(asts: Vec<UserInputAST>) -> UserInputAST {
|
||||
UserInputAST::compose(Occur::Must, asts)
|
||||
}
|
||||
@@ -114,42 +115,6 @@ impl UserInputAST {
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
impl UserInputAST {
|
||||
|
||||
fn compose_occur(self, occur: Occur) -> UserInputAST {
|
||||
match self {
|
||||
UserInputAST::Not(other) => {
|
||||
let new_occur = compose_occur(Occur::MustNot, occur);
|
||||
other.simplify()
|
||||
}
|
||||
_ => {
|
||||
self
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn simplify(self) -> UserInputAST {
|
||||
match self {
|
||||
UserInputAST::Clause(els) => {
|
||||
if els.len() == 1 {
|
||||
return els.into_iter().next().unwrap();
|
||||
} else {
|
||||
return self;
|
||||
}
|
||||
}
|
||||
UserInputAST::Not(els) => {
|
||||
if els.len() == 1 {
|
||||
return els.into_iter().next().unwrap();
|
||||
} else {
|
||||
return self;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
impl From<UserInputLiteral> for UserInputLeaf {
|
||||
fn from(literal: UserInputLiteral) -> UserInputLeaf {
|
||||
UserInputLeaf::Literal(literal)
|
||||
|
||||
Reference in New Issue
Block a user