// Mirror of https://github.com/quickwit-oss/tantivy.git (synced 2026-05-26 21:20:40 +00:00).
// - Internally wrapped by an Arc, cloning aggressively is ok.
// - A field in schema, redundant with metas.
// - Read-only. Needs to be built via a SchemaBuilder.
// 320 lines, 10 KiB, Rust.
use Result as tantivy_Error;
|
|
use combine::*;
|
|
use collector::Collector;
|
|
use core::searcher::Searcher;
|
|
use common::TimerTree;
|
|
use query::{Query, MultiTermQuery};
|
|
use schema::{Schema, FieldType, Term, Field};
|
|
use analyzer::SimpleTokenizer;
|
|
use analyzer::StreamingIterator;
|
|
use DocAddress;
|
|
use query::Explanation;
|
|
use query::Occur;
|
|
|
|
/// Errors reported while turning a query string into a `StandardQuery`.
#[derive(Debug)]
pub enum ParsingError {
    /// The query string does not match the query grammar.
    SyntaxError,
    /// The query names a field that is absent from the schema.
    /// Holds the unknown field name.
    FieldDoesNotExist(String),
    /// A value aimed at a `u32` field could not be parsed as a `u32`.
    /// Holds the field name, followed by the rejected value.
    ExpectedU32(String, String),
}
|
|
|
|
pub struct QueryParser {
|
|
schema: Schema,
|
|
default_fields: Vec<Field>,
|
|
}
|
|
|
|
#[derive(Eq, PartialEq, Debug)]
|
|
pub enum StandardQuery {
|
|
MultiTerm(MultiTermQuery),
|
|
}
|
|
|
|
impl StandardQuery {
|
|
pub fn num_terms(&self,) -> usize {
|
|
match self {
|
|
&StandardQuery::MultiTerm(ref q) => {
|
|
q.num_terms()
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Query for StandardQuery {
|
|
fn search<C: Collector>(&self, searcher: &Searcher, collector: &mut C) -> tantivy_Error<TimerTree> {
|
|
match *self {
|
|
StandardQuery::MultiTerm(ref q) => {
|
|
q.search(searcher, collector)
|
|
}
|
|
}
|
|
}
|
|
|
|
fn explain(
|
|
&self,
|
|
searcher: &Searcher,
|
|
doc_address: &DocAddress) -> tantivy_Error<Explanation> {
|
|
match self {
|
|
&StandardQuery::MultiTerm(ref q) => q.explain(searcher, doc_address)
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
fn compute_terms(field: Field, text: &str) -> Vec<Term> {
|
|
let tokenizer = SimpleTokenizer::new();
|
|
let mut tokens = Vec::new();
|
|
let mut token_it = tokenizer.tokenize(text);
|
|
loop {
|
|
match token_it.next() {
|
|
Some(token_str) => {
|
|
tokens.push(Term::from_field_text(field, token_str));
|
|
}
|
|
None => { break; }
|
|
}
|
|
}
|
|
tokens
|
|
}
|
|
|
|
|
|
impl QueryParser {
|
|
pub fn new(schema: Schema,
|
|
default_fields: Vec<Field>) -> QueryParser {
|
|
QueryParser {
|
|
schema: schema,
|
|
default_fields: default_fields,
|
|
}
|
|
}
|
|
|
|
|
|
fn transform_field_and_value(&self, field: Field, val: &str) -> Result<Vec<Term>, ParsingError> {
|
|
let field_entry = self.schema.get_field_entry(field);
|
|
Ok(match field_entry.field_type() {
|
|
&FieldType::Str(_) => {
|
|
compute_terms(field, val)
|
|
},
|
|
&FieldType::U32(_) => {
|
|
let u32_parsed: u32 = try!(val
|
|
.parse::<u32>()
|
|
.map_err(|_| {
|
|
ParsingError::ExpectedU32(field_entry.name().clone(), String::from(val))
|
|
})
|
|
);
|
|
vec!(Term::from_field_u32(field, u32_parsed))
|
|
}
|
|
})
|
|
}
|
|
|
|
fn transform_literal(&self, literal: Literal) -> Result<Vec<Term>, ParsingError> {
|
|
match literal {
|
|
Literal::DefaultField(val) => {
|
|
let mut terms = Vec::new();
|
|
for &field in &self.default_fields {
|
|
let extra_terms = try!(self.transform_field_and_value(field, &val));
|
|
terms.extend_from_slice(&extra_terms);
|
|
}
|
|
Ok(terms)
|
|
},
|
|
Literal::WithField(field_name, val) => {
|
|
match self.schema.get_field(&field_name) {
|
|
Some(field) => {
|
|
let terms = try!(self.transform_field_and_value(field, &val));
|
|
Ok(terms)
|
|
},
|
|
None => Err(ParsingError::FieldDoesNotExist(field_name))
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
pub fn parse_query(&self, query: &str) -> Result<StandardQuery, ParsingError> {
|
|
match parser(query_language).parse(query.trim()) {
|
|
Ok(literals) => {
|
|
let mut terms_result: Vec<(Occur, Term)> = Vec::new();
|
|
for (occur, literal) in literals.0 {
|
|
let literal_terms = try!(self.transform_literal(literal));
|
|
terms_result
|
|
.extend(literal_terms
|
|
.into_iter()
|
|
.map(|term| (occur, term) ));
|
|
}
|
|
Ok(
|
|
StandardQuery::MultiTerm(
|
|
MultiTermQuery::from(terms_result)
|
|
)
|
|
)
|
|
}
|
|
Err(_) => {
|
|
Err(ParsingError::SyntaxError)
|
|
}
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
/// A single literal of the query language, as produced by the grammar.
#[derive(Debug, Eq, PartialEq)]
pub enum Literal {
    /// A `field:value` literal. Holds the field name, followed by the value.
    WithField(String, String),
    /// A value with no explicit field. Holds the value.
    DefaultField(String),
}
|
|
|
|
// TODO: handle a query consisting of a single MUST_NOT term as a special case.
|
|
|
|
|
|
pub fn query_language(input: State<&str>) -> ParseResult<Vec<(Occur, Literal)>, &str>
|
|
{
|
|
let literal = || {
|
|
let term_val = || {
|
|
let word = many1(satisfy(|c: char| c.is_alphanumeric()));
|
|
let phrase =
|
|
(char('"'), many1(satisfy(|c| c != '"')), char('"'),)
|
|
.map(|(_, s, _)| s);
|
|
phrase.or(word)
|
|
};
|
|
|
|
let field = many1(letter());
|
|
let term_query = (field, char(':'), term_val())
|
|
.map(|(field,_, value)| Literal::WithField(field, value));
|
|
let term_default_field = term_val().map(Literal::DefaultField);
|
|
|
|
let occur = optional(char('-').or(char('+')))
|
|
.map(|opt_c| {
|
|
match opt_c {
|
|
Some('-') => Occur::MustNot,
|
|
Some('+') => Occur::Must,
|
|
_ => Occur::Should,
|
|
}
|
|
});
|
|
(occur, try(term_query).or(term_default_field))
|
|
};
|
|
|
|
(sep_by(literal(), spaces()), eof())
|
|
.map(|(first, _)| first)
|
|
.parse_state(input)
|
|
}
|
|
|
|
|
|
#[cfg(test)]
mod tests {

    use combine::*;
    use schema::*;
    use query::MultiTermQuery;
    use query::Occur;
    use super::*;

    /// Expected grammar output for a `field:value` literal.
    fn with_field(occur: Occur, field: &str, value: &str) -> (Occur, Literal) {
        (occur, Literal::WithField(String::from(field), String::from(value)))
    }

    /// Expected grammar output for a literal without an explicit field.
    fn default_field(occur: Occur, value: &str) -> (Occur, Literal) {
        (occur, Literal::DefaultField(String::from(value)))
    }

    /// Literals without an occurrence prefix are parsed as `Should`.
    #[test]
    pub fn test_query_grammar() {
        let mut grammar_parser = parser(query_language);
        assert_eq!(
            grammar_parser.parse("abc:toto").unwrap().0,
            vec![with_field(Occur::Should, "abc", "toto")]
        );
        assert_eq!(
            grammar_parser.parse("\"some phrase query\"").unwrap().0,
            vec![default_field(Occur::Should, "some phrase query")]
        );
        assert_eq!(
            grammar_parser.parse("field:\"some phrase query\"").unwrap().0,
            vec![with_field(Occur::Should, "field", "some phrase query")]
        );
        assert_eq!(
            grammar_parser.parse("field:\"some phrase query\" field:toto a").unwrap().0,
            vec![
                with_field(Occur::Should, "field", "some phrase query"),
                with_field(Occur::Should, "field", "toto"),
                default_field(Occur::Should, "a"),
            ]
        );
        assert_eq!(
            grammar_parser.parse("field:\"a ! b\"").unwrap().0,
            vec![with_field(Occur::Should, "field", "a ! b")]
        );
        assert_eq!(
            grammar_parser.parse("field:a9e3").unwrap().0,
            vec![with_field(Occur::Should, "field", "a9e3")]
        );
        assert_eq!(
            grammar_parser.parse("a9e3").unwrap().0,
            vec![default_field(Occur::Should, "a9e3")]
        );
        assert_eq!(
            grammar_parser.parse("field:タンタイビーって早い").unwrap().0,
            vec![with_field(Occur::Should, "field", "タンタイビーって早い")]
        );
    }

    /// `+` and `-` prefixes map to `Must` and `MustNot`.
    #[test]
    pub fn test_query_grammar_with_occur() {
        let mut query_parser = parser(query_language);
        assert_eq!(
            query_parser.parse("+abc:toto").unwrap().0,
            vec![with_field(Occur::Must, "abc", "toto")]
        );
        assert_eq!(
            query_parser.parse("+field:\"some phrase query\" -field:toto a").unwrap().0,
            vec![
                with_field(Occur::Must, "field", "some phrase query"),
                with_field(Occur::MustNot, "field", "toto"),
                default_field(Occur::Should, "a"),
            ]
        );
    }

    /// Malformed queries are rejected; the empty query is accepted.
    #[test]
    pub fn test_invalid_queries() {
        let mut query_parser = parser(query_language);
        println!("{:?}", query_parser.parse("ab!c:"));
        assert!(query_parser.parse("ab!c:").is_err());
        assert!(query_parser.parse("").is_ok());
        assert!(query_parser.parse(":fval").is_err());
        assert!(query_parser.parse("field:").is_err());
        assert!(query_parser.parse(":field").is_err());
        // NOTE(review): the next two assertions are identical; one of them
        // was presumably meant to cover a different input.
        assert!(query_parser.parse("f:@e!e").is_err());
        assert!(query_parser.parse("f:@e!e").is_err());
    }

    /// End-to-end checks of `QueryParser::parse_query` against a schema
    /// with three text fields, two of which are default fields.
    #[test]
    pub fn test_query_parser() {
        let mut schema_builder = SchemaBuilder::new();
        let text_field = schema_builder.add_text_field("text", STRING);
        let title_field = schema_builder.add_text_field("title", STRING);
        let author_field = schema_builder.add_text_field("author", STRING);
        let query_parser = QueryParser::new(schema_builder.build(),
                                            vec![text_field, author_field]);

        // "a" is not a field of the schema.
        assert!(query_parser.parse_query("a:b").is_err());

        // Explicit field.
        {
            let expected_terms = vec![Term::from_field_text(title_field, "abctitle")];
            let expected_query = StandardQuery::MultiTerm(MultiTermQuery::from(expected_terms));
            assert_eq!(query_parser.parse_query("title:abctitle").unwrap(),
                       expected_query);
        }

        // No field: the literal is expanded over the default fields.
        {
            let expected_terms = vec![
                Term::from_field_text(text_field, "abctitle"),
                Term::from_field_text(author_field, "abctitle"),
            ];
            let expected_query = StandardQuery::MultiTerm(MultiTermQuery::from(expected_terms));
            assert_eq!(query_parser.parse_query("abctitle").unwrap(),
                       expected_query);
        }

        // Surrounding whitespace is ignored.
        {
            let expected_terms = vec![Term::from_field_text(title_field, "abctitle")];
            let expected_query = StandardQuery::MultiTerm(MultiTermQuery::from(expected_terms));
            assert_eq!(query_parser.parse_query("title:abctitle   ").unwrap(),
                       expected_query);
            assert_eq!(query_parser.parse_query("    title:abctitle").unwrap(),
                       expected_query);
        }
    }

}
|