issue/57 Cleaning.

Closes #57
Closes #56
Closes #23
This commit is contained in:
Paul Masurel
2016-11-17 23:18:24 +09:00
parent 3f20788a36
commit 69e11d3779
15 changed files with 259 additions and 345 deletions

View File

@@ -5,28 +5,39 @@ use std::cmp;
use std::f64;
const DEFAULT_LEVEL_LOG_SIZE: f64 = 0.75;
const DEFAULT_MIN_SEGMENT_SIZE: u32 = 10_000;
const DEFAULT_MIN_LAYER_SIZE: u32 = 10_000;
const DEFAULT_MIN_MERGE_SIZE: usize = 8;
pub struct LogMergePolicy {
min_merge_size: usize,
min_segment_size: u32,
min_layer_size: u32,
level_log_size: f64,
}
impl LogMergePolicy {
fn clip_min_size(&self, size: u32) -> u32 {
cmp::max(self.min_segment_size, size)
cmp::max(self.min_layer_size, size)
}
/// Set the minimum number of segments that may be merged together.
pub fn set_min_merge_size(&mut self, min_merge_size: usize) {
self.min_merge_size = min_merge_size;
}
pub fn set_min_segment_size(&mut self, min_segment_size: u32) {
self.min_segment_size = min_segment_size;
/// Set the minimum segment size under which all segments belong
/// to the same level.
pub fn set_min_layer_size(&mut self, min_layer_size: u32) {
self.min_layer_size = min_layer_size;
}
/// Set the ratio between two consecutive levels.
///
/// Segments are grouped into levels according to their sizes.
/// These levels are defined as intervals of exponentially growing sizes.
/// `level_log_size` is the base-2 logarithm of the factor by which one
/// should multiply the limit to reach a level, in order to get the limit
/// to reach the following level.
pub fn set_level_log_size(&mut self, level_log_size: f64) {
self.level_log_size = level_log_size;
}
@@ -34,25 +45,25 @@ impl LogMergePolicy {
impl MergePolicy for LogMergePolicy {
fn compute_merge_candidates(&self, segments: &[SegmentMeta]) -> Vec<MergeCandidate> {
if segments.is_empty() {
return Vec::new();
}
let mut size_sorted_tuples = segments.iter()
.map(|x| x.num_docs)
.enumerate()
.collect::<Vec<(usize, u32)>>();
size_sorted_tuples.sort_by(|x,y| y.cmp(x));
size_sorted_tuples.sort_by(|x, y| y.cmp(x));
let size_sorted_log_tuples: Vec<_> = size_sorted_tuples.into_iter()
.map(|(ind, num_docs)| (ind, (self.clip_min_size(num_docs) as f64).log2()))
.collect();
let (first_ind, first_score) = size_sorted_log_tuples[0];
let mut current_max_log_size = first_score;
let mut levels = vec!(vec!(first_ind));
let mut levels = vec![vec![first_ind]];
for &(ind, score) in (&size_sorted_log_tuples).iter().skip(1) {
if score < (current_max_log_size - self.level_log_size) {
current_max_log_size = score;
@@ -62,7 +73,7 @@ impl MergePolicy for LogMergePolicy {
}
let result = levels.iter()
.filter(|level| {level.len() >= self.min_merge_size})
.filter(|level| level.len() >= self.min_merge_size)
.map(|ind_vec| {
MergeCandidate(ind_vec.iter()
.map(|&ind| segments[ind].segment_id)
@@ -78,7 +89,7 @@ impl Default for LogMergePolicy {
fn default() -> LogMergePolicy {
LogMergePolicy {
min_merge_size: DEFAULT_MIN_MERGE_SIZE,
min_segment_size: DEFAULT_MIN_SEGMENT_SIZE,
min_layer_size: DEFAULT_MIN_LAYER_SIZE,
level_log_size: DEFAULT_LEVEL_LOG_SIZE,
}
}
@@ -93,7 +104,7 @@ mod tests {
fn test_merge_policy() -> LogMergePolicy {
let mut log_merge_policy = LogMergePolicy::default();
log_merge_policy.set_min_merge_size(3);
log_merge_policy.set_min_segment_size(2);
log_merge_policy.set_min_layer_size(2);
log_merge_policy
}
@@ -108,7 +119,7 @@ mod tests {
fn test_log_merge_policy_pair() {
let test_input = vec![SegmentMeta::new(SegmentId::generate_random(), 10),
SegmentMeta::new(SegmentId::generate_random(), 10),
SegmentMeta::new(SegmentId::generate_random(), 10)];
SegmentMeta::new(SegmentId::generate_random(), 10)];
let result_list = test_merge_policy().compute_merge_candidates(&test_input);
assert_eq!(result_list.len(), 1);
}

View File

@@ -278,7 +278,7 @@ mod tests {
use DocAddress;
use collector::tests::FastFieldTestCollector;
use collector::tests::TestCollector;
use query::MultiTermQuery;
use query::BooleanQuery;
use schema::TextIndexingOptions;
#[test]
@@ -341,7 +341,7 @@ mod tests {
let searcher = index.searcher();
let get_doc_ids = |terms: Vec<Term>| {
let mut collector = TestCollector::default();
let query = MultiTermQuery::from(terms);
let query = BooleanQuery::new_multiterms_query(terms);
assert!(searcher.search(&query, &mut collector).is_ok());
collector.docs()
};
@@ -385,7 +385,7 @@ mod tests {
}
{
let get_fast_vals = |terms: Vec<Term>| {
let query = MultiTermQuery::from(terms);
let query = BooleanQuery::new_multiterms_query(terms);
let mut collector = FastFieldTestCollector::for_field(score_field);
assert!(searcher.search(&query, &mut collector).is_ok());
collector.vals().clone()

View File

@@ -157,9 +157,9 @@ pub struct DocAddress(pub SegmentLocalId, pub DocId);
mod tests {
use collector::tests::TestCollector;
use query::MultiTermQuery;
use Index;
use core::SegmentReader;
use query::BooleanQuery;
use schema::*;
use DocSet;
use Postings;
@@ -316,7 +316,7 @@ mod tests {
{
let searcher = index.searcher();
let get_doc_ids = |terms: Vec<Term>| {
let query = MultiTermQuery::from(terms);
let query = BooleanQuery::new_multiterms_query(terms);
let mut collector = TestCollector::default();
assert!(searcher.search(&query, &mut collector).is_ok());
collector.docs()

View File

@@ -4,6 +4,9 @@ use super::boolean_weight::BooleanWeight;
use query::Weight;
use Searcher;
use query::Query;
use schema::Term;
use query::TermQuery;
use postings::SegmentPostingsOption;
use query::Occur;
use query::OccurFilter;
@@ -19,7 +22,7 @@ use query::OccurFilter;
/// a `MustNot` occurrence.
#[derive(Debug)]
pub struct BooleanQuery {
subqueries: Vec<(Occur, Box<Query>)>
subqueries: Vec<(Occur, Box<Query>)>,
}
impl From<Vec<(Occur, Box<Query>)>> for BooleanQuery {
@@ -45,4 +48,18 @@ impl Query for BooleanQuery {
let filter = OccurFilter::new(&occurs);
Ok(box BooleanWeight::new(sub_weights, filter))
}
}
impl BooleanQuery {
    /// Builds a boolean query out of a plain list of terms.
    ///
    /// Every term is wrapped in a `TermQuery` (created with
    /// `SegmentPostingsOption::Freq`) and registered with `Occur::Should`,
    /// so the resulting query is a disjunction of the terms.
    pub fn new_multiterms_query(terms: Vec<Term>) -> BooleanQuery {
        let mut should_clauses: Vec<(Occur, Box<Query>)> = Vec::with_capacity(terms.len());
        for term in terms {
            let clause: Box<Query> = box TermQuery::new(term, SegmentPostingsOption::Freq);
            should_clauses.push((Occur::Should, clause));
        }
        BooleanQuery::from(should_clauses)
    }
}

View File

@@ -42,10 +42,6 @@ pub struct BooleanScorer<TScorer: Scorer> {
impl<TScorer: Scorer> BooleanScorer<TScorer> {
pub fn scorers(&self) -> &[TScorer] {
&self.scorers
}
pub fn new(scorers: Vec<TScorer>,
occur_filter: OccurFilter) -> BooleanScorer<TScorer> {
let score_combiner = ScoreCombiner::default_for_num_scorers(scorers.len());

View File

@@ -24,6 +24,7 @@ mod tests {
use Index;
use schema::*;
use fastfield::{U32FastFieldReader};
use postings::SegmentPostingsOption;
fn abs_diff(left: f32, right: f32) -> f32 {
(right - left).abs()

View File

@@ -5,18 +5,15 @@
mod query;
mod boolean_query;
mod multi_term_query;
mod phrase_query;
mod scorer;
mod occur;
mod weight;
mod occur_filter;
mod term_query;
mod query_parser;
mod phrase_query;
pub use self::boolean_query::BooleanQuery;
pub use self::multi_term_query::MultiTermQuery;
pub use self::multi_term_query::MultiTermWeight;
pub use self::occur_filter::OccurFilter;
pub use self::occur::Occur;
pub use self::phrase_query::PhraseQuery;

View File

@@ -1,5 +0,0 @@
mod multi_term_query;
mod multi_term_weight;
pub use self::multi_term_query::MultiTermQuery;
pub use self::multi_term_weight::MultiTermWeight;

View File

@@ -1,73 +0,0 @@
use Result;
use query::Weight;
use std::any::Any;
use schema::Term;
use query::MultiTermWeight;
use query::Query;
use core::searcher::Searcher;
use query::occur::Occur;
use query::occur_filter::OccurFilter;
use query::term_query::TermQuery;
use postings::SegmentPostingsOption;
/// Query involving one or more terms.
///
/// Each term is stored together with the `Occur` constraint that applies to it.
#[derive(Eq, Clone, PartialEq, Debug)]
pub struct MultiTermQuery {
// TODO need a better Debug
/// The `(Occur, Term)` pairs making up the query.
occur_terms: Vec<(Occur, Term)>,
}
impl MultiTermQuery {
    /// Accessor for the number of terms
    pub fn num_terms(&self) -> usize {
        self.occur_terms.len()
    }

    /// Same as `weight()`, except that rather than a boxed trait,
    /// `specialized_weight` returns a specific type of the weight, allowing for
    /// compile-time optimization.
    ///
    /// One `TermQuery` is built per term, and the `OccurFilter` is built from
    /// the `Occur` values in the same order, so that both stay aligned.
    pub fn specialized_weight(&self, searcher: &Searcher) -> MultiTermWeight {
        // `TermQuery::new` takes the postings option as its second argument;
        // `map` itself only accepts the closure.
        let term_queries: Vec<TermQuery> = self.occur_terms
            .iter()
            .map(|&(_, ref term)| {
                TermQuery::new(term.clone(), SegmentPostingsOption::FreqAndPositions)
            })
            .collect();
        // `Occur` is `Copy`, no explicit clone is needed.
        let occurs: Vec<Occur> = self.occur_terms
            .iter()
            .map(|&(occur, _)| occur)
            .collect();
        let occur_filter = OccurFilter::new(&occurs);
        let weights = term_queries.iter()
            .map(|term_query| term_query.specialized_weight(searcher))
            .collect();
        MultiTermWeight::new(weights, occur_filter)
    }
}
impl Query for MultiTermQuery {
/// Returns `self` as a `&Any`.
fn as_any(&self) -> &Any {
self
}
/// Returns the boxed result of `specialized_weight` for the given searcher.
///
/// This implementation always returns `Ok`.
fn weight(&self, searcher: &Searcher) -> Result<Box<Weight>> {
Ok(box self.specialized_weight(searcher))
}
}
impl From<Vec<(Occur, Term)>> for MultiTermQuery {
/// Builds a query from explicit `(Occur, Term)` pairs, stored as given.
fn from(occur_terms: Vec<(Occur, Term)>) -> MultiTermQuery {
MultiTermQuery { occur_terms: occur_terms }
}
}
impl From<Vec<Term>> for MultiTermQuery {
    /// Builds a query in which every term carries the `Occur::Should`
    /// constraint.
    fn from(terms: Vec<Term>) -> MultiTermQuery {
        let mut occur_terms: Vec<(Occur, Term)> = Vec::with_capacity(terms.len());
        for term in terms {
            occur_terms.push((Occur::Should, term));
        }
        MultiTermQuery::from(occur_terms)
    }
}

View File

@@ -1,45 +0,0 @@
use Result;
use query::Weight;
use core::SegmentReader;
use query::Scorer;
use query::occur_filter::OccurFilter;
use postings::SegmentPostings;
use query::term_query::{TermWeight, TermScorer};
use query::boolean_query::BooleanScorer;
/// Weight object associated to a [`MultiTermQuery`](./struct.MultiTermQuery.html).
pub struct MultiTermWeight {
/// The term weights; one scorer is created from each of them.
weights: Vec<TermWeight>,
/// Occur filter handed to the `BooleanScorer`; tied with the order of `weights`.
occur_filter: OccurFilter,
}
impl MultiTermWeight {
    /// `MultiTermWeight` constructor.
    ///
    /// The `OccurFilter` is tied with the order of `weights`.
    pub fn new(weights: Vec<TermWeight>, occur_filter: OccurFilter) -> MultiTermWeight {
        MultiTermWeight {
            occur_filter: occur_filter,
            weights: weights,
        }
    }

    /// Same as `scorer()`, except that rather than a boxed trait,
    /// `specialized_scorer` returns a specific type of the scorer, allowing for
    /// compile-time optimization.
    ///
    /// Returns early with the error of the first term weight that fails to
    /// produce its scorer.
    pub fn specialized_scorer<'a>(&'a self,
                                  reader: &'a SegmentReader)
                                  -> Result<BooleanScorer<TermScorer<SegmentPostings<'a>>>> {
        let mut scorers: Vec<TermScorer<_>> = Vec::with_capacity(self.weights.len());
        for weight in self.weights.iter() {
            scorers.push(try!(weight.specialized_scorer(reader)));
        }
        Ok(BooleanScorer::new(scorers, self.occur_filter))
    }
}
impl Weight for MultiTermWeight {
    /// Boxed counterpart of `specialized_scorer`: builds the specialized
    /// scorer and returns it behind a `Scorer` trait object.
    fn scorer<'a>(&'a self, reader: &'a SegmentReader) -> Result<Box<Scorer + 'a>> {
        let specialized = try!(self.specialized_scorer(reader));
        Ok(box specialized)
    }
}

View File

@@ -2,7 +2,7 @@
/// should be present or must not be present.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum Occur {
/// For a given document to be considered for scoring,
/// For a given document to be considered for scoring,
/// at least one of the document with the Should or the Must
/// Occur constraint must be within the document.
Should,
@@ -12,27 +12,3 @@ pub enum Occur {
/// search.
MustNot,
}
impl Occur {
    /// Composes `self` (the outer constraint) with `other` (the inner one).
    ///
    /// * `Should` leaves `other` untouched.
    /// * `Must` yields `Must`, unless `other` is `MustNot`, which is kept.
    /// * `MustNot` yields `MustNot`, unless `other` is `MustNot` as well,
    ///   in which case the result is `Must`.
    pub fn compose(&self, other: Occur) -> Occur {
        match (*self, other) {
            (Occur::Should, inner) => inner,
            (Occur::Must, Occur::MustNot) => Occur::MustNot,
            (Occur::Must, _) => Occur::Must,
            (Occur::MustNot, Occur::MustNot) => Occur::Must,
            (Occur::MustNot, _) => Occur::MustNot,
        }
    }
}

View File

@@ -1,7 +1,7 @@
mod query_parser;
mod query_grammar;
mod user_input_ast;
mod logical_ast;
pub mod logical_ast;
pub use self::query_parser::QueryParser;
pub use self::query_parser::QueryParserError;

View File

@@ -2,79 +2,83 @@ use combine::*;
use combine::char::*;
use super::user_input_ast::*;
fn literal<I>(input: I) -> ParseResult<UserInputAST, I>
where I: Stream<Item = char> {
fn literal<I>(input: I) -> ParseResult<UserInputAST, I>
where I: Stream<Item = char>
{
let term_val = || {
let word = many1(satisfy(|c: char| c.is_alphanumeric()));
let phrase =
(char('"'), many1(satisfy(|c| c != '"')), char('"'),)
.map(|(_, s, _)| s);
let phrase = (char('"'), many1(satisfy(|c| c != '"')), char('"')).map(|(_, s, _)| s);
phrase.or(word)
};
let field = many1(letter());
let term_query = (field, char(':'), term_val())
.map(|(field_name,_, phrase)| {
UserInputLiteral {
field_name: Some(field_name),
phrase: phrase
}
});
let term_default_field = term_val()
.map(|phrase| {
UserInputLiteral {
field_name: None,
phrase: phrase
}
});
try(term_query).or(term_default_field)
.map(|query_literal| UserInputAST::from(query_literal))
.parse_stream(input)
}
let term_query = (field, char(':'), term_val()).map(|(field_name, _, phrase)| {
UserInputLiteral {
field_name: Some(field_name),
phrase: phrase,
}
});
let term_default_field = term_val().map(|phrase| {
UserInputLiteral {
field_name: None,
phrase: phrase,
}
});
try(term_query)
.or(term_default_field)
.map(|query_literal| UserInputAST::from(query_literal))
.parse_stream(input)
}
fn leaf<I>(input: I) -> ParseResult<UserInputAST, I>
where I: Stream<Item = char> {
(char('-'), parser(literal)).map(|(_, expr)| UserInputAST::Not(box expr))
.or((char('+'), parser(literal)).map(|(_, expr)| UserInputAST::Must(box expr)))
.or(parser(literal))
.parse_stream(input)
fn leaf<I>(input: I) -> ParseResult<UserInputAST, I>
where I: Stream<Item = char>
{
(char('-'), parser(literal))
.map(|(_, expr)| UserInputAST::Not(box expr))
.or((char('+'), parser(literal)).map(|(_, expr)| UserInputAST::Must(box expr)))
.or(parser(literal))
.parse_stream(input)
}
pub fn parse_to_ast<I>(input: I) -> ParseResult<UserInputAST, I>
where I: Stream<Item = char>
{
{
sep_by(parser(leaf), spaces())
.map(|subqueries: Vec<UserInputAST>| {
if subqueries.len() == 1 {
subqueries.into_iter().next().unwrap()
}
else {
UserInputAST::Clause(subqueries.into_iter().map(Box::new).collect())
}
})
.parse_stream(input)
.map(|subqueries: Vec<UserInputAST>| {
if subqueries.len() == 1 {
subqueries.into_iter().next().unwrap()
} else {
UserInputAST::Clause(subqueries.into_iter().map(Box::new).collect())
}
})
.parse_stream(input)
}
#[cfg(test)]
mod test {
use super::*;
fn test_parse_query_to_ast_helper(query: &str, expected: &str) {
let query = parse_to_ast(query).unwrap().0;
let query_str = format!("{:?}", query);
assert_eq!(query_str, expected);
}
fn test_is_parse_err(query: &str) {
assert!(parse_to_ast(query).is_err());
}
#[test]
pub fn test_parse_query_to_ast() {
fn test_parse_query_to_ast() {
test_parse_query_to_ast_helper("abc:toto", "abc:\"toto\"");
test_parse_query_to_ast_helper("+abc:toto", "+(abc:\"toto\")");
test_parse_query_to_ast_helper("+abc:toto -titi", "+(abc:\"toto\") -(\"titi\")");
test_parse_query_to_ast_helper("-abc:toto", "-(abc:\"toto\")");
test_parse_query_to_ast_helper("abc:a b", "abc:\"a\" \"b\"");
test_parse_query_to_ast_helper("abc:\"a b\"", "abc:\"a b\"");
test_is_parse_err("abc + ");
}
}

View File

@@ -6,6 +6,7 @@ use super::user_input_ast::*;
use super::query_grammar::parse_to_ast;
use query::Occur;
use query::TermQuery;
use postings::SegmentPostingsOption;
use query::PhraseQuery;
use analyzer::SimpleTokenizer;
use analyzer::StreamingIterator;
@@ -25,9 +26,10 @@ pub enum QueryParserError {
/// The query contains a term for a `u32`-field, but the value
/// is not a u32.
ExpectedU32(String, String),
/// Queries that are only "excluding" (e.g. `-title:pop`) are forbidden.
AllButQueryForbidden,
/// If no default field is declared, running a query without any
/// field specified is forbidden.
NoDefaultFieldDeclared,
}
@@ -37,14 +39,14 @@ pub enum QueryParserError {
///
/// The language covered by the current parser is extremely simple.
///
/// * simple terms: "e.g.: `Barack Obama` are simply analyzed using
/// * simple terms: "e.g.: `Barack Obama` are simply analyzed using
/// tantivy's `StandardTokenizer`, hence becoming `["barack", "obama"]`.
/// The terms are then searched within the default terms of the query parser.
///
///
/// e.g. If `body` and `title` are default fields, our example terms are
/// `["title:barack", "body:barack", "title:obama", "body:obama"]`.
/// By default, all tokenized and indexed fields are default fields.
///
///
/// Multiple terms are handled as an `OR` : any document containing at least
/// one of the term will go through the scoring.
///
@@ -54,13 +56,13 @@ pub enum QueryParserError {
/// are not relevant anymore.
/// Making it possible to make this behavior customizable is tracked in
/// [issue #27](https://github.com/fulmicoton/tantivy/issues/27).
///
///
/// * negative terms: By prepending a term by a `-`, a term can be excluded
/// from the search. This is useful for disambiguating a query.
/// e.g. `apple -fruit`
/// e.g. `apple -fruit`
///
/// * must terms: By prepending a term by a `+`, a term can be made required for the search.
///
///
pub struct QueryParser {
schema: Schema,
default_fields: Vec<Field>,
@@ -73,16 +75,18 @@ impl QueryParser {
/// * schema - index Schema
/// * default_fields - fields used to search if no field is specifically defined
/// in the query.
pub fn new(schema: Schema,
default_fields: Vec<Field>) -> QueryParser {
pub fn new(schema: Schema, default_fields: Vec<Field>) -> QueryParser {
QueryParser {
schema: schema,
default_fields: default_fields,
conjunction_by_default: false,
analyzer: box SimpleTokenizer,
}
}
}
/// Set the default way to compose queries to a conjunction.
///
/// By default, sub-queries are composed as a disjunction (`Occur::Should`);
/// after this call they are composed as a conjunction (`Occur::Must`) instead.
pub fn set_conjunction_by_default(&mut self) {
self.conjunction_by_default = true;
}
@@ -91,36 +95,44 @@ impl QueryParser {
///
/// Note that `parse_query` returns an error if the input
/// is not a valid query.
///
///
/// There is currently no lenient mode for the query parser
/// which makes it a bad choice for a public/broad user search engine.
///
/// Implementing a lenient mode for this query parser is tracked
/// Implementing a lenient mode for this query parser is tracked
/// in [Issue 5](https://github.com/fulmicoton/tantivy/issues/5)
pub fn parse_query(&self, query: &str) -> Result<Box<Query>, QueryParserError> {
let logical_ast = self.parse_query_to_logical_ast(query)?;
Ok(self.convert_to_query(logical_ast))
Ok(convert_to_query(logical_ast))
}
pub fn parse_query_to_logical_ast(&self, query: &str) -> Result<LogicalAST, QueryParserError> {
let (user_input_ast, remaining) = parse_to_ast(query).map_err(|_| QueryParserError::SyntaxError)?;
/// Parse the user query into an AST.
fn parse_query_to_logical_ast(&self, query: &str) -> Result<LogicalAST, QueryParserError> {
let (user_input_ast, _remaining) =
parse_to_ast(query).map_err(|_| QueryParserError::SyntaxError)?;
self.compute_logical_ast(user_input_ast)
}
fn resolve_field_name(&self, field_name: &str) -> Result<Field, QueryParserError> {
self.schema.get_field(field_name)
.ok_or_else(|| QueryParserError::FieldDoesNotExist(String::from(field_name)))
self.schema
.get_field(field_name)
.ok_or_else(|| QueryParserError::FieldDoesNotExist(String::from(field_name)))
}
pub fn compute_logical_ast(&self, user_input_ast: UserInputAST) -> Result<LogicalAST, QueryParserError> {
fn compute_logical_ast(&self,
user_input_ast: UserInputAST)
-> Result<LogicalAST, QueryParserError> {
let (occur, ast) = try!(self.compute_logical_ast_with_occur(user_input_ast));
if occur == Occur::MustNot {
return Err(QueryParserError::AllButQueryForbidden)
return Err(QueryParserError::AllButQueryForbidden);
}
Ok(ast)
Ok(ast)
}
fn compute_logical_ast_for_leaf(&self, field: Field, phrase: &str) -> Result<Option<LogicalLiteral>, QueryParserError> {
fn compute_logical_ast_for_leaf(&self,
field: Field,
phrase: &str)
-> Result<Option<LogicalLiteral>, QueryParserError> {
let mut token_iter = self.analyzer.tokenize(phrase);
let mut tokens: Vec<Term> = Vec::new();
loop {
@@ -129,98 +141,63 @@ impl QueryParser {
// TODO Handle u32
let term = Term::from_field_text(field, &text);
tokens.push(term);
}
else {
} else {
break;
}
}
if tokens.is_empty() {
Ok(None)
}
else if tokens.len() == 1 {
} else if tokens.len() == 1 {
Ok(Some(LogicalLiteral::Term(tokens.into_iter().next().unwrap())))
}
else {
} else {
Ok(Some(LogicalLiteral::Phrase(tokens)))
}
}
fn default_occur(&self) -> Occur {
if self.conjunction_by_default {
Occur::Must
}
else {
} else {
Occur::Should
}
}
fn convert_literal_to_query(&self, logical_literal: LogicalLiteral) -> Box<Query> {
match logical_literal {
LogicalLiteral::Term(term) => {
let field = term.field();
// TODO check the schema to get the correct segment options
box TermQuery::from(term)
}
LogicalLiteral::Phrase(terms) => {
// TODO check the schema to get the correct segment options
box PhraseQuery::from(terms)
}
}
}
fn convert_to_query(&self, logical_ast: LogicalAST) -> Box<Query> {
match logical_ast {
LogicalAST::Clause(clause) => {
let occur_subqueries = clause.into_iter()
.map(|(occur, subquery)| (occur, self.convert_to_query(subquery)))
.collect::<Vec<_>>();
box BooleanQuery::from(occur_subqueries)
}
LogicalAST::Leaf(logical_literal) => {
self.convert_literal_to_query(*logical_literal)
}
}
}
pub fn compute_logical_ast_with_occur(&self, user_input_ast: UserInputAST) -> Result<(Occur, LogicalAST), QueryParserError> {
fn compute_logical_ast_with_occur(&self,
user_input_ast: UserInputAST)
-> Result<(Occur, LogicalAST), QueryParserError> {
match user_input_ast {
UserInputAST::Clause(sub_queries) => {
let default_occur = self.default_occur();
let logical_sub_queries: Vec<(Occur, LogicalAST)> = try!(sub_queries
.into_iter()
let logical_sub_queries: Vec<(Occur, LogicalAST)> = try!(sub_queries.into_iter()
.map(|sub_query| self.compute_logical_ast_with_occur(*sub_query))
.map(|res|
res.map(
|(occur, sub_ast)| (default_occur.compose(occur), sub_ast)
)
)
.map(|res| {
res.map(|(occur, sub_ast)| (compose_occur(default_occur, occur), sub_ast))
})
.collect());
Ok((Occur::Should, LogicalAST::Clause(logical_sub_queries)))
}
UserInputAST::Not(subquery) => {
let (occur, logical_sub_queries) = try!(self.compute_logical_ast_with_occur(*subquery));
Ok((Occur::MustNot.compose(occur), logical_sub_queries))
},
let (occur, logical_sub_queries) =
try!(self.compute_logical_ast_with_occur(*subquery));
Ok((compose_occur(Occur::MustNot, occur), logical_sub_queries))
}
UserInputAST::Must(subquery) => {
let (occur, logical_sub_queries) = try!(self.compute_logical_ast_with_occur(*subquery));
Ok((Occur::Must.compose(occur), logical_sub_queries))
},
let (occur, logical_sub_queries) =
try!(self.compute_logical_ast_with_occur(*subquery));
Ok((compose_occur(Occur::Must, occur), logical_sub_queries))
}
UserInputAST::Leaf(literal) => {
let term_phrases: Vec<(Field, String)> = match literal.field_name {
Some(ref field_name) => {
let field = try!(self.resolve_field_name(&field_name));
vec!((field, literal.phrase.clone()))
vec![(field, literal.phrase.clone())]
}
None => {
if self.default_fields.len() == 0 {
return Err(QueryParserError::NoDefaultFieldDeclared)
}
else if self.default_fields.len() == 1 {
vec!((self.default_fields[0], literal.phrase.clone()))
}
else {
return Err(QueryParserError::NoDefaultFieldDeclared);
} else if self.default_fields.len() == 1 {
vec![(self.default_fields[0], literal.phrase.clone())]
} else {
self.default_fields
.iter()
.map(|default_field| (*default_field, literal.phrase.clone()))
@@ -234,29 +211,61 @@ impl QueryParser {
asts.push(LogicalAST::Leaf(box ast));
}
}
let result_ast =
if asts.len() == 0 {
panic!("not working");
}
else if asts.len() == 1 {
asts[0].clone()
}
else {
LogicalAST::Clause(asts
.into_iter()
.map(|ast| (Occur::Should, ast))
.collect())
};
let result_ast = if asts.len() == 0 {
panic!("not working");
} else if asts.len() == 1 {
asts[0].clone()
} else {
LogicalAST::Clause(asts.into_iter()
.map(|ast| (Occur::Should, ast))
.collect())
};
Ok((Occur::Should, result_ast))
}
}
}
}
}
/// Compose two occur values.
///
/// `left` is the outer constraint and `right` the inner one:
///
/// * `Should` leaves `right` untouched.
/// * `Must` yields `Must`, unless `right` is `MustNot`, which is kept.
/// * `MustNot` yields `MustNot`, unless `right` is `MustNot` as well,
///   in which case the result is `Must`.
fn compose_occur(left: Occur, right: Occur) -> Occur {
    let right_is_negated = right == Occur::MustNot;
    match left {
        Occur::Should => right,
        Occur::Must if right_is_negated => Occur::MustNot,
        Occur::Must => Occur::Must,
        Occur::MustNot if right_is_negated => Occur::Must,
        Occur::MustNot => Occur::MustNot,
    }
}
/// Turns a logical literal into a boxed query.
///
/// A single term becomes a `TermQuery` (created with
/// `SegmentPostingsOption::Freq`); a list of terms becomes a `PhraseQuery`.
fn convert_literal_to_query(logical_literal: LogicalLiteral) -> Box<Query> {
    match logical_literal {
        LogicalLiteral::Term(term) => {
            let term_query: Box<Query> = box TermQuery::new(term, SegmentPostingsOption::Freq);
            term_query
        }
        LogicalLiteral::Phrase(terms) => {
            let phrase_query: Box<Query> = box PhraseQuery::from(terms);
            phrase_query
        }
    }
}
fn convert_to_query(logical_ast: LogicalAST) -> Box<Query> {
match logical_ast {
LogicalAST::Clause(clause) => {
let occur_subqueries = clause.into_iter()
.map(|(occur, subquery)| (occur, convert_to_query(subquery)))
.collect::<Vec<_>>();
box BooleanQuery::from(occur_subqueries)
}
LogicalAST::Leaf(logical_literal) => convert_literal_to_query(*logical_literal),
}
}
@@ -266,13 +275,15 @@ mod test {
use super::QueryParser;
use super::QueryParserError;
use super::super::logical_ast::*;
fn parse_query_to_logical_ast(query: &str, default_conjunction: bool) -> Result<LogicalAST, QueryParserError> {
fn parse_query_to_logical_ast(query: &str,
default_conjunction: bool)
-> Result<LogicalAST, QueryParserError> {
let mut schema_builder = SchemaBuilder::default();
let title = schema_builder.add_text_field("title", TEXT);
let text = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let default_fields = vec!(title, text);
let default_fields = vec![title, text];
let mut query_parser = QueryParser::new(schema, default_fields);
if default_conjunction {
query_parser.set_conjunction_by_default();
@@ -280,7 +291,9 @@ mod test {
query_parser.parse_query_to_logical_ast(query)
}
fn test_parse_query_to_logical_ast_helper(query: &str, expected: &str, default_conjunction: bool) {
fn test_parse_query_to_logical_ast_helper(query: &str,
expected: &str,
default_conjunction: bool) {
let query = parse_query_to_logical_ast(query, default_conjunction).unwrap();
let query_str = format!("{:?}", query);
assert_eq!(query_str, expected);
@@ -288,21 +301,43 @@ mod test {
#[test]
pub fn test_parse_query_to_ast_disjunction() {
test_parse_query_to_logical_ast_helper("title:toto", "Term([0, 116, 111, 116, 111])", false);
test_parse_query_to_logical_ast_helper("+title:toto", "Term([0, 116, 111, 116, 111])", false);
test_parse_query_to_logical_ast_helper("+title:toto -titi", "(+Term([0, 116, 111, 116, 111]) -(Term([0, 116, 105, 116, 105]) Term([1, 116, 105, 116, 105])))", false);
assert_eq!(parse_query_to_logical_ast("-title:toto", false).err().unwrap(), QueryParserError::AllButQueryForbidden);
test_parse_query_to_logical_ast_helper("title:a b", "(Term([0, 97]) (Term([0, 98]) Term([1, 98])))", false);
test_parse_query_to_logical_ast_helper("title:\"a b\"", "\"[Term([0, 97]), Term([0, 98])]\"", false);
test_parse_query_to_logical_ast_helper("title:toto",
"Term([0, 116, 111, 116, 111])",
false);
test_parse_query_to_logical_ast_helper("+title:toto",
"Term([0, 116, 111, 116, 111])",
false);
test_parse_query_to_logical_ast_helper("+title:toto -titi",
"(+Term([0, 116, 111, 116, 111]) -(Term([0, 116, \
105, 116, 105]) Term([1, 116, 105, 116, 105])))",
false);
assert_eq!(parse_query_to_logical_ast("-title:toto", false).err().unwrap(),
QueryParserError::AllButQueryForbidden);
test_parse_query_to_logical_ast_helper("title:a b",
"(Term([0, 97]) (Term([0, 98]) Term([1, 98])))",
false);
test_parse_query_to_logical_ast_helper("title:\"a b\"",
"\"[Term([0, 97]), Term([0, 98])]\"",
false);
}
#[test]
pub fn test_parse_query_to_ast_conjunction() {
test_parse_query_to_logical_ast_helper("title:toto", "Term([0, 116, 111, 116, 111])", true);
test_parse_query_to_logical_ast_helper("+title:toto", "Term([0, 116, 111, 116, 111])", true);
test_parse_query_to_logical_ast_helper("+title:toto -titi", "(+Term([0, 116, 111, 116, 111]) -(Term([0, 116, 105, 116, 105]) Term([1, 116, 105, 116, 105])))", true);
assert_eq!(parse_query_to_logical_ast("-title:toto", true).err().unwrap(), QueryParserError::AllButQueryForbidden);
test_parse_query_to_logical_ast_helper("title:a b", "(+Term([0, 97]) +(Term([0, 98]) Term([1, 98])))", true);
test_parse_query_to_logical_ast_helper("title:\"a b\"", "\"[Term([0, 97]), Term([0, 98])]\"", true);
test_parse_query_to_logical_ast_helper("+title:toto",
"Term([0, 116, 111, 116, 111])",
true);
test_parse_query_to_logical_ast_helper("+title:toto -titi",
"(+Term([0, 116, 111, 116, 111]) -(Term([0, 116, \
105, 116, 105]) Term([1, 116, 105, 116, 105])))",
true);
assert_eq!(parse_query_to_logical_ast("-title:toto", true).err().unwrap(),
QueryParserError::AllButQueryForbidden);
test_parse_query_to_logical_ast_helper("title:a b",
"(+Term([0, 97]) +(Term([0, 98]) Term([1, 98])))",
true);
test_parse_query_to_logical_ast_helper("title:\"a b\"",
"\"[Term([0, 97]), Term([0, 98])]\"",
true);
}
}

View File

@@ -23,6 +23,14 @@ pub struct TermQuery {
}
impl TermQuery {
/// Creates a new term query.
///
/// * term - the term stored in the query
/// * segment_postings_options - the postings option stored in the query
pub fn new(term: Term, segment_postings_options: SegmentPostingsOption) -> TermQuery {
TermQuery {
term: term,
segment_postings_options: segment_postings_options,
}
}
/// Returns a weight object.
///
@@ -37,14 +45,6 @@ impl TermQuery {
segment_postings_options: self.segment_postings_options,
}
}
pub fn new(term: Term, segment_postings_options: SegmentPostingsOption) -> TermQuery {
TermQuery {
term: term,
segment_postings_options: segment_postings_options,
}
}
}
impl Query for TermQuery {