diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2681f6d6f..de66f2d23 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,8 +2,10 @@
 Tantivy 0.4.0
 ==========================
 - Removed u32 fields. They are replaced by u64 and i64 fields (#65)
--
-
+- QueryParser:
+  - Searching on a field that is not indexed now returns an explicit error
+  - Searching for an int term via the query parser was broken (`age:1`)
+  - Phrase queries on non-tokenized fields are no longer tokenized by the query parser
 
 Tantivy 0.3.1
 ==========================
diff --git a/src/fastfield/error.rs b/src/fastfield/error.rs
index f9f0be0d6..50776a179 100644
--- a/src/fastfield/error.rs
+++ b/src/fastfield/error.rs
@@ -16,7 +16,7 @@ impl FastFieldNotAvailableError {
     /// for which fast fields are not available.
     pub fn new(field_entry: &FieldEntry) -> FastFieldNotAvailableError {
         FastFieldNotAvailableError {
-            field_name: field_entry.name().clone(),
+            field_name: field_entry.name().to_string(),
         }
     }
 }
diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs
index 9c85fa146..02c92df26 100644
--- a/src/fastfield/writer.rs
+++ b/src/fastfield/writer.rs
@@ -12,6 +12,7 @@ pub struct FastFieldsWriter {
 
 impl FastFieldsWriter {
 
+    /// Creates all the `FastFieldWriter`s required by the schema.
     pub fn from_schema(schema: &Schema) -> FastFieldsWriter {
         let field_writers: Vec<IntFastFieldWriter> = schema
             .fields()
@@ -47,6 +48,9 @@ impl FastFieldsWriter {
         }
     }
 
+    /// Returns a `FastFieldsWriter` with an
+    /// `IntFastFieldWriter` for each of the
+    /// fields given as argument.
     pub fn new(fields: Vec<Field>) -> FastFieldsWriter {
         FastFieldsWriter {
             field_writers: fields
diff --git a/src/indexer/segment_updater.rs b/src/indexer/segment_updater.rs
index f1a492859..ccd0974fa 100644
--- a/src/indexer/segment_updater.rs
+++ b/src/indexer/segment_updater.rs
@@ -374,14 +374,14 @@ impl SegmentUpdater {
         self.run_async(move |segment_updater| {
             debug!("End merge {:?}", after_merge_segment_entry.meta());
             let mut delete_cursor = after_merge_segment_entry.delete_cursor().clone();
-            let mut file_protection_opt = None;
+            let mut _file_protection_opt = None;
             if let Some(delete_operation) = delete_cursor.get() {
                 let committed_opstamp = segment_updater.0.index.opstamp();
                 if delete_operation.opstamp < committed_opstamp {
                     let segment = segment_updater.0.index.segment(after_merge_segment_entry.meta().clone());
                     match advance_deletes(segment, &mut after_merge_segment_entry, committed_opstamp) {
                         Ok(file_protection_opt_res) => {
-                            _file_protection_opt = file_protection_opt_res;
+                            _file_protection_opt = file_protection_opt_res;
                         }
                         Err(e) => {
                             error!("Merge of {:?} was cancelled (advancing deletes failed): {:?}", before_merge_segment_ids, e);
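A note on the `segment_updater.rs` hunk: the underscore prefix is doing real work. Renaming the binding to `_file_protection_opt` silences the unused-variable warning while still keeping the file-protection guard alive until the end of the closure; destructuring into a bare `_` instead would drop the value immediately and release the protected files too early. A minimal sketch of that distinction (the `Guard` type here is hypothetical, not from this patch):

```rust
struct Guard;

impl Drop for Guard {
    fn drop(&mut self) {
        // Runs as soon as the value goes out of scope.
        println!("guard dropped");
    }
}

fn main() {
    let _ = Guard;      // `_` binds nothing: the guard is dropped right here
    println!("after `_`");
    let _guard = Guard; // a named `_`-prefixed binding lives to the end of main
    println!("after `_guard`");
}
```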
diff --git a/src/query/query_parser/query_grammar.rs b/src/query/query_parser/query_grammar.rs
index bef6cfd38..26b629050 100644
--- a/src/query/query_parser/query_grammar.rs
+++ b/src/query/query_parser/query_grammar.rs
@@ -10,8 +10,16 @@ fn literal<I>(input: I) -> ParseResult<UserInputLiteral, I>
     let phrase = (char('"'), many1(satisfy(|c| c != '"')), char('"')).map(|(_, s, _)| s);
     phrase.or(word)
 };
-    let field = many1(letter());
-    let term_query = (field, char(':'), term_val()).map(|(field_name, _, phrase)| {
+
+    let negative_numbers = (char('-'), many1(satisfy(|c: char| c.is_numeric())))
+        .map(|(s1, s2): (char, String)| format!("{}{}", s1, s2));
+
+    let field = (letter(), many(satisfy(|c: char| c.is_alphanumeric() || c == '_')))
+        .map(|(s1, s2): (char, String)| format!("{}{}", s1, s2));
+
+    let term_val_with_field = negative_numbers.or(term_val());
+
+    let term_query = (field, char(':'), term_val_with_field).map(|(field_name, _, phrase)| {
         UserInputLiteral {
             field_name: Some(field_name),
             phrase: phrase,
diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs
index 9338266fe..6d76e95b1 100644
--- a/src/query/query_parser/query_parser.rs
+++ b/src/query/query_parser/query_parser.rs
@@ -10,7 +10,9 @@ use postings::SegmentPostingsOption;
 use query::PhraseQuery;
 use analyzer::SimpleTokenizer;
 use analyzer::StreamingIterator;
-use schema::Term;
+use schema::{Term, FieldType};
+use std::str::FromStr;
+use std::num::ParseIntError;
@@ -24,15 +26,23 @@ pub enum QueryParserError {
     FieldDoesNotExist(String),
-    /// The query contains a term for a `u64`-field, but the value
-    /// is not a u64.
-    ExpectedU64(String, String),
+    /// The query contains a term for an integer field,
+    /// but the value could not be parsed as an integer.
+    ExpectedInt(ParseIntError),
     /// Queries that are only "excluding" are forbidden. (e.g. -title:pop)
     AllButQueryForbidden,
     /// If no default field is declared, running a query without any
     /// field specified is forbidden.
     NoDefaultFieldDeclared,
+    /// The field searched for is not declared
+    /// as indexed in the schema.
+    FieldNotIndexed(String),
 }
 
+impl From<ParseIntError> for QueryParserError {
+    fn from(err: ParseIntError) -> QueryParserError {
+        QueryParserError::ExpectedInt(err)
+    }
+}
 
 /// Tantivy's Query parser
@@ -121,7 +131,7 @@ impl QueryParser {
     fn compute_logical_ast(&self,
                            user_input_ast: UserInputAST)
                            -> Result<LogicalAST, QueryParserError> {
-        let (occur, ast) = try!(self.compute_logical_ast_with_occur(user_input_ast));
+        let (occur, ast) = self.compute_logical_ast_with_occur(user_input_ast)?;
         if occur == Occur::MustNot {
             return Err(QueryParserError::AllButQueryForbidden);
         }
@@ -132,25 +142,51 @@ impl QueryParser {
                                field: Field,
                                phrase: &str)
                                -> Result<Option<LogicalLiteral>, QueryParserError> {
-        let mut token_iter = self.analyzer.tokenize(phrase);
-        let mut tokens: Vec<Term> = Vec::new();
-        loop {
-            if let Some(token) = token_iter.next() {
-                let text = token.to_string();
-                // TODO Handle u64
-                let term = Term::from_field_text(field, &text);
-                tokens.push(term);
-            } else {
-                break;
+        let field_entry = self.schema.get_field_entry(field);
+        let field_type = field_entry.field_type();
+        if !field_type.is_indexed() {
+            let field_name = field_entry.name().to_string();
+            return Err(QueryParserError::FieldNotIndexed(field_name));
+        }
+        match field_type {
+            &FieldType::I64(_) => {
+                let val: i64 = i64::from_str(phrase)?;
+                let term = Term::from_field_i64(field, val);
+                Ok(Some(LogicalLiteral::Term(term)))
+            }
+            &FieldType::U64(_) => {
+                let val: u64 = u64::from_str(phrase)?;
+                let term = Term::from_field_u64(field, val);
+                Ok(Some(LogicalLiteral::Term(term)))
+            }
+            &FieldType::Str(ref str_options) => {
+                let mut terms: Vec<Term> = Vec::new();
+                if str_options.get_indexing_options().is_tokenized() {
+                    let mut token_iter = self.analyzer.tokenize(phrase);
+                    while let Some(token) = token_iter.next() {
+                        terms.push(Term::from_field_text(field, token));
+                    }
+                } else {
+                    terms.push(Term::from_field_text(field, phrase));
+                }
+                if terms.is_empty() {
+                    Ok(None)
+                } else if terms.len() == 1 {
+                    Ok(Some(LogicalLiteral::Term(terms.into_iter().next().unwrap())))
+                } else {
+                    Ok(Some(LogicalLiteral::Phrase(terms)))
+                }
             }
         }
-        if tokens.is_empty() {
-            Ok(None)
-        } else if tokens.len() == 1 {
-            Ok(Some(LogicalLiteral::Term(tokens.into_iter().next().unwrap())))
-        } else {
-            Ok(Some(LogicalLiteral::Phrase(tokens)))
-        }
     }
 
     fn default_occur(&self) -> Occur {
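Together with the grammar change above, `compute_logical_literal` now dispatches on the field type: integer fields are parsed with `FromStr` (a `ParseIntError` is converted into `QueryParserError::ExpectedInt` by the new `From` impl), and non-indexed fields fail early with `FieldNotIndexed`. A sketch of the resulting behavior, assuming a hypothetical schema with an indexed i64 field `age` and a stored-but-not-indexed text field `notes`:

```rust
let query_parser = QueryParser::new(schema, vec![]);

// Integer fields go through FromStr; malformed values are parse errors.
assert!(query_parser.parse_query("age:-2324").is_ok());
assert!(query_parser.parse_query("age:hello").is_err());

// Querying a non-indexed field now fails loudly instead of matching nothing.
match query_parser.parse_query("notes:something") {
    Err(QueryParserError::FieldNotIndexed(name)) => assert_eq!(name, "notes"),
    _ => panic!("expected a FieldNotIndexed error"),
}
```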
@@ -208,23 +244,23 @@ impl QueryParser {
                     asts.push(LogicalAST::Leaf(box ast));
                 }
             }
-            let result_ast = if asts.len() == 0 {
-                // this should never happen
-                return Err(QueryParserError::SyntaxError);
-            } else if asts.len() == 1 {
-                asts[0].clone()
-            } else {
-                LogicalAST::Clause(asts.into_iter()
-                    .map(|ast| (Occur::Should, ast))
-                    .collect())
-            };
+            let result_ast =
+                if asts.len() == 0 {
+                    // this should never happen
+                    return Err(QueryParserError::SyntaxError);
+                } else if asts.len() == 1 {
+                    asts[0].clone()
+                } else {
+                    LogicalAST::Clause(asts.into_iter()
+                        .map(|ast| (Occur::Should, ast))
+                        .collect())
+                };
             Ok((Occur::Should, result_ast))
             }
         }
     }
 }
 
-
 /// Compose two occur values.
 fn compose_occur(left: Occur, right: Occur) -> Occur {
     match left {
@@ -269,16 +305,23 @@ fn convert_to_query(logical_ast: LogicalAST) -> Box<Query> {
 #[cfg(test)]
 mod test {
 
-    use schema::{SchemaBuilder, TEXT};
+    use schema::{SchemaBuilder, Term, TEXT, STRING, STORED, INT_INDEXED};
+    use query::Query;
+    use schema::Field;
     use super::QueryParser;
     use super::QueryParserError;
     use super::super::logical_ast::*;
-
     fn make_query_parser() -> QueryParser {
         let mut schema_builder = SchemaBuilder::default();
         let title = schema_builder.add_text_field("title", TEXT);
         let text = schema_builder.add_text_field("text", TEXT);
+        schema_builder.add_i64_field("signed", INT_INDEXED);
+        schema_builder.add_u64_field("unsigned", INT_INDEXED);
+        schema_builder.add_text_field("notindexed_text", STORED);
+        schema_builder.add_text_field("notindexed_u64", STORED);
+        schema_builder.add_text_field("notindexed_i64", STORED);
+        schema_builder.add_text_field("nottokenized", STRING);
         let schema = schema_builder.build();
         let default_fields = vec![title, text];
         QueryParser::new(schema, default_fields)
@@ -309,6 +352,64 @@ mod test {
         let query_parser = make_query_parser();
         assert!(query_parser.parse_query("toto").is_ok());
     }
+
+    #[test]
+    pub fn test_parse_nonindexed_field_yields_error() {
+        let query_parser = make_query_parser();
+
+        let is_not_indexed_err = |query: &str| {
+            let result: Result<Box<Query>, QueryParserError> = query_parser.parse_query(query);
+            if let Err(QueryParserError::FieldNotIndexed(field_name)) = result {
+                Some(field_name)
+            } else {
+                None
+            }
+        };
+
+        assert_eq!(is_not_indexed_err("notindexed_text:titi"),
+                   Some(String::from("notindexed_text")));
+        assert_eq!(is_not_indexed_err("notindexed_u64:23424"),
+                   Some(String::from("notindexed_u64")));
+        assert_eq!(is_not_indexed_err("notindexed_i64:-234324"),
+                   Some(String::from("notindexed_i64")));
+    }
+
+    #[test]
+    pub fn test_parse_query_untokenized() {
+        test_parse_query_to_logical_ast_helper(
+            "nottokenized:\"wordone wordtwo\"",
+            "Term([0, 0, 0, 7, 119, 111, 114, 100, 111, 110, 101, 32, 119, 111, 114, 100, 116, 119, 111])",
+            false);
+    }
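The expected strings in these tests spell out Tantivy's binary term layout: the first four bytes are the field ordinal, big-endian, and the rest is the value. Above, `[0, 0, 0, 7]` is the `nottokenized` field (the eighth field declared in `make_query_parser`), followed by the UTF-8 bytes of the whole phrase, confirming it was kept as a single term. In the int test below, `[0, 0, 0, 3]` is the `unsigned` field, followed by 2324 as a big-endian u64 (9*256 + 20). A quick check of the text payload:

```rust
fn main() {
    // Bytes 5.. of the expected term in test_parse_query_untokenized.
    let payload = [119u8, 111, 114, 100, 111, 110, 101, 32,
                   119, 111, 114, 100, 116, 119, 111];
    assert_eq!(std::str::from_utf8(&payload).unwrap(), "wordone wordtwo");
}
```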
assert!(query_parser.parse_query("unsigned:\"18446744073709551615\"").is_ok()); + test_parse_query_to_logical_ast_helper("unsigned:2324", + "Term([0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 9, 20])", + false); + + test_parse_query_to_logical_ast_helper("signed:-2324", + &format!("{:?}", Term::from_field_i64(Field(2u32), -2324)), + false); + } #[test] diff --git a/src/schema/field_entry.rs b/src/schema/field_entry.rs index fde39caec..ad2292248 100644 --- a/src/schema/field_entry.rs +++ b/src/schema/field_entry.rs @@ -51,7 +51,7 @@ impl FieldEntry { /// Returns the name of the field - pub fn name(&self,) -> &String { + pub fn name(&self,) -> &str { &self.name } diff --git a/src/schema/field_type.rs b/src/schema/field_type.rs index 0a193a08d..417598951 100644 --- a/src/schema/field_type.rs +++ b/src/schema/field_type.rs @@ -31,6 +31,20 @@ pub enum FieldType { impl FieldType { + pub fn is_indexed(&self) -> bool { + match self { + &FieldType::Str(ref text_options) => { + text_options.get_indexing_options().is_indexed() + } + &FieldType::U64(ref int_options) => { + int_options.is_indexed() + } + &FieldType::I64(ref int_options) => { + int_options.is_indexed() + } + } + } + /// Parses a field value from json, given the target FieldType. /// /// Tantivy will not try to cast values. diff --git a/src/schema/schema.rs b/src/schema/schema.rs index c62555d67..b1b146c50 100644 --- a/src/schema/schema.rs +++ b/src/schema/schema.rs @@ -107,7 +107,7 @@ impl SchemaBuilder { /// Adds a field entry to the schema in build. fn add_field(&mut self, field_entry: FieldEntry) -> Field { let field = Field(self.fields.len() as u32); - let field_name = field_entry.name().clone(); + let field_name = field_entry.name().to_string(); self.fields.push(field_entry); self.fields_map.insert(field_name, field); field @@ -173,7 +173,7 @@ impl Schema { } /// Return the field name for a given `Field`. - pub fn get_field_name(&self, field: Field) -> &String { + pub fn get_field_name(&self, field: Field) -> &str { self.get_field_entry(field).name() } @@ -205,7 +205,7 @@ impl Schema { .map(|field_val| field_val.value() ) .cloned() .collect(); - field_map.insert(field_name.clone(), values); + field_map.insert(field_name.to_string(), values); } NamedFieldDocument(field_map) }