mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-05-31 07:30:39 +00:00
add support for float (#603)
* add basic support for float: as for i64, they are mapped to u64 for indexing; the query parser doesn't work yet * Update value.rs * implement support for float in query parser * Update README.md
This commit is contained in:
committed by
Paul Masurel
parent
c3231ca252
commit
6eb4e08636
@@ -20,7 +20,7 @@ parser! {
|
||||
parser! {
|
||||
fn word[I]()(I) -> String
|
||||
where [I: Stream<Item = char>] {
|
||||
many1(satisfy(char::is_alphanumeric))
|
||||
many1(satisfy(|c: char| c.is_alphanumeric() || c=='.'))
|
||||
.and_then(|s: String| {
|
||||
match s.as_str() {
|
||||
"OR" => Err(StreamErrorFor::<I>::unexpected_static_message("OR")),
|
||||
@@ -266,6 +266,7 @@ mod test {
|
||||
test_parse_query_to_ast_helper("(+a)", "+(\"a\")");
|
||||
test_parse_query_to_ast_helper("(+a +b)", "(+(\"a\") +(\"b\"))");
|
||||
test_parse_query_to_ast_helper("abc:toto", "abc:\"toto\"");
|
||||
test_parse_query_to_ast_helper("abc:1.1", "abc:\"1.1\"");
|
||||
test_parse_query_to_ast_helper("+abc:toto", "+(abc:\"toto\")");
|
||||
test_parse_query_to_ast_helper("(+abc:toto -titi)", "(+(abc:\"toto\") -(\"titi\"))");
|
||||
test_parse_query_to_ast_helper("-abc:toto", "-(abc:\"toto\")");
|
||||
@@ -277,6 +278,7 @@ mod test {
|
||||
test_parse_query_to_ast_helper("foo:[1 TO toto}", "foo:[\"1\" TO \"toto\"}");
|
||||
test_parse_query_to_ast_helper("foo:[* TO toto}", "foo:[\"*\" TO \"toto\"}");
|
||||
test_parse_query_to_ast_helper("foo:[1 TO *}", "foo:[\"1\" TO \"*\"}");
|
||||
test_parse_query_to_ast_helper("foo:[1.1 TO *}", "foo:[\"1.1\" TO \"*\"}");
|
||||
test_is_parse_err("abc + ");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,7 +18,7 @@ use crate::schema::{FieldType, Term};
|
||||
use crate::tokenizer::TokenizerManager;
|
||||
use combine::Parser;
|
||||
use std::borrow::Cow;
|
||||
use std::num::ParseIntError;
|
||||
use std::num::{ParseIntError, ParseFloatError};
|
||||
use std::ops::Bound;
|
||||
use std::str::FromStr;
|
||||
|
||||
@@ -30,9 +30,12 @@ pub enum QueryParserError {
|
||||
/// `FieldDoesNotExist(field_name: String)`
|
||||
/// The query references a field that is not in the schema
|
||||
FieldDoesNotExist(String),
|
||||
/// The query contains a term for a `u64`-field, but the value
|
||||
/// is not a u64.
|
||||
/// The query contains a term for a `u64` or `i64`-field, but the value
|
||||
/// is neither.
|
||||
ExpectedInt(ParseIntError),
|
||||
/// The query contains a term for a `f64`-field, but the value
|
||||
/// is not a f64.
|
||||
ExpectedFloat(ParseFloatError),
|
||||
/// Queries that are only "excluding" are forbidden (e.g. -title:pop).
|
||||
AllButQueryForbidden,
|
||||
/// If no default field is declared, running a query without any
|
||||
@@ -60,6 +63,12 @@ impl From<ParseIntError> for QueryParserError {
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a float-parsing failure into the query parser's error type
/// by wrapping it in `QueryParserError::ExpectedFloat`.
///
/// This conversion is what lets the `?` operator be applied to the result
/// of `f64::from_str` in code that returns `QueryParserError`.
impl From<ParseFloatError> for QueryParserError {
|
||||
fn from(err: ParseFloatError) -> QueryParserError {
|
||||
QueryParserError::ExpectedFloat(err)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<chrono::ParseError> for QueryParserError {
|
||||
fn from(err: chrono::ParseError) -> QueryParserError {
|
||||
QueryParserError::DateFormatError(err)
|
||||
@@ -239,6 +248,11 @@ impl QueryParser {
|
||||
let term = Term::from_field_i64(field, val);
|
||||
Ok(vec![(0, term)])
|
||||
}
|
||||
FieldType::F64(_) => {
|
||||
let val: f64 = f64::from_str(phrase)?;
|
||||
let term = Term::from_field_f64(field, val);
|
||||
Ok(vec![(0, term)])
|
||||
}
|
||||
FieldType::Date(_) => match chrono::DateTime::parse_from_rfc3339(phrase) {
|
||||
Ok(x) => Ok(vec![(
|
||||
0,
|
||||
@@ -529,6 +543,7 @@ mod test {
|
||||
schema_builder.add_text_field("nottokenized", STRING);
|
||||
schema_builder.add_text_field("with_stop_words", text_options);
|
||||
schema_builder.add_date_field("date", INDEXED);
|
||||
schema_builder.add_f64_field("float", INDEXED);
|
||||
let schema = schema_builder.build();
|
||||
let default_fields = vec![title, text];
|
||||
let tokenizer_manager = TokenizerManager::default();
|
||||
@@ -634,6 +649,13 @@ mod test {
|
||||
assert!(query_parser
|
||||
.parse_query("unsigned:\"18446744073709551615\"")
|
||||
.is_ok());
|
||||
assert!(query_parser.parse_query("float:\"3.1\"").is_ok());
|
||||
assert!(query_parser.parse_query("float:\"-2.4\"").is_ok());
|
||||
assert!(query_parser.parse_query("float:\"2.1.2\"").is_err());
|
||||
assert!(query_parser.parse_query("float:\"2.1a\"").is_err());
|
||||
assert!(query_parser
|
||||
.parse_query("float:\"18446744073709551615.0\"")
|
||||
.is_ok());
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"unsigned:2324",
|
||||
"Term([0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 9, 20])",
|
||||
@@ -645,6 +667,12 @@ mod test {
|
||||
&format!("{:?}", Term::from_field_i64(Field(2u32), -2324)),
|
||||
false,
|
||||
);
|
||||
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"float:2.5",
|
||||
&format!("{:?}", Term::from_field_f64(Field(10u32), 2.5)),
|
||||
false,
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -786,6 +814,11 @@ mod test {
|
||||
query_parser.parse_query("signed:18b"),
|
||||
Err(QueryParserError::ExpectedInt(_))
|
||||
);
|
||||
assert!(query_parser.parse_query("float:\"1.8\"").is_ok());
|
||||
assert_matches!(
|
||||
query_parser.parse_query("float:1.8a"),
|
||||
Err(QueryParserError::ExpectedFloat(_))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -142,6 +142,39 @@ impl RangeQuery {
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new `RangeQuery` over a `f64` field.
|
||||
///
|
||||
/// The range is half-open: `range.start` is included and `range.end`
/// is excluded. This is a convenience wrapper around `new_f64_bounds`.
///
/// If the field is not of the type `f64`, tantivy
|
||||
/// will panic when the `Weight` object is created.
|
||||
pub fn new_f64(field: Field, range: Range<f64>) -> RangeQuery {
|
||||
RangeQuery::new_f64_bounds(
|
||||
field,
|
||||
// A `Range` is inclusive at its start ...
Bound::Included(range.start),
|
||||
// ... and exclusive at its end.
Bound::Excluded(range.end),
|
||||
)
|
||||
}
|
||||
|
||||
/// Creates a new `RangeQuery` over a `f64` field.
|
||||
///
|
||||
/// The two `Bound` arguments make it possible to create more complex
|
||||
/// ranges than semi-inclusive range.
|
||||
///
|
||||
/// If the field is not of the type `f64`, tantivy
|
||||
/// will panic when the `Weight` object is created.
|
||||
pub fn new_f64_bounds(
|
||||
field: Field,
|
||||
left_bound: Bound<f64>,
|
||||
right_bound: Bound<f64>,
|
||||
) -> RangeQuery {
|
||||
// Encode an `f64` as the value bytes of a term built on `field`, so the
// stored bounds use the same byte representation as the terms themselves.
let make_term_val = |val: &f64| Term::from_field_f64(field, *val).value_bytes().to_owned();
|
||||
RangeQuery {
|
||||
field,
|
||||
value_type: Type::F64,
|
||||
// Both bounds are passed through `map_bound` together with the encoder above.
left_bound: map_bound(&left_bound, &make_term_val),
|
||||
right_bound: map_bound(&right_bound, &make_term_val),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new `RangeQuery` over a `u64` field.
|
||||
///
|
||||
/// The two `Bound` arguments make it possible to create more complex
|
||||
@@ -397,4 +430,61 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
// Checks `RangeQuery` over an `f64` field using an in-RAM index.
//
// Fixture: for every i in 1..100 one document is added, and it receives the
// value `j as f64` for every j in 1..100 that divides i. A document therefore
// matches a range exactly when one of its divisors falls inside that range.
#[test]
|
||||
fn test_range_float() {
|
||||
let float_field: Field;
|
||||
let schema = {
|
||||
let mut schema_builder = Schema::builder();
|
||||
float_field = schema_builder.add_f64_field("floatfield", INDEXED);
|
||||
schema_builder.build()
|
||||
};
|
||||
|
||||
let index = Index::create_in_ram(schema);
|
||||
{
|
||||
let mut index_writer = index.writer_with_num_threads(2, 6_000_000).unwrap();
|
||||
|
||||
for i in 1..100 {
|
||||
let mut doc = Document::new();
|
||||
for j in 1..100 {
|
||||
// Store every divisor j of i as a float value on document i.
if i % j == 0 {
|
||||
doc.add_f64(float_field, j as f64);
|
||||
}
|
||||
}
|
||||
index_writer.add_document(doc);
|
||||
}
|
||||
|
||||
index_writer.commit().unwrap();
|
||||
}
|
||||
let reader = index.reader().unwrap();
|
||||
let searcher = reader.searcher();
|
||||
// Helper: run a range query and return the number of matching documents.
let count_multiples =
|
||||
|range_query: RangeQuery| searcher.search(&range_query, &Count).unwrap();
|
||||
|
||||
// [10, 11): only the value 10 qualifies, i.e. the 9 multiples of 10 below 100.
assert_eq!(count_multiples(RangeQuery::new_f64(float_field, 10.0..11.0)), 9);
|
||||
// [10, 11]: 9 multiples of 10 plus 9 multiples of 11, with no overlap below 100.
assert_eq!(
|
||||
count_multiples(RangeQuery::new_f64_bounds(
|
||||
float_field,
|
||||
Bound::Included(10.0),
|
||||
Bound::Included(11.0)
|
||||
)),
|
||||
18
|
||||
);
|
||||
// (9, 10]: 9 itself is excluded, so again only the 9 multiples of 10 match.
assert_eq!(
|
||||
count_multiples(RangeQuery::new_f64_bounds(
|
||||
float_field,
|
||||
Bound::Excluded(9.0),
|
||||
Bound::Included(10.0)
|
||||
)),
|
||||
9
|
||||
);
|
||||
// [9, unbounded): every document i >= 9 holds its own value i, giving 99 - 9 + 1 = 91.
assert_eq!(
|
||||
count_multiples(RangeQuery::new_f64_bounds(
|
||||
float_field,
|
||||
Bound::Included(9.0),
|
||||
Bound::Unbounded
|
||||
)),
|
||||
91
|
||||
);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user