From 6eb4e08636f48091f7783d52c84ef78e7ff1ada4 Mon Sep 17 00:00:00 2001 From: fdb-hiroshima <35889323+fdb-hiroshima@users.noreply.github.com> Date: Sat, 27 Jul 2019 10:57:33 +0200 Subject: [PATCH] add support for float (#603) * add basic support for float as for i64, they are mapped to u64 for indexing query parser don't work yet * Update value.rs * implement support for float in query parser * Update README.md --- CHANGELOG.md | 5 ++ README.md | 4 +- src/collector/int_facet_collector.rs | 7 +- src/collector/top_score_collector.rs | 1 + src/common/mod.rs | 63 ++++++++++++++++- src/common/serialize.rs | 18 +++++ src/fastfield/mod.rs | 26 ++++++- src/fastfield/readers.rs | 38 +++++++++++ src/fastfield/writer.rs | 20 +++--- src/indexer/merger.rs | 1 + src/indexer/segment_writer.rs | 11 +++ src/lib.rs | 44 ++++++++++-- src/postings/postings_writer.rs | 3 +- src/query/query_parser/query_grammar.rs | 4 +- src/query/query_parser/query_parser.rs | 39 ++++++++++- src/query/range_query.rs | 90 +++++++++++++++++++++++++ src/schema/document.rs | 5 ++ src/schema/field_entry.rs | 20 +++++- src/schema/field_type.rs | 18 ++++- src/schema/flags.rs | 4 +- src/schema/mod.rs | 2 +- src/schema/schema.rs | 54 +++++++++++++-- src/schema/term.rs | 30 ++++++++- src/schema/value.rs | 81 +++++++++++++++++++--- src/termdict/mod.rs | 3 + 25 files changed, 545 insertions(+), 46 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 465210f74..4f40ff22e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +Tantivy 0.11.0 +===================== + +- Added f64 field. Internally reuse u64 code the same way i64 does (@fdb-hiroshima) + Tantivy 0.10.0 ===================== diff --git a/README.md b/README.md index 0441e82c6..38e311170 100644 --- a/README.md +++ b/README.md @@ -50,9 +50,9 @@ performance for different type of queries / collection. 
- Multithreaded indexing (indexing English Wikipedia takes < 3 minutes on my desktop) - Mmap directory - SIMD integer compression when the platform/CPU includes the SSE2 instruction set. -- Single valued and multivalued u64 and i64 fast fields (equivalent of doc values in Lucene) +- Single valued and multivalued u64, i64 and f64 fast fields (equivalent of doc values in Lucene) - `&[u8]` fast fields -- Text, i64, u64, dates and hierarchical facet fields +- Text, i64, u64, f64, dates and hierarchical facet fields - LZ4 compressed document store - Range queries - Faceted search diff --git a/src/collector/int_facet_collector.rs b/src/collector/int_facet_collector.rs index 4232343e6..d9b4c1310 100644 --- a/src/collector/int_facet_collector.rs +++ b/src/collector/int_facet_collector.rs @@ -82,6 +82,7 @@ mod tests { let mut schema_builder = schema::Schema::builder(); let num_field_i64 = schema_builder.add_i64_field("num_i64", FAST); let num_field_u64 = schema_builder.add_u64_field("num_u64", FAST); + let num_field_f64 = schema_builder.add_f64_field("num_f64", FAST); let text_field = schema_builder.add_text_field("text", STRING); let schema = schema_builder.build(); @@ -94,6 +95,7 @@ mod tests { index_writer.add_document(doc!( num_field_i64 => ((i as i64) % 3i64) as i64, num_field_u64 => (i % 2u64) as u64, + num_field_f64 => (i % 4u64) as f64, text_field => "text" )); } @@ -104,10 +106,11 @@ mod tests { let searcher = index.reader().searcher(); let mut ffvf_i64: IntFacetCollector = IntFacetCollector::new(num_field_i64); let mut ffvf_u64: IntFacetCollector = IntFacetCollector::new(num_field_u64); + let mut ffvf_f64: IntFacetCollector = IntFacetCollector::new(num_field_f64); { // perform the query - let mut facet_collectors = chain().push(&mut ffvf_i64).push(&mut ffvf_u64); + let mut facet_collectors = chain().push(&mut ffvf_i64).push(&mut ffvf_u64).push(&mut ffvf_f64); let mut query_parser = QueryParser::for_index(index, vec![text_field]); let query = 
query_parser.parse_query("text:text").unwrap(); query.search(&searcher, &mut facet_collectors).unwrap(); @@ -117,6 +120,8 @@ mod tests { assert_eq!(ffvf_u64.counters[&1], 5); assert_eq!(ffvf_i64.counters[&0], 4); assert_eq!(ffvf_i64.counters[&1], 3); + assert_eq!(ffvf_f64.counters[&0.0], 3); + assert_eq!(ffvf_f64.counters[&2.0], 2); } } diff --git a/src/collector/top_score_collector.rs b/src/collector/top_score_collector.rs index c9b03d020..4ec09ce53 100644 --- a/src/collector/top_score_collector.rs +++ b/src/collector/top_score_collector.rs @@ -160,6 +160,7 @@ impl TopDocs { .fast_fields() .u64(field) .expect("Field requested is not a i64/u64 fast field."); + //TODO error message missmatch actual behavior for i64 move |doc: DocId| ff_reader.get(doc) }) } diff --git a/src/common/mod.rs b/src/common/mod.rs index 8f6deaf0b..7e41f0813 100644 --- a/src/common/mod.rs +++ b/src/common/mod.rs @@ -99,15 +99,54 @@ pub fn u64_to_i64(val: u64) -> i64 { (val ^ HIGHEST_BIT) as i64 } +/// Maps a `f64` to `u64` +/// +/// For simplicity, tantivy internally handles `f64` as `u64`. +/// The mapping is defined by this function. +/// +/// Maps `f64` to `u64` so that lexical order is preserved. +/// +/// This is more suited than simply casting (`val as u64`) +/// which would truncate the result +/// +/// # See also +/// The [reverse mapping is `u64_to_f64`](./fn.u64_to_f64.html). +#[inline(always)] +pub fn f64_to_u64(val: f64) -> u64 { + let bits = val.to_bits(); + if val.is_sign_positive() { + bits ^ HIGHEST_BIT + } else { + !bits + } +} + +/// Reverse the mapping given by [`i64_to_u64`](./fn.i64_to_u64.html). 
+#[inline(always)] +pub fn u64_to_f64(val: u64) -> f64 { + f64::from_bits( + if val & HIGHEST_BIT != 0 { + val ^ HIGHEST_BIT + } else { + !val + } + ) +} + #[cfg(test)] pub(crate) mod test { pub use super::serialize::test::fixed_size_test; - use super::{compute_num_bits, i64_to_u64, u64_to_i64}; + use super::{compute_num_bits, i64_to_u64, u64_to_i64, f64_to_u64, u64_to_f64}; + use std::f64; fn test_i64_converter_helper(val: i64) { assert_eq!(u64_to_i64(i64_to_u64(val)), val); } + + fn test_f64_converter_helper(val: f64) { + assert_eq!(u64_to_f64(f64_to_u64(val)), val); + } #[test] fn test_i64_converter() { @@ -121,6 +160,28 @@ pub(crate) mod test { } } + #[test] + fn test_f64_converter() { + test_f64_converter_helper(f64::INFINITY); + test_f64_converter_helper(f64::NEG_INFINITY); + test_f64_converter_helper(0.0); + test_f64_converter_helper(-0.0); + test_f64_converter_helper(1.0); + test_f64_converter_helper(-1.0); + } + + #[test] + fn test_f64_order() { + assert!(!(f64_to_u64(f64::NEG_INFINITY)..f64_to_u64(f64::INFINITY)).contains(&f64_to_u64(f64::NAN))); //nan is not a number + assert!(f64_to_u64(1.5) > f64_to_u64(1.0)); //same exponent, different mantissa + assert!(f64_to_u64(2.0) > f64_to_u64(1.0)); //same mantissa, different exponent + assert!(f64_to_u64(2.0) > f64_to_u64(1.5)); //different exponent and mantissa + assert!(f64_to_u64(1.0) > f64_to_u64(-1.0)); // pos > neg + assert!(f64_to_u64(-1.5) < f64_to_u64(-1.0)); + assert!(f64_to_u64(-2.0) < f64_to_u64(1.0)); + assert!(f64_to_u64(-2.0) < f64_to_u64(-1.5)); + } + #[test] fn test_compute_num_bits() { assert_eq!(compute_num_bits(1), 1u8); diff --git a/src/common/serialize.rs b/src/common/serialize.rs index 4156115c7..85a944367 100644 --- a/src/common/serialize.rs +++ b/src/common/serialize.rs @@ -102,6 +102,19 @@ impl FixedSize for i64 { const SIZE_IN_BYTES: usize = 8; } +impl BinarySerializable for f64 { + fn serialize(&self, writer: &mut W) -> io::Result<()> { + writer.write_f64::(*self) + } + fn 
deserialize(reader: &mut R) -> io::Result { + reader.read_f64::() + } +} + +impl FixedSize for f64 { + const SIZE_IN_BYTES: usize = 8; +} + impl BinarySerializable for u8 { fn serialize(&self, writer: &mut W) -> io::Result<()> { writer.write_u8(*self) @@ -172,6 +185,11 @@ pub mod test { fixed_size_test::(); } + #[test] + fn test_serialize_f64() { + fixed_size_test::(); + } + #[test] fn test_serialize_u64() { fixed_size_test::(); diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index aa5104d1f..de8f93220 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -48,7 +48,7 @@ mod readers; mod serializer; mod writer; -/// Trait for types that are allowed for fast fields: (u64 or i64). +/// Trait for types that are allowed for fast fields: (u64, i64 and f64). pub trait FastValue: Default + Clone + Copy + Send + Sync + PartialOrd { /// Converts a value from u64 /// @@ -114,11 +114,33 @@ impl FastValue for i64 { } } +impl FastValue for f64 { + fn from_u64(val: u64) -> Self { + common::u64_to_f64(val) + } + + fn to_u64(&self) -> u64 { + common::f64_to_u64(*self) + } + + fn fast_field_cardinality(field_type: &FieldType) -> Option { + match *field_type { + FieldType::F64(ref integer_options) => integer_options.get_fastfield_cardinality(), + _ => None, + } + } + + fn as_u64(&self) -> u64 { + self.to_bits() + } +} + fn value_to_u64(value: &Value) -> u64 { match *value { Value::U64(ref val) => *val, Value::I64(ref val) => common::i64_to_u64(*val), - _ => panic!("Expected a u64/i64 field, got {:?} ", value), + Value::F64(ref val) => common::f64_to_u64(*val), + _ => panic!("Expected a u64/i64/f64 field, got {:?} ", value), } } diff --git a/src/fastfield/readers.rs b/src/fastfield/readers.rs index 4019cf37c..1eb4ca28b 100644 --- a/src/fastfield/readers.rs +++ b/src/fastfield/readers.rs @@ -14,8 +14,10 @@ use std::collections::HashMap; pub struct FastFieldReaders { fast_field_i64: HashMap>, fast_field_u64: HashMap>, + fast_field_f64: HashMap>, fast_field_i64s: 
HashMap>, fast_field_u64s: HashMap>, + fast_field_f64s: HashMap>, fast_bytes: HashMap, fast_fields_composite: CompositeFile, } @@ -23,6 +25,7 @@ pub struct FastFieldReaders { enum FastType { I64, U64, + F64, } fn type_and_cardinality(field_type: &FieldType) -> Option<(FastType, Cardinality)> { @@ -33,6 +36,9 @@ fn type_and_cardinality(field_type: &FieldType) -> Option<(FastType, Cardinality FieldType::I64(options) => options .get_fastfield_cardinality() .map(|cardinality| (FastType::I64, cardinality)), + FieldType::F64(options) => options + .get_fastfield_cardinality() + .map(|cardinality| (FastType::F64, cardinality)), FieldType::HierarchicalFacet => Some((FastType::U64, Cardinality::MultiValues)), _ => None, } @@ -46,8 +52,10 @@ impl FastFieldReaders { let mut fast_field_readers = FastFieldReaders { fast_field_i64: Default::default(), fast_field_u64: Default::default(), + fast_field_f64: Default::default(), fast_field_i64s: Default::default(), fast_field_u64s: Default::default(), + fast_field_f64s: Default::default(), fast_bytes: Default::default(), fast_fields_composite: fast_fields_composite.clone(), }; @@ -82,6 +90,12 @@ impl FastFieldReaders { FastFieldReader::open(fast_field_data.clone()), ); } + FastType::F64 => { + fast_field_readers.fast_field_f64.insert( + field, + FastFieldReader::open(fast_field_data.clone()), + ); + } } } else { return Err(From::from(FastFieldNotAvailableError::new(field_entry))); @@ -109,6 +123,14 @@ impl FastFieldReaders { .fast_field_u64s .insert(field, multivalued_int_fast_field); } + FastType::F64 => { + let vals_reader = FastFieldReader::open(fast_field_data); + let multivalued_int_fast_field = + MultiValueIntFastFieldReader::open(idx_reader, vals_reader); + fast_field_readers + .fast_field_f64s + .insert(field, multivalued_int_fast_field); + } } } else { return Err(From::from(FastFieldNotAvailableError::new(field_entry))); @@ -135,6 +157,8 @@ impl FastFieldReaders { /// If the field is a i64-fast field, return the associated 
u64 reader. Values are /// mapped from i64 to u64 using a (well the, it is unique) monotonic mapping. /// /// + ///TODO should it also be lenient with f64? + /// /// This method is useful when merging segment reader. pub(crate) fn u64_lenient(&self, field: Field) -> Option> { if let Some(u64_ff_reader) = self.u64(field) { @@ -153,6 +177,13 @@ impl FastFieldReaders { self.fast_field_i64.get(&field).cloned() } + /// Returns the `f64` fast field reader reader associated to `field`. + /// + /// If `field` is not a f64 fast field, this method returns `None`. + pub fn f64(&self, field: Field) -> Option> { + self.fast_field_f64.get(&field).cloned() + } + /// Returns a `u64s` multi-valued fast field reader reader associated to `field`. /// /// If `field` is not a u64 multi-valued fast field, this method returns `None`. @@ -182,6 +213,13 @@ impl FastFieldReaders { self.fast_field_i64s.get(&field).cloned() } + /// Returns a `f64s` multi-valued fast field reader reader associated to `field`. + /// + /// If `field` is not a f64 multi-valued fast field, this method returns `None`. + pub fn f64s(&self, field: Field) -> Option> { + self.fast_field_f64s.get(&field).cloned() + } + /// Returns the `bytes` fast field reader associated to `field`. /// /// If `field` is not a bytes fast field, returns `None`. 
diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs index a9d91208f..f1817f647 100644 --- a/src/fastfield/writer.rs +++ b/src/fastfield/writer.rs @@ -25,13 +25,13 @@ impl FastFieldsWriter { for (field_id, field_entry) in schema.fields().iter().enumerate() { let field = Field(field_id as u32); - let default_value = if let FieldType::I64(_) = *field_entry.field_type() { - common::i64_to_u64(0i64) - } else { - 0u64 + let default_value = match *field_entry.field_type() { + FieldType::I64(_) => common::i64_to_u64(0i64), + FieldType::F64(_) => common::f64_to_u64(0.0f64), + _ => 0u64, }; match *field_entry.field_type() { - FieldType::I64(ref int_options) | FieldType::U64(ref int_options) => { + FieldType::I64(ref int_options) | FieldType::U64(ref int_options) | FieldType::F64(ref int_options) => { match int_options.get_fastfield_cardinality() { Some(Cardinality::SingleValue) => { let mut fast_field_writer = IntFastFieldWriter::new(field); @@ -142,9 +142,9 @@ impl FastFieldsWriter { /// bitpacked and the number of bits required for bitpacking /// can only been known once we have seen all of the values. /// -/// Both u64, and i64 use the same writer. -/// i64 are just remapped to the `0..2^64 - 1` -/// using `common::i64_to_u64`. +/// Both u64, i64 and f64 use the same writer. +/// i64 and f64 are just remapped to the `0..2^64 - 1` +/// using `common::i64_to_u64` and `common::f64_to_u64`. pub struct IntFastFieldWriter { field: Field, vals: Vec, @@ -203,8 +203,8 @@ impl IntFastFieldWriter { /// Extract the value associated to the fast field for /// this document. /// - /// i64 are remapped to u64 using the logic - /// in `common::i64_to_u64`. + /// i64 and f64 are remapped to u64 using the logic + /// in `common::i64_to_u64` and `common::f64_to_u64`. /// /// If the value is missing, then the default value is used /// instead. 
diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index d01f351ae..be38c0a87 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -207,6 +207,7 @@ impl IndexMerger { } FieldType::U64(ref options) | FieldType::I64(ref options) + | FieldType::F64(ref options) | FieldType::Date(ref options) => match options.get_fastfield_cardinality() { Some(Cardinality::SingleValue) => { self.write_single_fast_field(field, fast_field_serializer)?; diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index b4c54c26c..4dd73dfda 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -214,6 +214,17 @@ impl SegmentWriter { } } } + FieldType::F64(ref int_option) => { + if int_option.is_indexed() { + for field_value in field_values { + let term = Term::from_field_f64( + field_value.field(), + field_value.value().f64_value(), + ); + self.multifield_postings.subscribe(doc_id, &term); + } + } + } FieldType::Bytes => { // Do nothing. Bytes only supports fast fields. } diff --git a/src/lib.rs b/src/lib.rs index be29b623a..b423f4345 100755 --- a/src/lib.rs +++ b/src/lib.rs @@ -179,7 +179,7 @@ pub use crate::indexer::IndexWriter; pub use crate::postings::Postings; pub use crate::schema::{Document, Term}; -pub use crate::common::{i64_to_u64, u64_to_i64}; +pub use crate::common::{i64_to_u64, u64_to_i64, f64_to_u64, u64_to_f64}; /// Expose the current version of tantivy, as well /// whether it was compiled with the simd compression. 
@@ -625,6 +625,30 @@ mod tests { assert!(!postings.advance()); } + #[test] + fn test_indexed_f64() { + let mut schema_builder = Schema::builder(); + let value_field = schema_builder.add_f64_field("value", INDEXED); + let schema = schema_builder.build(); + + let index = Index::create_in_ram(schema); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let val = std::f64::consts::PI; + index_writer.add_document(doc!(value_field => val)); + index_writer.commit().unwrap(); + let reader = index.reader().unwrap(); + let searcher = reader.searcher(); + let term = Term::from_field_f64(value_field, val); + let mut postings = searcher + .segment_reader(0) + .inverted_index(term.field()) + .read_postings(&term, IndexRecordOption::Basic) + .unwrap(); + assert!(postings.advance()); + assert_eq!(postings.doc(), 0); + assert!(!postings.advance()); + } + #[test] fn test_indexedfield_not_in_documents() { let mut schema_builder = Schema::builder(); @@ -817,6 +841,7 @@ mod tests { let mut schema_builder = Schema::builder(); let fast_field_unsigned = schema_builder.add_u64_field("unsigned", FAST); let fast_field_signed = schema_builder.add_i64_field("signed", FAST); + let fast_field_float = schema_builder.add_f64_field("float", FAST); let text_field = schema_builder.add_text_field("text", TEXT); let stored_int_field = schema_builder.add_u64_field("text", STORED); let schema = schema_builder.build(); @@ -824,7 +849,7 @@ mod tests { let index = Index::create_in_ram(schema); let mut index_writer = index.writer_with_num_threads(1, 50_000_000).unwrap(); { - let document = doc!(fast_field_unsigned => 4u64, fast_field_signed=>4i64); + let document = doc!(fast_field_unsigned => 4u64, fast_field_signed=>4i64, fast_field_float=>4f64); index_writer.add_document(document); index_writer.commit().unwrap(); } @@ -844,10 +869,14 @@ mod tests { assert!(fast_field_reader_opt.is_none()); } { - let fast_field_reader_opt = segment_reader.fast_fields().i64(fast_field_signed); + 
let fast_field_reader_opt = segment_reader.fast_fields().u64(fast_field_float); + assert!(fast_field_reader_opt.is_none()); + } + { + let fast_field_reader_opt = segment_reader.fast_fields().u64(fast_field_unsigned); assert!(fast_field_reader_opt.is_some()); let fast_field_reader = fast_field_reader_opt.unwrap(); - assert_eq!(fast_field_reader.get(0), 4i64) + assert_eq!(fast_field_reader.get(0), 4u64) } { @@ -856,5 +885,12 @@ mod tests { let fast_field_reader = fast_field_reader_opt.unwrap(); assert_eq!(fast_field_reader.get(0), 4i64) } + + { + let fast_field_reader_opt = segment_reader.fast_fields().f64(fast_field_float); + assert!(fast_field_reader_opt.is_some()); + let fast_field_reader = fast_field_reader_opt.unwrap(); + assert_eq!(fast_field_reader.get(0), 4f64) + } } } diff --git a/src/postings/postings_writer.rs b/src/postings/postings_writer.rs index bff89e453..9b6e00784 100644 --- a/src/postings/postings_writer.rs +++ b/src/postings/postings_writer.rs @@ -35,6 +35,7 @@ fn posting_from_field_entry(field_entry: &FieldEntry) -> Box .unwrap_or_else(|| SpecializedPostingsWriter::::new_boxed()), FieldType::U64(_) | FieldType::I64(_) + | FieldType::F64(_) | FieldType::Date(_) | FieldType::HierarchicalFacet => SpecializedPostingsWriter::::new_boxed(), FieldType::Bytes => { @@ -154,7 +155,7 @@ impl MultiFieldPostingsWriter { .collect(); unordered_term_mappings.insert(field, mapping); } - FieldType::U64(_) | FieldType::I64(_) | FieldType::Date(_) => {} + FieldType::U64(_) | FieldType::I64(_) | FieldType::F64(_) | FieldType::Date(_) => {} FieldType::Bytes => {} } diff --git a/src/query/query_parser/query_grammar.rs b/src/query/query_parser/query_grammar.rs index 6a968c6ac..a3df7146e 100644 --- a/src/query/query_parser/query_grammar.rs +++ b/src/query/query_parser/query_grammar.rs @@ -20,7 +20,7 @@ parser! { parser! 
{ fn word[I]()(I) -> String where [I: Stream] { - many1(satisfy(char::is_alphanumeric)) + many1(satisfy(|c: char| c.is_alphanumeric() || c=='.')) .and_then(|s: String| { match s.as_str() { "OR" => Err(StreamErrorFor::::unexpected_static_message("OR")), @@ -266,6 +266,7 @@ mod test { test_parse_query_to_ast_helper("(+a)", "+(\"a\")"); test_parse_query_to_ast_helper("(+a +b)", "(+(\"a\") +(\"b\"))"); test_parse_query_to_ast_helper("abc:toto", "abc:\"toto\""); + test_parse_query_to_ast_helper("abc:1.1", "abc:\"1.1\""); test_parse_query_to_ast_helper("+abc:toto", "+(abc:\"toto\")"); test_parse_query_to_ast_helper("(+abc:toto -titi)", "(+(abc:\"toto\") -(\"titi\"))"); test_parse_query_to_ast_helper("-abc:toto", "-(abc:\"toto\")"); @@ -277,6 +278,7 @@ mod test { test_parse_query_to_ast_helper("foo:[1 TO toto}", "foo:[\"1\" TO \"toto\"}"); test_parse_query_to_ast_helper("foo:[* TO toto}", "foo:[\"*\" TO \"toto\"}"); test_parse_query_to_ast_helper("foo:[1 TO *}", "foo:[\"1\" TO \"*\"}"); + test_parse_query_to_ast_helper("foo:[1.1 TO *}", "foo:[\"1.1\" TO \"*\"}"); test_is_parse_err("abc + "); } } diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs index bef73ca8e..ffa7b2065 100644 --- a/src/query/query_parser/query_parser.rs +++ b/src/query/query_parser/query_parser.rs @@ -18,7 +18,7 @@ use crate::schema::{FieldType, Term}; use crate::tokenizer::TokenizerManager; use combine::Parser; use std::borrow::Cow; -use std::num::ParseIntError; +use std::num::{ParseIntError, ParseFloatError}; use std::ops::Bound; use std::str::FromStr; @@ -30,9 +30,12 @@ pub enum QueryParserError { /// `FieldDoesNotExist(field_name: String)` /// The query references a field that is not in the schema FieldDoesNotExist(String), - /// The query contains a term for a `u64`-field, but the value - /// is not a u64. + /// The query contains a term for a `u64` or `i64`-field, but the value + /// is neither. 
ExpectedInt(ParseIntError), + /// The query contains a term for a `f64`-field, but the value + /// is not a f64. + ExpectedFloat(ParseFloatError), /// It is forbidden queries that are only "excluding". (e.g. -title:pop) AllButQueryForbidden, /// If no default field is declared, running a query without any @@ -60,6 +63,12 @@ impl From for QueryParserError { } } +impl From for QueryParserError { + fn from(err: ParseFloatError) -> QueryParserError { + QueryParserError::ExpectedFloat(err) + } +} + impl From for QueryParserError { fn from(err: chrono::ParseError) -> QueryParserError { QueryParserError::DateFormatError(err) @@ -239,6 +248,11 @@ impl QueryParser { let term = Term::from_field_i64(field, val); Ok(vec![(0, term)]) } + FieldType::F64(_) => { + let val: f64 = f64::from_str(phrase)?; + let term = Term::from_field_f64(field, val); + Ok(vec![(0, term)]) + } FieldType::Date(_) => match chrono::DateTime::parse_from_rfc3339(phrase) { Ok(x) => Ok(vec![( 0, @@ -529,6 +543,7 @@ mod test { schema_builder.add_text_field("nottokenized", STRING); schema_builder.add_text_field("with_stop_words", text_options); schema_builder.add_date_field("date", INDEXED); + schema_builder.add_f64_field("float", INDEXED); let schema = schema_builder.build(); let default_fields = vec![title, text]; let tokenizer_manager = TokenizerManager::default(); @@ -634,6 +649,13 @@ mod test { assert!(query_parser .parse_query("unsigned:\"18446744073709551615\"") .is_ok()); + assert!(query_parser.parse_query("float:\"3.1\"").is_ok()); + assert!(query_parser.parse_query("float:\"-2.4\"").is_ok()); + assert!(query_parser.parse_query("float:\"2.1.2\"").is_err()); + assert!(query_parser.parse_query("float:\"2.1a\"").is_err()); + assert!(query_parser + .parse_query("float:\"18446744073709551615.0\"") + .is_ok()); test_parse_query_to_logical_ast_helper( "unsigned:2324", "Term([0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 9, 20])", @@ -645,6 +667,12 @@ mod test { &format!("{:?}", Term::from_field_i64(Field(2u32), -2324)), 
false, ); + + test_parse_query_to_logical_ast_helper( + "float:2.5", + &format!("{:?}", Term::from_field_f64(Field(10u32), 2.5)), + false, + ); } #[test] @@ -786,6 +814,11 @@ mod test { query_parser.parse_query("signed:18b"), Err(QueryParserError::ExpectedInt(_)) ); + assert!(query_parser.parse_query("float:\"1.8\"").is_ok()); + assert_matches!( + query_parser.parse_query("float:1.8a"), + Err(QueryParserError::ExpectedFloat(_)) + ); } #[test] diff --git a/src/query/range_query.rs b/src/query/range_query.rs index 1ec9d20ce..76a0c15c8 100644 --- a/src/query/range_query.rs +++ b/src/query/range_query.rs @@ -142,6 +142,39 @@ impl RangeQuery { } } + /// Creates a new `RangeQuery` over a `f64` field. + /// + /// If the field is not of the type `f64`, tantivy + /// will panic when the `Weight` object is created. + pub fn new_f64(field: Field, range: Range) -> RangeQuery { + RangeQuery::new_f64_bounds( + field, + Bound::Included(range.start), + Bound::Excluded(range.end), + ) + } + + /// Create a new `RangeQuery` over a `f64` field. + /// + /// The two `Bound` arguments make it possible to create more complex + /// ranges than semi-inclusive range. + /// + /// If the field is not of the type `f64`, tantivy + /// will panic when the `Weight` object is created. + pub fn new_f64_bounds( + field: Field, + left_bound: Bound, + right_bound: Bound, + ) -> RangeQuery { + let make_term_val = |val: &f64| Term::from_field_f64(field, *val).value_bytes().to_owned(); + RangeQuery { + field, + value_type: Type::F64, + left_bound: map_bound(&left_bound, &make_term_val), + right_bound: map_bound(&right_bound, &make_term_val), + } + } + /// Create a new `RangeQuery` over a `u64` field. 
/// /// The two `Bound` arguments make it possible to create more complex @@ -397,4 +430,61 @@ mod tests { ); } + #[test] + fn test_range_float() { + let float_field: Field; + let schema = { + let mut schema_builder = Schema::builder(); + float_field = schema_builder.add_f64_field("floatfield", INDEXED); + schema_builder.build() + }; + + let index = Index::create_in_ram(schema); + { + let mut index_writer = index.writer_with_num_threads(2, 6_000_000).unwrap(); + + for i in 1..100 { + let mut doc = Document::new(); + for j in 1..100 { + if i % j == 0 { + doc.add_f64(float_field, j as f64); + } + } + index_writer.add_document(doc); + } + + index_writer.commit().unwrap(); + } + let reader = index.reader().unwrap(); + let searcher = reader.searcher(); + let count_multiples = + |range_query: RangeQuery| searcher.search(&range_query, &Count).unwrap(); + + assert_eq!(count_multiples(RangeQuery::new_f64(float_field, 10.0..11.0)), 9); + assert_eq!( + count_multiples(RangeQuery::new_f64_bounds( + float_field, + Bound::Included(10.0), + Bound::Included(11.0) + )), + 18 + ); + assert_eq!( + count_multiples(RangeQuery::new_f64_bounds( + float_field, + Bound::Excluded(9.0), + Bound::Included(10.0) + )), + 9 + ); + assert_eq!( + count_multiples(RangeQuery::new_f64_bounds( + float_field, + Bound::Included(9.0), + Bound::Unbounded + )), + 91 + ); + } + } diff --git a/src/schema/document.rs b/src/schema/document.rs index 678970f62..6b8bc7f38 100644 --- a/src/schema/document.rs +++ b/src/schema/document.rs @@ -88,6 +88,11 @@ impl Document { self.add(FieldValue::new(field, Value::I64(value))); } + /// Add a f64 field + pub fn add_f64(&mut self, field: Field, value: f64) { + self.add(FieldValue::new(field, Value::F64(value))); + } + /// Add a date field pub fn add_date(&mut self, field: Field, value: &DateTime) { self.add(FieldValue::new(field, Value::Date(*value))); diff --git a/src/schema/field_entry.rs b/src/schema/field_entry.rs index 8c47d02ba..8794e0238 100644 --- 
a/src/schema/field_entry.rs +++ b/src/schema/field_entry.rs @@ -48,6 +48,15 @@ impl FieldEntry { } } + /// Creates a new f64 field entry in the schema, given + /// a name, and some options. + pub fn new_f64(field_name: String, field_type: IntOptions) -> FieldEntry { + FieldEntry { + name: field_name, + field_type: FieldType::F64(field_type), + } + } + /// Creates a new date field entry in the schema, given /// a name, and some options. pub fn new_date(field_name: String, field_type: IntOptions) -> FieldEntry { @@ -89,6 +98,7 @@ impl FieldEntry { FieldType::Str(ref options) => options.get_indexing_options().is_some(), FieldType::U64(ref options) | FieldType::I64(ref options) + | FieldType::F64(ref options) | FieldType::Date(ref options) => options.is_indexed(), FieldType::HierarchicalFacet => true, FieldType::Bytes => false, @@ -98,7 +108,7 @@ impl FieldEntry { /// Returns true iff the field is a int (signed or unsigned) fast field pub fn is_int_fast(&self) -> bool { match self.field_type { - FieldType::U64(ref options) | FieldType::I64(ref options) => options.is_fast(), + FieldType::U64(ref options) | FieldType::I64(ref options) | FieldType::F64(ref options) => options.is_fast(), _ => false, } } @@ -108,6 +118,7 @@ impl FieldEntry { match self.field_type { FieldType::U64(ref options) | FieldType::I64(ref options) + | FieldType::F64(ref options) | FieldType::Date(ref options) => options.is_stored(), FieldType::Str(ref options) => options.is_stored(), // TODO make stored hierarchical facet optional @@ -138,6 +149,10 @@ impl Serialize for FieldEntry { s.serialize_field("type", "i64")?; s.serialize_field("options", options)?; } + FieldType::F64(ref options) => { + s.serialize_field("type", "f64")?; + s.serialize_field("options", options)?; + } FieldType::Date(ref options) => { s.serialize_field("type", "date")?; s.serialize_field("options", options)?; @@ -205,7 +220,7 @@ impl<'de> Deserialize<'de> for FieldEntry { "bytes" => { field_type = Some(FieldType::Bytes); } - 
"text" | "u64" | "i64" | "date" => { + "text" | "u64" | "i64" | "f64" | "date" => { // These types require additional options to create a field_type } _ => panic!("unhandled type"), @@ -222,6 +237,7 @@ impl<'de> Deserialize<'de> for FieldEntry { "text" => field_type = Some(FieldType::Str(map.next_value()?)), "u64" => field_type = Some(FieldType::U64(map.next_value()?)), "i64" => field_type = Some(FieldType::I64(map.next_value()?)), + "f64" => field_type = Some(FieldType::F64(map.next_value()?)), "date" => field_type = Some(FieldType::Date(map.next_value()?)), _ => { let msg = format!("Unrecognised type {}", ty); diff --git a/src/schema/field_type.rs b/src/schema/field_type.rs index d30436db3..1c93ccdcb 100644 --- a/src/schema/field_type.rs +++ b/src/schema/field_type.rs @@ -35,6 +35,8 @@ pub enum Type { U64, /// `i64` I64, + /// `f64` + F64, /// `date(i64) timestamp` Date, /// `tantivy::schema::Facet`. Passed as a string in JSON. @@ -53,6 +55,8 @@ pub enum FieldType { U64(IntOptions), /// Signed 64-bits integers 64 field type configuration I64(IntOptions), + /// 64-bits float 64 field type configuration + F64(IntOptions), /// Signed 64-bits Date 64 field type configuration, Date(IntOptions), /// Hierachical Facet @@ -68,6 +72,7 @@ impl FieldType { FieldType::Str(_) => Type::Str, FieldType::U64(_) => Type::U64, FieldType::I64(_) => Type::I64, + FieldType::F64(_) => Type::F64, FieldType::Date(_) => Type::Date, FieldType::HierarchicalFacet => Type::HierarchicalFacet, FieldType::Bytes => Type::Bytes, @@ -78,7 +83,7 @@ impl FieldType { pub fn is_indexed(&self) -> bool { match *self { FieldType::Str(ref text_options) => text_options.get_indexing_options().is_some(), - FieldType::U64(ref int_options) | FieldType::I64(ref int_options) => { + FieldType::U64(ref int_options) | FieldType::I64(ref int_options) | FieldType::F64(ref int_options) => { int_options.is_indexed() } FieldType::Date(ref date_options) => date_options.is_indexed(), @@ -98,6 +103,7 @@ impl FieldType { 
.map(TextFieldIndexing::index_option), FieldType::U64(ref int_options) | FieldType::I64(ref int_options) + | FieldType::F64(ref int_options) | FieldType::Date(ref int_options) => { if int_options.is_indexed() { Some(IndexRecordOption::Basic) @@ -119,7 +125,7 @@ impl FieldType { match *json { JsonValue::String(ref field_text) => match *self { FieldType::Str(_) => Ok(Value::Str(field_text.clone())), - FieldType::U64(_) | FieldType::I64(_) | FieldType::Date(_) => Err( + FieldType::U64(_) | FieldType::I64(_) | FieldType::F64(_) | FieldType::Date(_) => Err( ValueParsingError::TypeError(format!("Expected an integer, got {:?}", json)), ), FieldType::HierarchicalFacet => Ok(Value::Facet(Facet::from(field_text))), @@ -146,6 +152,14 @@ impl FieldType { let msg = format!("Expected a u64 int, got {:?}", json); Err(ValueParsingError::OverflowError(msg)) } + }, + FieldType::F64(_) => { + if let Some(field_val_f64) = field_val_num.as_f64() { + Ok(Value::F64(field_val_f64)) + } else { + let msg = format!("Expected a f64 int, got {:?}", json); + Err(ValueParsingError::OverflowError(msg)) + } } FieldType::Str(_) | FieldType::HierarchicalFacet | FieldType::Bytes => { let msg = format!("Expected a string, got {:?}", json); diff --git a/src/schema/flags.rs b/src/schema/flags.rs index 33be5f612..81a93b99d 100644 --- a/src/schema/flags.rs +++ b/src/schema/flags.rs @@ -22,7 +22,7 @@ pub const STORED: SchemaFlagList = SchemaFlagList { pub struct IndexedFlag; /// Flag to mark the field as indexed. /// -/// The `INDEXED` flag can only be used when building `IntOptions` (`u64` and `i64` fields) +/// The `INDEXED` flag can only be used when building `IntOptions` (`u64`, `i64` and `f64` fields) /// Of course, text fields can also be indexed... But this is expressed by using either the /// `STRING` (untokenized) or `TEXT` (tokenized with the english tokenizer) flags. 
pub const INDEXED: SchemaFlagList = SchemaFlagList { @@ -36,7 +36,7 @@ pub struct FastFlag; /// /// Fast fields can be random-accessed rapidly. Fields useful for scoring, filtering /// or collection should be mark as fast fields. -/// The `FAST` flag can only be used when building `IntOptions` (`u64` and `i64` fields) +/// The `FAST` flag can only be used when building `IntOptions` (`u64`, `i64` and `f64` fields) pub const FAST: SchemaFlagList = SchemaFlagList { head: FastFlag, tail: (), diff --git a/src/schema/mod.rs b/src/schema/mod.rs index 7ac3a1448..8fc088e5f 100644 --- a/src/schema/mod.rs +++ b/src/schema/mod.rs @@ -54,7 +54,7 @@ On the other hand setting the field as stored or not determines whether the fiel when [`searcher.doc(doc_address)`](../struct.Searcher.html#method.doc) is called. -## Setting a u64 or a i64 field +## Setting a u64, a i64 or a f64 field ### Example diff --git a/src/schema/schema.rs b/src/schema/schema.rs index 811212afb..6094d4b47 100644 --- a/src/schema/schema.rs +++ b/src/schema/schema.rs @@ -82,6 +82,26 @@ impl SchemaBuilder { self.add_field(field_entry) } + /// Adds a new f64 field. + /// Returns the associated field handle + /// + /// # Caution + /// + /// Appending two fields with the same name + /// will result in the shadowing of the first + /// by the second one. + /// The first field will get a field id + /// but only the second one will be indexed + pub fn add_f64_field>( + &mut self, + field_name_str: &str, + field_options: T, + ) -> Field { + let field_name = String::from(field_name_str); + let field_entry = FieldEntry::new_f64(field_name, field_options.into()); + self.add_field(field_entry) + } + /// Adds a new date field. 
/// Returns the associated field handle /// Internally, Tantivy simply stores dates as i64 UTC timestamps, @@ -376,10 +396,14 @@ mod tests { let popularity_options = IntOptions::default() .set_stored() .set_fast(Cardinality::SingleValue); + let score_options = IntOptions::default() + .set_indexed() + .set_fast(Cardinality::SingleValue); schema_builder.add_text_field("title", TEXT); schema_builder.add_text_field("author", STRING); schema_builder.add_u64_field("count", count_options); schema_builder.add_i64_field("popularity", popularity_options); + schema_builder.add_f64_field("score", score_options); let schema = schema_builder.build(); let schema_json = serde_json::to_string_pretty(&schema).unwrap(); let expected = r#"[ @@ -422,6 +446,15 @@ mod tests { "fast": "single", "stored": true } + }, + { + "name": "score", + "type": "f64", + "options": { + "indexed": true, + "fast": "single", + "stored": false + } } ]"#; assert_eq!(schema_json, expected); @@ -434,6 +467,8 @@ mod tests { assert_eq!("author", fields.next().unwrap().name()); assert_eq!("count", fields.next().unwrap().name()); assert_eq!("popularity", fields.next().unwrap().name()); + assert_eq!("score", fields.next().unwrap().name()); + assert!(fields.next().is_none()); } #[test] @@ -466,10 +501,14 @@ mod tests { let popularity_options = IntOptions::default() .set_stored() .set_fast(Cardinality::SingleValue); + let score_options = IntOptions::default() + .set_indexed() + .set_fast(Cardinality::SingleValue); let title_field = schema_builder.add_text_field("title", TEXT); let author_field = schema_builder.add_text_field("author", STRING); let count_field = schema_builder.add_u64_field("count", count_options); let popularity_field = schema_builder.add_i64_field("popularity", popularity_options); + let score_field = schema_builder.add_f64_field("score", score_options); let schema = schema_builder.build(); { let doc = schema.parse_document("{}").unwrap(); @@ -482,7 +521,8 @@ mod tests { "title": "my title", 
"author": "fulmicoton", "count": 4, - "popularity": 10 + "popularity": 10, + "score": 80.5 }"#, ) .unwrap(); @@ -493,6 +533,7 @@ mod tests { ); assert_eq!(doc.get_first(count_field).unwrap().u64_value(), 4); assert_eq!(doc.get_first(popularity_field).unwrap().i64_value(), 10); + assert_eq!(doc.get_first(score_field).unwrap().f64_value(), 80.5); } { let json_err = schema.parse_document( @@ -501,6 +542,7 @@ mod tests { "author": "fulmicoton", "count": 4, "popularity": 10, + "score": 80.5, "jambon": "bayonne" }"#, ); @@ -513,6 +555,7 @@ mod tests { "author": "fulmicoton", "count": "5", "popularity": "10", + "score": "80.5", "jambon": "bayonne" }"#, ); @@ -527,7 +570,8 @@ mod tests { "title": "my title", "author": "fulmicoton", "count": -5, - "popularity": 10 + "popularity": 10, + "score": 80.5 }"#, ); assert_matches!( @@ -541,7 +585,8 @@ mod tests { "title": "my title", "author": "fulmicoton", "count": 9223372036854775808, - "popularity": 10 + "popularity": 10, + "score": 80.5 }"#, ); assert!(!matches!( @@ -555,7 +600,8 @@ mod tests { "title": "my title", "author": "fulmicoton", "count": 50, - "popularity": 9223372036854775808 + "popularity": 9223372036854775808, + "score": 80.5 }"#, ); assert_matches!( diff --git a/src/schema/term.rs b/src/schema/term.rs index 7c85c89c3..4800d5742 100644 --- a/src/schema/term.rs +++ b/src/schema/term.rs @@ -19,9 +19,9 @@ where B: AsRef<[u8]>; impl Term { - /// Builds a term given a field, and a u64-value + /// Builds a term given a field, and a i64-value /// - /// Assuming the term has a field id of 1, and a u64 value of 3234, + /// Assuming the term has a field id of 1, and a i64 value of 3234, /// the Term will have 8 bytes. /// /// The first four byte are dedicated to storing the field id as a u64. @@ -31,6 +31,18 @@ impl Term { Term::from_field_u64(field, val_u64) } + /// Builds a term given a field, and a f64-value + /// + /// Assuming the term has a field id of 1, and a u64 value of 3234, + /// the Term will have 8 bytes. 
<= this is wrong: with the 4-byte field id the Term is 12 bytes long + /// + /// The first four bytes are dedicated to storing the field id. + /// The 8 following bytes are encoding the f64 value, mapped to a u64. + pub fn from_field_f64(field: Field, val: f64) -> Term { + let val_u64: u64 = common::f64_to_u64(val); + Term::from_field_u64(field, val_u64) + } + /// Builds a term given a field, and a DateTime value /// /// Assuming the term has a field id of 1, and a timestamp i64 value of 3234, @@ -112,6 +124,11 @@ impl Term { self.set_u64(common::i64_to_u64(val)); } + /// Sets a `f64` value in the term. + pub fn set_f64(&mut self, val: f64) { + self.set_u64(common::f64_to_u64(val)); + } + fn set_bytes(&mut self, bytes: &[u8]) { self.0.resize(4, 0u8); self.0.extend(bytes); @@ -161,6 +178,15 @@ where common::u64_to_i64(BigEndian::read_u64(&self.0.as_ref()[4..])) } + /// Returns the `f64` value stored in a term. + /// + /// # Panics + /// ... or returns an invalid value + /// if the term is not a `f64` field. + pub fn get_f64(&self) -> f64 { + common::u64_to_f64(BigEndian::read_u64(&self.0.as_ref()[4..])) + } + /// Returns the text associated with the term. /// /// # Panics diff --git a/src/schema/value.rs b/src/schema/value.rs index bb576e982..0f3209d8e 100644 --- a/src/schema/value.rs +++ b/src/schema/value.rs @@ -2,11 +2,11 @@ use crate::schema::Facet; use crate::DateTime; use serde::de::Visitor; use serde::{Deserialize, Deserializer, Serialize, Serializer}; -use std::fmt; +use std::{fmt, cmp::Ordering}; /// Value represents the value of a any field. /// It is an enum over all over all of the possible field type. -#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd)] +#[derive(Debug, Clone, PartialEq, PartialOrd)] pub enum Value { /// The str type is used for any text information. 
Str(String), @@ -14,6 +14,8 @@ pub enum Value { U64(u64), /// Signed 64-bits Integer `i64` I64(i64), + /// 64-bits Float `f64` + F64(f64), /// Signed 64-bits Date time stamp `date` Date(DateTime), /// Hierarchical Facet @@ -22,6 +24,40 @@ pub enum Value { Bytes(Vec), } +impl Eq for Value {} +impl Ord for Value { + fn cmp(&self, other: &Self) -> Ordering { + match (self,other) { + (Value::Str(l), Value::Str(r)) => l.cmp(r), + (Value::U64(l), Value::U64(r)) => l.cmp(r), + (Value::I64(l), Value::I64(r)) => l.cmp(r), + (Value::Date(l), Value::Date(r)) => l.cmp(r), + (Value::Facet(l), Value::Facet(r)) => l.cmp(r), + (Value::Bytes(l), Value::Bytes(r)) => l.cmp(r), + (Value::F64(l), Value::F64(r)) => { + match (l.is_nan(),r.is_nan()) { + (false, false) => l.partial_cmp(r).unwrap(), // only fail on NaN + (true, true) => Ordering::Equal, + (true, false) => Ordering::Less, // we define NaN as less than -∞ + (false, true) => Ordering::Greater, + } + } + (Value::Str(_), _) => Ordering::Less, + (_, Value::Str(_)) => Ordering::Greater, + (Value::U64(_), _) => Ordering::Less, + (_, Value::U64(_)) => Ordering::Greater, + (Value::I64(_), _) => Ordering::Less, + (_, Value::I64(_)) => Ordering::Greater, + (Value::F64(_), _) => Ordering::Less, + (_, Value::F64(_)) => Ordering::Greater, + (Value::Date(_), _) => Ordering::Less, + (_, Value::Date(_)) => Ordering::Greater, + (Value::Facet(_), _) => Ordering::Less, + (_, Value::Facet(_)) => Ordering::Greater, + } + } +} + impl Serialize for Value { fn serialize(&self, serializer: S) -> Result where @@ -31,6 +67,7 @@ impl Serialize for Value { Value::Str(ref v) => serializer.serialize_str(v), Value::U64(u) => serializer.serialize_u64(u), Value::I64(u) => serializer.serialize_i64(u), + Value::F64(u) => serializer.serialize_f64(u), Value::Date(ref date) => serializer.serialize_i64(date.timestamp()), Value::Facet(ref facet) => facet.serialize(serializer), Value::Bytes(ref bytes) => serializer.serialize_bytes(bytes), @@ -60,6 +97,10 @@ 
impl<'de> Deserialize<'de> for Value { Ok(Value::I64(v)) } + fn visit_f64(self, v: f64) -> Result { + Ok(Value::F64(v)) + } + fn visit_str(self, v: &str) -> Result { Ok(Value::Str(v.to_owned())) } @@ -75,9 +116,7 @@ impl<'de> Deserialize<'de> for Value { impl Value { /// Returns the text value, provided the value is of the `Str` type. - /// - /// # Panics - /// If the value is not of type `Str` + /// (Returns None if the value is not of the `Str` type). pub fn text(&self) -> Option<&str> { match *self { Value::Str(ref text) => Some(text), @@ -92,7 +131,7 @@ impl Value { pub fn u64_value(&self) -> u64 { match *self { Value::U64(ref value) => *value, - _ => panic!("This is not a text field."), + _ => panic!("This is not a u64 field."), } } @@ -103,10 +142,21 @@ impl Value { pub fn i64_value(&self) -> i64 { match *self { Value::I64(ref value) => *value, - _ => panic!("This is not a text field."), + _ => panic!("This is not a i64 field."), } } + /// Returns the f64-value, provided the value is of the `F64` type. + /// + /// # Panics + /// If the value is not of type `F64` + pub fn f64_value(&self) -> f64 { + match *self { + Value::F64(ref value) => *value, + _ => panic!("This is not a f64 field."), + } + } + /// Returns the Date-value, provided the value is of the `Date` type. 
/// /// # Panics @@ -137,6 +187,12 @@ impl From for Value { } } +impl From for Value { + fn from(v: f64) -> Value { + Value::F64(v) + } +} + impl From for Value { fn from(date_time: DateTime) -> Value { Value::Date(date_time) @@ -163,7 +219,7 @@ impl From> for Value { mod binary_serialize { use super::Value; - use crate::common::BinarySerializable; + use crate::common::{BinarySerializable, f64_to_u64, u64_to_f64}; use crate::schema::Facet; use chrono::{TimeZone, Utc}; use std::io::{self, Read, Write}; @@ -174,6 +230,7 @@ mod binary_serialize { const HIERARCHICAL_FACET_CODE: u8 = 3; const BYTES_CODE: u8 = 4; const DATE_CODE: u8 = 5; + const F64_CODE: u8 = 6; impl BinarySerializable for Value { fn serialize(&self, writer: &mut W) -> io::Result<()> { @@ -190,6 +247,10 @@ mod binary_serialize { I64_CODE.serialize(writer)?; val.serialize(writer) } + Value::F64(ref val) => { + F64_CODE.serialize(writer)?; + f64_to_u64(*val).serialize(writer) + } Value::Date(ref val) => { DATE_CODE.serialize(writer)?; val.timestamp().serialize(writer) @@ -219,6 +280,10 @@ mod binary_serialize { let value = i64::deserialize(reader)?; Ok(Value::I64(value)) } + F64_CODE => { + let value = u64_to_f64(u64::deserialize(reader)?); + Ok(Value::F64(value)) + } DATE_CODE => { let timestamp = i64::deserialize(reader)?; Ok(Value::Date(Utc.timestamp(timestamp, 0))) diff --git a/src/termdict/mod.rs b/src/termdict/mod.rs index c1c387a1b..9ada12708 100644 --- a/src/termdict/mod.rs +++ b/src/termdict/mod.rs @@ -14,6 +14,9 @@ lexicographical order matches the natural order of integers. `i64`-terms are transformed to `u64` using a continuous mapping `val ⟶ val - i64::min_value()` and then treated as a `u64`. +`f64`-terms are transformed to `u64` using a mapping that preserves order, and are then treated +as `u64`. + A second datastructure makes it possible to access a [`TermInfo`](../postings/struct.TermInfo.html). */