mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2025-12-23 02:29:57 +00:00
add support for float (#603)
* add basic support for float as for i64, they are mapped to u64 for indexing query parser don't work yet * Update value.rs * implement support for float in query parser * Update README.md
This commit is contained in:
committed by
Paul Masurel
parent
c3231ca252
commit
6eb4e08636
@@ -1,3 +1,8 @@
|
||||
Tantivy 0.11.0
|
||||
=====================
|
||||
|
||||
- Added f64 field. Internally reuse u64 code the same way i64 does (@fdb-hiroshima)
|
||||
|
||||
Tantivy 0.10.0
|
||||
=====================
|
||||
|
||||
|
||||
@@ -50,9 +50,9 @@ performance for different type of queries / collection.
|
||||
- Multithreaded indexing (indexing English Wikipedia takes < 3 minutes on my desktop)
|
||||
- Mmap directory
|
||||
- SIMD integer compression when the platform/CPU includes the SSE2 instruction set.
|
||||
- Single valued and multivalued u64 and i64 fast fields (equivalent of doc values in Lucene)
|
||||
- Single valued and multivalued u64, i64 and f64 fast fields (equivalent of doc values in Lucene)
|
||||
- `&[u8]` fast fields
|
||||
- Text, i64, u64, dates and hierarchical facet fields
|
||||
- Text, i64, u64, f64, dates and hierarchical facet fields
|
||||
- LZ4 compressed document store
|
||||
- Range queries
|
||||
- Faceted search
|
||||
|
||||
@@ -82,6 +82,7 @@ mod tests {
|
||||
let mut schema_builder = schema::Schema::builder();
|
||||
let num_field_i64 = schema_builder.add_i64_field("num_i64", FAST);
|
||||
let num_field_u64 = schema_builder.add_u64_field("num_u64", FAST);
|
||||
let num_field_f64 = schema_builder.add_f64_field("num_f64", FAST);
|
||||
let text_field = schema_builder.add_text_field("text", STRING);
|
||||
let schema = schema_builder.build();
|
||||
|
||||
@@ -94,6 +95,7 @@ mod tests {
|
||||
index_writer.add_document(doc!(
|
||||
num_field_i64 => ((i as i64) % 3i64) as i64,
|
||||
num_field_u64 => (i % 2u64) as u64,
|
||||
num_field_f64 => (i % 4u64) as f64,
|
||||
text_field => "text"
|
||||
));
|
||||
}
|
||||
@@ -104,10 +106,11 @@ mod tests {
|
||||
let searcher = index.reader().searcher();
|
||||
let mut ffvf_i64: IntFacetCollector<I64FastFieldReader> = IntFacetCollector::new(num_field_i64);
|
||||
let mut ffvf_u64: IntFacetCollector<U64FastFieldReader> = IntFacetCollector::new(num_field_u64);
|
||||
let mut ffvf_f64: IntFacetCollector<F64FastFieldReader> = IntFacetCollector::new(num_field_f64);
|
||||
|
||||
{
|
||||
// perform the query
|
||||
let mut facet_collectors = chain().push(&mut ffvf_i64).push(&mut ffvf_u64);
|
||||
let mut facet_collectors = chain().push(&mut ffvf_i64).push(&mut ffvf_u64).push(&mut ffvf_f64);
|
||||
let mut query_parser = QueryParser::for_index(index, vec![text_field]);
|
||||
let query = query_parser.parse_query("text:text").unwrap();
|
||||
query.search(&searcher, &mut facet_collectors).unwrap();
|
||||
@@ -117,6 +120,8 @@ mod tests {
|
||||
assert_eq!(ffvf_u64.counters[&1], 5);
|
||||
assert_eq!(ffvf_i64.counters[&0], 4);
|
||||
assert_eq!(ffvf_i64.counters[&1], 3);
|
||||
assert_eq!(ffvf_f64.counters[&0.0], 3);
|
||||
assert_eq!(ffvf_f64.counters[&2.0], 2);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -160,6 +160,7 @@ impl TopDocs {
|
||||
.fast_fields()
|
||||
.u64(field)
|
||||
.expect("Field requested is not a i64/u64 fast field.");
|
||||
//TODO error message missmatch actual behavior for i64
|
||||
move |doc: DocId| ff_reader.get(doc)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -99,15 +99,54 @@ pub fn u64_to_i64(val: u64) -> i64 {
|
||||
(val ^ HIGHEST_BIT) as i64
|
||||
}
|
||||
|
||||
/// Maps a `f64` to `u64`
|
||||
///
|
||||
/// For simplicity, tantivy internally handles `f64` as `u64`.
|
||||
/// The mapping is defined by this function.
|
||||
///
|
||||
/// Maps `f64` to `u64` so that lexical order is preserved.
|
||||
///
|
||||
/// This is more suited than simply casting (`val as u64`)
|
||||
/// which would truncate the result
|
||||
///
|
||||
/// # See also
|
||||
/// The [reverse mapping is `u64_to_f64`](./fn.u64_to_f64.html).
|
||||
#[inline(always)]
|
||||
pub fn f64_to_u64(val: f64) -> u64 {
|
||||
let bits = val.to_bits();
|
||||
if val.is_sign_positive() {
|
||||
bits ^ HIGHEST_BIT
|
||||
} else {
|
||||
!bits
|
||||
}
|
||||
}
|
||||
|
||||
/// Reverse the mapping given by [`i64_to_u64`](./fn.i64_to_u64.html).
|
||||
#[inline(always)]
|
||||
pub fn u64_to_f64(val: u64) -> f64 {
|
||||
f64::from_bits(
|
||||
if val & HIGHEST_BIT != 0 {
|
||||
val ^ HIGHEST_BIT
|
||||
} else {
|
||||
!val
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod test {
|
||||
|
||||
pub use super::serialize::test::fixed_size_test;
|
||||
use super::{compute_num_bits, i64_to_u64, u64_to_i64};
|
||||
use super::{compute_num_bits, i64_to_u64, u64_to_i64, f64_to_u64, u64_to_f64};
|
||||
use std::f64;
|
||||
|
||||
fn test_i64_converter_helper(val: i64) {
|
||||
assert_eq!(u64_to_i64(i64_to_u64(val)), val);
|
||||
}
|
||||
|
||||
fn test_f64_converter_helper(val: f64) {
|
||||
assert_eq!(u64_to_f64(f64_to_u64(val)), val);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_i64_converter() {
|
||||
@@ -121,6 +160,28 @@ pub(crate) mod test {
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_f64_converter() {
|
||||
test_f64_converter_helper(f64::INFINITY);
|
||||
test_f64_converter_helper(f64::NEG_INFINITY);
|
||||
test_f64_converter_helper(0.0);
|
||||
test_f64_converter_helper(-0.0);
|
||||
test_f64_converter_helper(1.0);
|
||||
test_f64_converter_helper(-1.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_f64_order() {
|
||||
assert!(!(f64_to_u64(f64::NEG_INFINITY)..f64_to_u64(f64::INFINITY)).contains(&f64_to_u64(f64::NAN))); //nan is not a number
|
||||
assert!(f64_to_u64(1.5) > f64_to_u64(1.0)); //same exponent, different mantissa
|
||||
assert!(f64_to_u64(2.0) > f64_to_u64(1.0)); //same mantissa, different exponent
|
||||
assert!(f64_to_u64(2.0) > f64_to_u64(1.5)); //different exponent and mantissa
|
||||
assert!(f64_to_u64(1.0) > f64_to_u64(-1.0)); // pos > neg
|
||||
assert!(f64_to_u64(-1.5) < f64_to_u64(-1.0));
|
||||
assert!(f64_to_u64(-2.0) < f64_to_u64(1.0));
|
||||
assert!(f64_to_u64(-2.0) < f64_to_u64(-1.5));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compute_num_bits() {
|
||||
assert_eq!(compute_num_bits(1), 1u8);
|
||||
|
||||
@@ -102,6 +102,19 @@ impl FixedSize for i64 {
|
||||
const SIZE_IN_BYTES: usize = 8;
|
||||
}
|
||||
|
||||
impl BinarySerializable for f64 {
|
||||
fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
|
||||
writer.write_f64::<Endianness>(*self)
|
||||
}
|
||||
fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
|
||||
reader.read_f64::<Endianness>()
|
||||
}
|
||||
}
|
||||
|
||||
impl FixedSize for f64 {
|
||||
const SIZE_IN_BYTES: usize = 8;
|
||||
}
|
||||
|
||||
impl BinarySerializable for u8 {
|
||||
fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
|
||||
writer.write_u8(*self)
|
||||
@@ -172,6 +185,11 @@ pub mod test {
|
||||
fixed_size_test::<i64>();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_serialize_f64() {
|
||||
fixed_size_test::<f64>();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_serialize_u64() {
|
||||
fixed_size_test::<u64>();
|
||||
|
||||
@@ -48,7 +48,7 @@ mod readers;
|
||||
mod serializer;
|
||||
mod writer;
|
||||
|
||||
/// Trait for types that are allowed for fast fields: (u64 or i64).
|
||||
/// Trait for types that are allowed for fast fields: (u64, i64 and f64).
|
||||
pub trait FastValue: Default + Clone + Copy + Send + Sync + PartialOrd {
|
||||
/// Converts a value from u64
|
||||
///
|
||||
@@ -114,11 +114,33 @@ impl FastValue for i64 {
|
||||
}
|
||||
}
|
||||
|
||||
impl FastValue for f64 {
|
||||
fn from_u64(val: u64) -> Self {
|
||||
common::u64_to_f64(val)
|
||||
}
|
||||
|
||||
fn to_u64(&self) -> u64 {
|
||||
common::f64_to_u64(*self)
|
||||
}
|
||||
|
||||
fn fast_field_cardinality(field_type: &FieldType) -> Option<Cardinality> {
|
||||
match *field_type {
|
||||
FieldType::F64(ref integer_options) => integer_options.get_fastfield_cardinality(),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn as_u64(&self) -> u64 {
|
||||
self.to_bits()
|
||||
}
|
||||
}
|
||||
|
||||
fn value_to_u64(value: &Value) -> u64 {
|
||||
match *value {
|
||||
Value::U64(ref val) => *val,
|
||||
Value::I64(ref val) => common::i64_to_u64(*val),
|
||||
_ => panic!("Expected a u64/i64 field, got {:?} ", value),
|
||||
Value::F64(ref val) => common::f64_to_u64(*val),
|
||||
_ => panic!("Expected a u64/i64/f64 field, got {:?} ", value),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -14,8 +14,10 @@ use std::collections::HashMap;
|
||||
pub struct FastFieldReaders {
|
||||
fast_field_i64: HashMap<Field, FastFieldReader<i64>>,
|
||||
fast_field_u64: HashMap<Field, FastFieldReader<u64>>,
|
||||
fast_field_f64: HashMap<Field, FastFieldReader<f64>>,
|
||||
fast_field_i64s: HashMap<Field, MultiValueIntFastFieldReader<i64>>,
|
||||
fast_field_u64s: HashMap<Field, MultiValueIntFastFieldReader<u64>>,
|
||||
fast_field_f64s: HashMap<Field, MultiValueIntFastFieldReader<f64>>,
|
||||
fast_bytes: HashMap<Field, BytesFastFieldReader>,
|
||||
fast_fields_composite: CompositeFile,
|
||||
}
|
||||
@@ -23,6 +25,7 @@ pub struct FastFieldReaders {
|
||||
enum FastType {
|
||||
I64,
|
||||
U64,
|
||||
F64,
|
||||
}
|
||||
|
||||
fn type_and_cardinality(field_type: &FieldType) -> Option<(FastType, Cardinality)> {
|
||||
@@ -33,6 +36,9 @@ fn type_and_cardinality(field_type: &FieldType) -> Option<(FastType, Cardinality
|
||||
FieldType::I64(options) => options
|
||||
.get_fastfield_cardinality()
|
||||
.map(|cardinality| (FastType::I64, cardinality)),
|
||||
FieldType::F64(options) => options
|
||||
.get_fastfield_cardinality()
|
||||
.map(|cardinality| (FastType::F64, cardinality)),
|
||||
FieldType::HierarchicalFacet => Some((FastType::U64, Cardinality::MultiValues)),
|
||||
_ => None,
|
||||
}
|
||||
@@ -46,8 +52,10 @@ impl FastFieldReaders {
|
||||
let mut fast_field_readers = FastFieldReaders {
|
||||
fast_field_i64: Default::default(),
|
||||
fast_field_u64: Default::default(),
|
||||
fast_field_f64: Default::default(),
|
||||
fast_field_i64s: Default::default(),
|
||||
fast_field_u64s: Default::default(),
|
||||
fast_field_f64s: Default::default(),
|
||||
fast_bytes: Default::default(),
|
||||
fast_fields_composite: fast_fields_composite.clone(),
|
||||
};
|
||||
@@ -82,6 +90,12 @@ impl FastFieldReaders {
|
||||
FastFieldReader::open(fast_field_data.clone()),
|
||||
);
|
||||
}
|
||||
FastType::F64 => {
|
||||
fast_field_readers.fast_field_f64.insert(
|
||||
field,
|
||||
FastFieldReader::open(fast_field_data.clone()),
|
||||
);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return Err(From::from(FastFieldNotAvailableError::new(field_entry)));
|
||||
@@ -109,6 +123,14 @@ impl FastFieldReaders {
|
||||
.fast_field_u64s
|
||||
.insert(field, multivalued_int_fast_field);
|
||||
}
|
||||
FastType::F64 => {
|
||||
let vals_reader = FastFieldReader::open(fast_field_data);
|
||||
let multivalued_int_fast_field =
|
||||
MultiValueIntFastFieldReader::open(idx_reader, vals_reader);
|
||||
fast_field_readers
|
||||
.fast_field_f64s
|
||||
.insert(field, multivalued_int_fast_field);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return Err(From::from(FastFieldNotAvailableError::new(field_entry)));
|
||||
@@ -135,6 +157,8 @@ impl FastFieldReaders {
|
||||
/// If the field is a i64-fast field, return the associated u64 reader. Values are
|
||||
/// mapped from i64 to u64 using a (well the, it is unique) monotonic mapping. ///
|
||||
///
|
||||
///TODO should it also be lenient with f64?
|
||||
///
|
||||
/// This method is useful when merging segment reader.
|
||||
pub(crate) fn u64_lenient(&self, field: Field) -> Option<FastFieldReader<u64>> {
|
||||
if let Some(u64_ff_reader) = self.u64(field) {
|
||||
@@ -153,6 +177,13 @@ impl FastFieldReaders {
|
||||
self.fast_field_i64.get(&field).cloned()
|
||||
}
|
||||
|
||||
/// Returns the `f64` fast field reader reader associated to `field`.
|
||||
///
|
||||
/// If `field` is not a f64 fast field, this method returns `None`.
|
||||
pub fn f64(&self, field: Field) -> Option<FastFieldReader<f64>> {
|
||||
self.fast_field_f64.get(&field).cloned()
|
||||
}
|
||||
|
||||
/// Returns a `u64s` multi-valued fast field reader reader associated to `field`.
|
||||
///
|
||||
/// If `field` is not a u64 multi-valued fast field, this method returns `None`.
|
||||
@@ -182,6 +213,13 @@ impl FastFieldReaders {
|
||||
self.fast_field_i64s.get(&field).cloned()
|
||||
}
|
||||
|
||||
/// Returns a `f64s` multi-valued fast field reader reader associated to `field`.
|
||||
///
|
||||
/// If `field` is not a f64 multi-valued fast field, this method returns `None`.
|
||||
pub fn f64s(&self, field: Field) -> Option<MultiValueIntFastFieldReader<f64>> {
|
||||
self.fast_field_f64s.get(&field).cloned()
|
||||
}
|
||||
|
||||
/// Returns the `bytes` fast field reader associated to `field`.
|
||||
///
|
||||
/// If `field` is not a bytes fast field, returns `None`.
|
||||
|
||||
@@ -25,13 +25,13 @@ impl FastFieldsWriter {
|
||||
|
||||
for (field_id, field_entry) in schema.fields().iter().enumerate() {
|
||||
let field = Field(field_id as u32);
|
||||
let default_value = if let FieldType::I64(_) = *field_entry.field_type() {
|
||||
common::i64_to_u64(0i64)
|
||||
} else {
|
||||
0u64
|
||||
let default_value = match *field_entry.field_type() {
|
||||
FieldType::I64(_) => common::i64_to_u64(0i64),
|
||||
FieldType::F64(_) => common::f64_to_u64(0.0f64),
|
||||
_ => 0u64,
|
||||
};
|
||||
match *field_entry.field_type() {
|
||||
FieldType::I64(ref int_options) | FieldType::U64(ref int_options) => {
|
||||
FieldType::I64(ref int_options) | FieldType::U64(ref int_options) | FieldType::F64(ref int_options) => {
|
||||
match int_options.get_fastfield_cardinality() {
|
||||
Some(Cardinality::SingleValue) => {
|
||||
let mut fast_field_writer = IntFastFieldWriter::new(field);
|
||||
@@ -142,9 +142,9 @@ impl FastFieldsWriter {
|
||||
/// bitpacked and the number of bits required for bitpacking
|
||||
/// can only been known once we have seen all of the values.
|
||||
///
|
||||
/// Both u64, and i64 use the same writer.
|
||||
/// i64 are just remapped to the `0..2^64 - 1`
|
||||
/// using `common::i64_to_u64`.
|
||||
/// Both u64, i64 and f64 use the same writer.
|
||||
/// i64 and f64 are just remapped to the `0..2^64 - 1`
|
||||
/// using `common::i64_to_u64` and `common::f64_to_u64`.
|
||||
pub struct IntFastFieldWriter {
|
||||
field: Field,
|
||||
vals: Vec<u8>,
|
||||
@@ -203,8 +203,8 @@ impl IntFastFieldWriter {
|
||||
/// Extract the value associated to the fast field for
|
||||
/// this document.
|
||||
///
|
||||
/// i64 are remapped to u64 using the logic
|
||||
/// in `common::i64_to_u64`.
|
||||
/// i64 and f64 are remapped to u64 using the logic
|
||||
/// in `common::i64_to_u64` and `common::f64_to_u64`.
|
||||
///
|
||||
/// If the value is missing, then the default value is used
|
||||
/// instead.
|
||||
|
||||
@@ -207,6 +207,7 @@ impl IndexMerger {
|
||||
}
|
||||
FieldType::U64(ref options)
|
||||
| FieldType::I64(ref options)
|
||||
| FieldType::F64(ref options)
|
||||
| FieldType::Date(ref options) => match options.get_fastfield_cardinality() {
|
||||
Some(Cardinality::SingleValue) => {
|
||||
self.write_single_fast_field(field, fast_field_serializer)?;
|
||||
|
||||
@@ -214,6 +214,17 @@ impl SegmentWriter {
|
||||
}
|
||||
}
|
||||
}
|
||||
FieldType::F64(ref int_option) => {
|
||||
if int_option.is_indexed() {
|
||||
for field_value in field_values {
|
||||
let term = Term::from_field_f64(
|
||||
field_value.field(),
|
||||
field_value.value().f64_value(),
|
||||
);
|
||||
self.multifield_postings.subscribe(doc_id, &term);
|
||||
}
|
||||
}
|
||||
}
|
||||
FieldType::Bytes => {
|
||||
// Do nothing. Bytes only supports fast fields.
|
||||
}
|
||||
|
||||
44
src/lib.rs
44
src/lib.rs
@@ -179,7 +179,7 @@ pub use crate::indexer::IndexWriter;
|
||||
pub use crate::postings::Postings;
|
||||
pub use crate::schema::{Document, Term};
|
||||
|
||||
pub use crate::common::{i64_to_u64, u64_to_i64};
|
||||
pub use crate::common::{i64_to_u64, u64_to_i64, f64_to_u64, u64_to_f64};
|
||||
|
||||
/// Expose the current version of tantivy, as well
|
||||
/// whether it was compiled with the simd compression.
|
||||
@@ -625,6 +625,30 @@ mod tests {
|
||||
assert!(!postings.advance());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_indexed_f64() {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let value_field = schema_builder.add_f64_field("value", INDEXED);
|
||||
let schema = schema_builder.build();
|
||||
|
||||
let index = Index::create_in_ram(schema);
|
||||
let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
|
||||
let val = std::f64::consts::PI;
|
||||
index_writer.add_document(doc!(value_field => val));
|
||||
index_writer.commit().unwrap();
|
||||
let reader = index.reader().unwrap();
|
||||
let searcher = reader.searcher();
|
||||
let term = Term::from_field_f64(value_field, val);
|
||||
let mut postings = searcher
|
||||
.segment_reader(0)
|
||||
.inverted_index(term.field())
|
||||
.read_postings(&term, IndexRecordOption::Basic)
|
||||
.unwrap();
|
||||
assert!(postings.advance());
|
||||
assert_eq!(postings.doc(), 0);
|
||||
assert!(!postings.advance());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_indexedfield_not_in_documents() {
|
||||
let mut schema_builder = Schema::builder();
|
||||
@@ -817,6 +841,7 @@ mod tests {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let fast_field_unsigned = schema_builder.add_u64_field("unsigned", FAST);
|
||||
let fast_field_signed = schema_builder.add_i64_field("signed", FAST);
|
||||
let fast_field_float = schema_builder.add_f64_field("float", FAST);
|
||||
let text_field = schema_builder.add_text_field("text", TEXT);
|
||||
let stored_int_field = schema_builder.add_u64_field("text", STORED);
|
||||
let schema = schema_builder.build();
|
||||
@@ -824,7 +849,7 @@ mod tests {
|
||||
let index = Index::create_in_ram(schema);
|
||||
let mut index_writer = index.writer_with_num_threads(1, 50_000_000).unwrap();
|
||||
{
|
||||
let document = doc!(fast_field_unsigned => 4u64, fast_field_signed=>4i64);
|
||||
let document = doc!(fast_field_unsigned => 4u64, fast_field_signed=>4i64, fast_field_float=>4f64);
|
||||
index_writer.add_document(document);
|
||||
index_writer.commit().unwrap();
|
||||
}
|
||||
@@ -844,10 +869,14 @@ mod tests {
|
||||
assert!(fast_field_reader_opt.is_none());
|
||||
}
|
||||
{
|
||||
let fast_field_reader_opt = segment_reader.fast_fields().i64(fast_field_signed);
|
||||
let fast_field_reader_opt = segment_reader.fast_fields().u64(fast_field_float);
|
||||
assert!(fast_field_reader_opt.is_none());
|
||||
}
|
||||
{
|
||||
let fast_field_reader_opt = segment_reader.fast_fields().u64(fast_field_unsigned);
|
||||
assert!(fast_field_reader_opt.is_some());
|
||||
let fast_field_reader = fast_field_reader_opt.unwrap();
|
||||
assert_eq!(fast_field_reader.get(0), 4i64)
|
||||
assert_eq!(fast_field_reader.get(0), 4u64)
|
||||
}
|
||||
|
||||
{
|
||||
@@ -856,5 +885,12 @@ mod tests {
|
||||
let fast_field_reader = fast_field_reader_opt.unwrap();
|
||||
assert_eq!(fast_field_reader.get(0), 4i64)
|
||||
}
|
||||
|
||||
{
|
||||
let fast_field_reader_opt = segment_reader.fast_fields().f64(fast_field_float);
|
||||
assert!(fast_field_reader_opt.is_some());
|
||||
let fast_field_reader = fast_field_reader_opt.unwrap();
|
||||
assert_eq!(fast_field_reader.get(0), 4f64)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,6 +35,7 @@ fn posting_from_field_entry(field_entry: &FieldEntry) -> Box<dyn PostingsWriter>
|
||||
.unwrap_or_else(|| SpecializedPostingsWriter::<NothingRecorder>::new_boxed()),
|
||||
FieldType::U64(_)
|
||||
| FieldType::I64(_)
|
||||
| FieldType::F64(_)
|
||||
| FieldType::Date(_)
|
||||
| FieldType::HierarchicalFacet => SpecializedPostingsWriter::<NothingRecorder>::new_boxed(),
|
||||
FieldType::Bytes => {
|
||||
@@ -154,7 +155,7 @@ impl MultiFieldPostingsWriter {
|
||||
.collect();
|
||||
unordered_term_mappings.insert(field, mapping);
|
||||
}
|
||||
FieldType::U64(_) | FieldType::I64(_) | FieldType::Date(_) => {}
|
||||
FieldType::U64(_) | FieldType::I64(_) | FieldType::F64(_) | FieldType::Date(_) => {}
|
||||
FieldType::Bytes => {}
|
||||
}
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ parser! {
|
||||
parser! {
|
||||
fn word[I]()(I) -> String
|
||||
where [I: Stream<Item = char>] {
|
||||
many1(satisfy(char::is_alphanumeric))
|
||||
many1(satisfy(|c: char| c.is_alphanumeric() || c=='.'))
|
||||
.and_then(|s: String| {
|
||||
match s.as_str() {
|
||||
"OR" => Err(StreamErrorFor::<I>::unexpected_static_message("OR")),
|
||||
@@ -266,6 +266,7 @@ mod test {
|
||||
test_parse_query_to_ast_helper("(+a)", "+(\"a\")");
|
||||
test_parse_query_to_ast_helper("(+a +b)", "(+(\"a\") +(\"b\"))");
|
||||
test_parse_query_to_ast_helper("abc:toto", "abc:\"toto\"");
|
||||
test_parse_query_to_ast_helper("abc:1.1", "abc:\"1.1\"");
|
||||
test_parse_query_to_ast_helper("+abc:toto", "+(abc:\"toto\")");
|
||||
test_parse_query_to_ast_helper("(+abc:toto -titi)", "(+(abc:\"toto\") -(\"titi\"))");
|
||||
test_parse_query_to_ast_helper("-abc:toto", "-(abc:\"toto\")");
|
||||
@@ -277,6 +278,7 @@ mod test {
|
||||
test_parse_query_to_ast_helper("foo:[1 TO toto}", "foo:[\"1\" TO \"toto\"}");
|
||||
test_parse_query_to_ast_helper("foo:[* TO toto}", "foo:[\"*\" TO \"toto\"}");
|
||||
test_parse_query_to_ast_helper("foo:[1 TO *}", "foo:[\"1\" TO \"*\"}");
|
||||
test_parse_query_to_ast_helper("foo:[1.1 TO *}", "foo:[\"1.1\" TO \"*\"}");
|
||||
test_is_parse_err("abc + ");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,7 +18,7 @@ use crate::schema::{FieldType, Term};
|
||||
use crate::tokenizer::TokenizerManager;
|
||||
use combine::Parser;
|
||||
use std::borrow::Cow;
|
||||
use std::num::ParseIntError;
|
||||
use std::num::{ParseIntError, ParseFloatError};
|
||||
use std::ops::Bound;
|
||||
use std::str::FromStr;
|
||||
|
||||
@@ -30,9 +30,12 @@ pub enum QueryParserError {
|
||||
/// `FieldDoesNotExist(field_name: String)`
|
||||
/// The query references a field that is not in the schema
|
||||
FieldDoesNotExist(String),
|
||||
/// The query contains a term for a `u64`-field, but the value
|
||||
/// is not a u64.
|
||||
/// The query contains a term for a `u64` or `i64`-field, but the value
|
||||
/// is neither.
|
||||
ExpectedInt(ParseIntError),
|
||||
/// The query contains a term for a `f64`-field, but the value
|
||||
/// is not a f64.
|
||||
ExpectedFloat(ParseFloatError),
|
||||
/// It is forbidden queries that are only "excluding". (e.g. -title:pop)
|
||||
AllButQueryForbidden,
|
||||
/// If no default field is declared, running a query without any
|
||||
@@ -60,6 +63,12 @@ impl From<ParseIntError> for QueryParserError {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ParseFloatError> for QueryParserError {
|
||||
fn from(err: ParseFloatError) -> QueryParserError {
|
||||
QueryParserError::ExpectedFloat(err)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<chrono::ParseError> for QueryParserError {
|
||||
fn from(err: chrono::ParseError) -> QueryParserError {
|
||||
QueryParserError::DateFormatError(err)
|
||||
@@ -239,6 +248,11 @@ impl QueryParser {
|
||||
let term = Term::from_field_i64(field, val);
|
||||
Ok(vec![(0, term)])
|
||||
}
|
||||
FieldType::F64(_) => {
|
||||
let val: f64 = f64::from_str(phrase)?;
|
||||
let term = Term::from_field_f64(field, val);
|
||||
Ok(vec![(0, term)])
|
||||
}
|
||||
FieldType::Date(_) => match chrono::DateTime::parse_from_rfc3339(phrase) {
|
||||
Ok(x) => Ok(vec![(
|
||||
0,
|
||||
@@ -529,6 +543,7 @@ mod test {
|
||||
schema_builder.add_text_field("nottokenized", STRING);
|
||||
schema_builder.add_text_field("with_stop_words", text_options);
|
||||
schema_builder.add_date_field("date", INDEXED);
|
||||
schema_builder.add_f64_field("float", INDEXED);
|
||||
let schema = schema_builder.build();
|
||||
let default_fields = vec![title, text];
|
||||
let tokenizer_manager = TokenizerManager::default();
|
||||
@@ -634,6 +649,13 @@ mod test {
|
||||
assert!(query_parser
|
||||
.parse_query("unsigned:\"18446744073709551615\"")
|
||||
.is_ok());
|
||||
assert!(query_parser.parse_query("float:\"3.1\"").is_ok());
|
||||
assert!(query_parser.parse_query("float:\"-2.4\"").is_ok());
|
||||
assert!(query_parser.parse_query("float:\"2.1.2\"").is_err());
|
||||
assert!(query_parser.parse_query("float:\"2.1a\"").is_err());
|
||||
assert!(query_parser
|
||||
.parse_query("float:\"18446744073709551615.0\"")
|
||||
.is_ok());
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"unsigned:2324",
|
||||
"Term([0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 9, 20])",
|
||||
@@ -645,6 +667,12 @@ mod test {
|
||||
&format!("{:?}", Term::from_field_i64(Field(2u32), -2324)),
|
||||
false,
|
||||
);
|
||||
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"float:2.5",
|
||||
&format!("{:?}", Term::from_field_f64(Field(10u32), 2.5)),
|
||||
false,
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -786,6 +814,11 @@ mod test {
|
||||
query_parser.parse_query("signed:18b"),
|
||||
Err(QueryParserError::ExpectedInt(_))
|
||||
);
|
||||
assert!(query_parser.parse_query("float:\"1.8\"").is_ok());
|
||||
assert_matches!(
|
||||
query_parser.parse_query("float:1.8a"),
|
||||
Err(QueryParserError::ExpectedFloat(_))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -142,6 +142,39 @@ impl RangeQuery {
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new `RangeQuery` over a `f64` field.
|
||||
///
|
||||
/// If the field is not of the type `f64`, tantivy
|
||||
/// will panic when the `Weight` object is created.
|
||||
pub fn new_f64(field: Field, range: Range<f64>) -> RangeQuery {
|
||||
RangeQuery::new_f64_bounds(
|
||||
field,
|
||||
Bound::Included(range.start),
|
||||
Bound::Excluded(range.end),
|
||||
)
|
||||
}
|
||||
|
||||
/// Create a new `RangeQuery` over a `f64` field.
|
||||
///
|
||||
/// The two `Bound` arguments make it possible to create more complex
|
||||
/// ranges than semi-inclusive range.
|
||||
///
|
||||
/// If the field is not of the type `f64`, tantivy
|
||||
/// will panic when the `Weight` object is created.
|
||||
pub fn new_f64_bounds(
|
||||
field: Field,
|
||||
left_bound: Bound<f64>,
|
||||
right_bound: Bound<f64>,
|
||||
) -> RangeQuery {
|
||||
let make_term_val = |val: &f64| Term::from_field_f64(field, *val).value_bytes().to_owned();
|
||||
RangeQuery {
|
||||
field,
|
||||
value_type: Type::F64,
|
||||
left_bound: map_bound(&left_bound, &make_term_val),
|
||||
right_bound: map_bound(&right_bound, &make_term_val),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new `RangeQuery` over a `u64` field.
|
||||
///
|
||||
/// The two `Bound` arguments make it possible to create more complex
|
||||
@@ -397,4 +430,61 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_range_float() {
|
||||
let float_field: Field;
|
||||
let schema = {
|
||||
let mut schema_builder = Schema::builder();
|
||||
float_field = schema_builder.add_f64_field("floatfield", INDEXED);
|
||||
schema_builder.build()
|
||||
};
|
||||
|
||||
let index = Index::create_in_ram(schema);
|
||||
{
|
||||
let mut index_writer = index.writer_with_num_threads(2, 6_000_000).unwrap();
|
||||
|
||||
for i in 1..100 {
|
||||
let mut doc = Document::new();
|
||||
for j in 1..100 {
|
||||
if i % j == 0 {
|
||||
doc.add_f64(float_field, j as f64);
|
||||
}
|
||||
}
|
||||
index_writer.add_document(doc);
|
||||
}
|
||||
|
||||
index_writer.commit().unwrap();
|
||||
}
|
||||
let reader = index.reader().unwrap();
|
||||
let searcher = reader.searcher();
|
||||
let count_multiples =
|
||||
|range_query: RangeQuery| searcher.search(&range_query, &Count).unwrap();
|
||||
|
||||
assert_eq!(count_multiples(RangeQuery::new_f64(float_field, 10.0..11.0)), 9);
|
||||
assert_eq!(
|
||||
count_multiples(RangeQuery::new_f64_bounds(
|
||||
float_field,
|
||||
Bound::Included(10.0),
|
||||
Bound::Included(11.0)
|
||||
)),
|
||||
18
|
||||
);
|
||||
assert_eq!(
|
||||
count_multiples(RangeQuery::new_f64_bounds(
|
||||
float_field,
|
||||
Bound::Excluded(9.0),
|
||||
Bound::Included(10.0)
|
||||
)),
|
||||
9
|
||||
);
|
||||
assert_eq!(
|
||||
count_multiples(RangeQuery::new_f64_bounds(
|
||||
float_field,
|
||||
Bound::Included(9.0),
|
||||
Bound::Unbounded
|
||||
)),
|
||||
91
|
||||
);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -88,6 +88,11 @@ impl Document {
|
||||
self.add(FieldValue::new(field, Value::I64(value)));
|
||||
}
|
||||
|
||||
/// Add a f64 field
|
||||
pub fn add_f64(&mut self, field: Field, value: f64) {
|
||||
self.add(FieldValue::new(field, Value::F64(value)));
|
||||
}
|
||||
|
||||
/// Add a date field
|
||||
pub fn add_date(&mut self, field: Field, value: &DateTime) {
|
||||
self.add(FieldValue::new(field, Value::Date(*value)));
|
||||
|
||||
@@ -48,6 +48,15 @@ impl FieldEntry {
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new f64 field entry in the schema, given
|
||||
/// a name, and some options.
|
||||
pub fn new_f64(field_name: String, field_type: IntOptions) -> FieldEntry {
|
||||
FieldEntry {
|
||||
name: field_name,
|
||||
field_type: FieldType::F64(field_type),
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new date field entry in the schema, given
|
||||
/// a name, and some options.
|
||||
pub fn new_date(field_name: String, field_type: IntOptions) -> FieldEntry {
|
||||
@@ -89,6 +98,7 @@ impl FieldEntry {
|
||||
FieldType::Str(ref options) => options.get_indexing_options().is_some(),
|
||||
FieldType::U64(ref options)
|
||||
| FieldType::I64(ref options)
|
||||
| FieldType::F64(ref options)
|
||||
| FieldType::Date(ref options) => options.is_indexed(),
|
||||
FieldType::HierarchicalFacet => true,
|
||||
FieldType::Bytes => false,
|
||||
@@ -98,7 +108,7 @@ impl FieldEntry {
|
||||
/// Returns true iff the field is a int (signed or unsigned) fast field
|
||||
pub fn is_int_fast(&self) -> bool {
|
||||
match self.field_type {
|
||||
FieldType::U64(ref options) | FieldType::I64(ref options) => options.is_fast(),
|
||||
FieldType::U64(ref options) | FieldType::I64(ref options) | FieldType::F64(ref options) => options.is_fast(),
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
@@ -108,6 +118,7 @@ impl FieldEntry {
|
||||
match self.field_type {
|
||||
FieldType::U64(ref options)
|
||||
| FieldType::I64(ref options)
|
||||
| FieldType::F64(ref options)
|
||||
| FieldType::Date(ref options) => options.is_stored(),
|
||||
FieldType::Str(ref options) => options.is_stored(),
|
||||
// TODO make stored hierarchical facet optional
|
||||
@@ -138,6 +149,10 @@ impl Serialize for FieldEntry {
|
||||
s.serialize_field("type", "i64")?;
|
||||
s.serialize_field("options", options)?;
|
||||
}
|
||||
FieldType::F64(ref options) => {
|
||||
s.serialize_field("type", "f64")?;
|
||||
s.serialize_field("options", options)?;
|
||||
}
|
||||
FieldType::Date(ref options) => {
|
||||
s.serialize_field("type", "date")?;
|
||||
s.serialize_field("options", options)?;
|
||||
@@ -205,7 +220,7 @@ impl<'de> Deserialize<'de> for FieldEntry {
|
||||
"bytes" => {
|
||||
field_type = Some(FieldType::Bytes);
|
||||
}
|
||||
"text" | "u64" | "i64" | "date" => {
|
||||
"text" | "u64" | "i64" | "f64" | "date" => {
|
||||
// These types require additional options to create a field_type
|
||||
}
|
||||
_ => panic!("unhandled type"),
|
||||
@@ -222,6 +237,7 @@ impl<'de> Deserialize<'de> for FieldEntry {
|
||||
"text" => field_type = Some(FieldType::Str(map.next_value()?)),
|
||||
"u64" => field_type = Some(FieldType::U64(map.next_value()?)),
|
||||
"i64" => field_type = Some(FieldType::I64(map.next_value()?)),
|
||||
"f64" => field_type = Some(FieldType::F64(map.next_value()?)),
|
||||
"date" => field_type = Some(FieldType::Date(map.next_value()?)),
|
||||
_ => {
|
||||
let msg = format!("Unrecognised type {}", ty);
|
||||
|
||||
@@ -35,6 +35,8 @@ pub enum Type {
|
||||
U64,
|
||||
/// `i64`
|
||||
I64,
|
||||
/// `f64`
|
||||
F64,
|
||||
/// `date(i64) timestamp`
|
||||
Date,
|
||||
/// `tantivy::schema::Facet`. Passed as a string in JSON.
|
||||
@@ -53,6 +55,8 @@ pub enum FieldType {
|
||||
U64(IntOptions),
|
||||
/// Signed 64-bits integers 64 field type configuration
|
||||
I64(IntOptions),
|
||||
/// 64-bits float 64 field type configuration
|
||||
F64(IntOptions),
|
||||
/// Signed 64-bits Date 64 field type configuration,
|
||||
Date(IntOptions),
|
||||
/// Hierachical Facet
|
||||
@@ -68,6 +72,7 @@ impl FieldType {
|
||||
FieldType::Str(_) => Type::Str,
|
||||
FieldType::U64(_) => Type::U64,
|
||||
FieldType::I64(_) => Type::I64,
|
||||
FieldType::F64(_) => Type::F64,
|
||||
FieldType::Date(_) => Type::Date,
|
||||
FieldType::HierarchicalFacet => Type::HierarchicalFacet,
|
||||
FieldType::Bytes => Type::Bytes,
|
||||
@@ -78,7 +83,7 @@ impl FieldType {
|
||||
pub fn is_indexed(&self) -> bool {
|
||||
match *self {
|
||||
FieldType::Str(ref text_options) => text_options.get_indexing_options().is_some(),
|
||||
FieldType::U64(ref int_options) | FieldType::I64(ref int_options) => {
|
||||
FieldType::U64(ref int_options) | FieldType::I64(ref int_options) | FieldType::F64(ref int_options) => {
|
||||
int_options.is_indexed()
|
||||
}
|
||||
FieldType::Date(ref date_options) => date_options.is_indexed(),
|
||||
@@ -98,6 +103,7 @@ impl FieldType {
|
||||
.map(TextFieldIndexing::index_option),
|
||||
FieldType::U64(ref int_options)
|
||||
| FieldType::I64(ref int_options)
|
||||
| FieldType::F64(ref int_options)
|
||||
| FieldType::Date(ref int_options) => {
|
||||
if int_options.is_indexed() {
|
||||
Some(IndexRecordOption::Basic)
|
||||
@@ -119,7 +125,7 @@ impl FieldType {
|
||||
match *json {
|
||||
JsonValue::String(ref field_text) => match *self {
|
||||
FieldType::Str(_) => Ok(Value::Str(field_text.clone())),
|
||||
FieldType::U64(_) | FieldType::I64(_) | FieldType::Date(_) => Err(
|
||||
FieldType::U64(_) | FieldType::I64(_) | FieldType::F64(_) | FieldType::Date(_) => Err(
|
||||
ValueParsingError::TypeError(format!("Expected an integer, got {:?}", json)),
|
||||
),
|
||||
FieldType::HierarchicalFacet => Ok(Value::Facet(Facet::from(field_text))),
|
||||
@@ -146,6 +152,14 @@ impl FieldType {
|
||||
let msg = format!("Expected a u64 int, got {:?}", json);
|
||||
Err(ValueParsingError::OverflowError(msg))
|
||||
}
|
||||
},
|
||||
FieldType::F64(_) => {
|
||||
if let Some(field_val_f64) = field_val_num.as_f64() {
|
||||
Ok(Value::F64(field_val_f64))
|
||||
} else {
|
||||
let msg = format!("Expected a f64 int, got {:?}", json);
|
||||
Err(ValueParsingError::OverflowError(msg))
|
||||
}
|
||||
}
|
||||
FieldType::Str(_) | FieldType::HierarchicalFacet | FieldType::Bytes => {
|
||||
let msg = format!("Expected a string, got {:?}", json);
|
||||
|
||||
@@ -22,7 +22,7 @@ pub const STORED: SchemaFlagList<StoredFlag, ()> = SchemaFlagList {
|
||||
pub struct IndexedFlag;
|
||||
/// Flag to mark the field as indexed.
|
||||
///
|
||||
/// The `INDEXED` flag can only be used when building `IntOptions` (`u64` and `i64` fields)
|
||||
/// The `INDEXED` flag can only be used when building `IntOptions` (`u64`, `i64` and `f64` fields)
|
||||
/// Of course, text fields can also be indexed... But this is expressed by using either the
|
||||
/// `STRING` (untokenized) or `TEXT` (tokenized with the english tokenizer) flags.
|
||||
pub const INDEXED: SchemaFlagList<IndexedFlag, ()> = SchemaFlagList {
|
||||
@@ -36,7 +36,7 @@ pub struct FastFlag;
|
||||
///
|
||||
/// Fast fields can be random-accessed rapidly. Fields useful for scoring, filtering
|
||||
/// or collection should be mark as fast fields.
|
||||
/// The `FAST` flag can only be used when building `IntOptions` (`u64` and `i64` fields)
|
||||
/// The `FAST` flag can only be used when building `IntOptions` (`u64`, `i64` and `f64` fields)
|
||||
pub const FAST: SchemaFlagList<FastFlag, ()> = SchemaFlagList {
|
||||
head: FastFlag,
|
||||
tail: (),
|
||||
|
||||
@@ -54,7 +54,7 @@ On the other hand setting the field as stored or not determines whether the fiel
|
||||
when [`searcher.doc(doc_address)`](../struct.Searcher.html#method.doc) is called.
|
||||
|
||||
|
||||
## Setting a u64 or a i64 field
|
||||
## Setting a u64, a i64 or a f64 field
|
||||
|
||||
### Example
|
||||
|
||||
|
||||
@@ -82,6 +82,26 @@ impl SchemaBuilder {
|
||||
self.add_field(field_entry)
|
||||
}
|
||||
|
||||
/// Adds a new f64 field.
|
||||
/// Returns the associated field handle
|
||||
///
|
||||
/// # Caution
|
||||
///
|
||||
/// Appending two fields with the same name
|
||||
/// will result in the shadowing of the first
|
||||
/// by the second one.
|
||||
/// The first field will get a field id
|
||||
/// but only the second one will be indexed
|
||||
pub fn add_f64_field<T: Into<IntOptions>>(
|
||||
&mut self,
|
||||
field_name_str: &str,
|
||||
field_options: T,
|
||||
) -> Field {
|
||||
let field_name = String::from(field_name_str);
|
||||
let field_entry = FieldEntry::new_f64(field_name, field_options.into());
|
||||
self.add_field(field_entry)
|
||||
}
|
||||
|
||||
/// Adds a new date field.
|
||||
/// Returns the associated field handle
|
||||
/// Internally, Tantivy simply stores dates as i64 UTC timestamps,
|
||||
@@ -376,10 +396,14 @@ mod tests {
|
||||
let popularity_options = IntOptions::default()
|
||||
.set_stored()
|
||||
.set_fast(Cardinality::SingleValue);
|
||||
let score_options = IntOptions::default()
|
||||
.set_indexed()
|
||||
.set_fast(Cardinality::SingleValue);
|
||||
schema_builder.add_text_field("title", TEXT);
|
||||
schema_builder.add_text_field("author", STRING);
|
||||
schema_builder.add_u64_field("count", count_options);
|
||||
schema_builder.add_i64_field("popularity", popularity_options);
|
||||
schema_builder.add_f64_field("score", score_options);
|
||||
let schema = schema_builder.build();
|
||||
let schema_json = serde_json::to_string_pretty(&schema).unwrap();
|
||||
let expected = r#"[
|
||||
@@ -422,6 +446,15 @@ mod tests {
|
||||
"fast": "single",
|
||||
"stored": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "score",
|
||||
"type": "f64",
|
||||
"options": {
|
||||
"indexed": true,
|
||||
"fast": "single",
|
||||
"stored": false
|
||||
}
|
||||
}
|
||||
]"#;
|
||||
assert_eq!(schema_json, expected);
|
||||
@@ -434,6 +467,8 @@ mod tests {
|
||||
assert_eq!("author", fields.next().unwrap().name());
|
||||
assert_eq!("count", fields.next().unwrap().name());
|
||||
assert_eq!("popularity", fields.next().unwrap().name());
|
||||
assert_eq!("score", fields.next().unwrap().name());
|
||||
assert!(fields.next().is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -466,10 +501,14 @@ mod tests {
|
||||
let popularity_options = IntOptions::default()
|
||||
.set_stored()
|
||||
.set_fast(Cardinality::SingleValue);
|
||||
let score_options = IntOptions::default()
|
||||
.set_indexed()
|
||||
.set_fast(Cardinality::SingleValue);
|
||||
let title_field = schema_builder.add_text_field("title", TEXT);
|
||||
let author_field = schema_builder.add_text_field("author", STRING);
|
||||
let count_field = schema_builder.add_u64_field("count", count_options);
|
||||
let popularity_field = schema_builder.add_i64_field("popularity", popularity_options);
|
||||
let score_field = schema_builder.add_f64_field("score", score_options);
|
||||
let schema = schema_builder.build();
|
||||
{
|
||||
let doc = schema.parse_document("{}").unwrap();
|
||||
@@ -482,7 +521,8 @@ mod tests {
|
||||
"title": "my title",
|
||||
"author": "fulmicoton",
|
||||
"count": 4,
|
||||
"popularity": 10
|
||||
"popularity": 10,
|
||||
"score": 80.5
|
||||
}"#,
|
||||
)
|
||||
.unwrap();
|
||||
@@ -493,6 +533,7 @@ mod tests {
|
||||
);
|
||||
assert_eq!(doc.get_first(count_field).unwrap().u64_value(), 4);
|
||||
assert_eq!(doc.get_first(popularity_field).unwrap().i64_value(), 10);
|
||||
assert_eq!(doc.get_first(score_field).unwrap().f64_value(), 80.5);
|
||||
}
|
||||
{
|
||||
let json_err = schema.parse_document(
|
||||
@@ -501,6 +542,7 @@ mod tests {
|
||||
"author": "fulmicoton",
|
||||
"count": 4,
|
||||
"popularity": 10,
|
||||
"score": 80.5,
|
||||
"jambon": "bayonne"
|
||||
}"#,
|
||||
);
|
||||
@@ -513,6 +555,7 @@ mod tests {
|
||||
"author": "fulmicoton",
|
||||
"count": "5",
|
||||
"popularity": "10",
|
||||
"score": "80.5",
|
||||
"jambon": "bayonne"
|
||||
}"#,
|
||||
);
|
||||
@@ -527,7 +570,8 @@ mod tests {
|
||||
"title": "my title",
|
||||
"author": "fulmicoton",
|
||||
"count": -5,
|
||||
"popularity": 10
|
||||
"popularity": 10,
|
||||
"score": 80.5
|
||||
}"#,
|
||||
);
|
||||
assert_matches!(
|
||||
@@ -541,7 +585,8 @@ mod tests {
|
||||
"title": "my title",
|
||||
"author": "fulmicoton",
|
||||
"count": 9223372036854775808,
|
||||
"popularity": 10
|
||||
"popularity": 10,
|
||||
"score": 80.5
|
||||
}"#,
|
||||
);
|
||||
assert!(!matches!(
|
||||
@@ -555,7 +600,8 @@ mod tests {
|
||||
"title": "my title",
|
||||
"author": "fulmicoton",
|
||||
"count": 50,
|
||||
"popularity": 9223372036854775808
|
||||
"popularity": 9223372036854775808,
|
||||
"score": 80.5
|
||||
}"#,
|
||||
);
|
||||
assert_matches!(
|
||||
|
||||
@@ -19,9 +19,9 @@ where
|
||||
B: AsRef<[u8]>;
|
||||
|
||||
impl Term {
|
||||
/// Builds a term given a field, and a u64-value
|
||||
/// Builds a term given a field, and a i64-value
|
||||
///
|
||||
/// Assuming the term has a field id of 1, and a u64 value of 3234,
|
||||
/// Assuming the term has a field id of 1, and a i64 value of 3234,
|
||||
/// the Term will have 8 bytes.
|
||||
///
|
||||
/// The first four byte are dedicated to storing the field id as a u64.
|
||||
@@ -31,6 +31,18 @@ impl Term {
|
||||
Term::from_field_u64(field, val_u64)
|
||||
}
|
||||
|
||||
/// Builds a term given a field, and a f64-value
|
||||
///
|
||||
/// Assuming the term has a field id of 1, and a u64 value of 3234,
|
||||
/// the Term will have 8 bytes. <= this is wrong
|
||||
///
|
||||
/// The first four byte are dedicated to storing the field id as a u64.
|
||||
/// The 4 following bytes are encoding the u64 value.
|
||||
pub fn from_field_f64(field: Field, val: f64) -> Term {
|
||||
let val_u64: u64 = common::f64_to_u64(val);
|
||||
Term::from_field_u64(field, val_u64)
|
||||
}
|
||||
|
||||
/// Builds a term given a field, and a DateTime value
|
||||
///
|
||||
/// Assuming the term has a field id of 1, and a timestamp i64 value of 3234,
|
||||
@@ -112,6 +124,11 @@ impl Term {
|
||||
self.set_u64(common::i64_to_u64(val));
|
||||
}
|
||||
|
||||
/// Sets a `f64` value in the term.
|
||||
pub fn set_f64(&mut self, val: f64) {
|
||||
self.set_u64(common::f64_to_u64(val));
|
||||
}
|
||||
|
||||
fn set_bytes(&mut self, bytes: &[u8]) {
|
||||
self.0.resize(4, 0u8);
|
||||
self.0.extend(bytes);
|
||||
@@ -161,6 +178,15 @@ where
|
||||
common::u64_to_i64(BigEndian::read_u64(&self.0.as_ref()[4..]))
|
||||
}
|
||||
|
||||
/// Returns the `f64` value stored in a term.
|
||||
///
|
||||
/// # Panics
|
||||
/// ... or returns an invalid value
|
||||
/// if the term is not a `i64` field.
|
||||
pub fn get_f64(&self) -> f64 {
|
||||
common::u64_to_f64(BigEndian::read_u64(&self.0.as_ref()[4..]))
|
||||
}
|
||||
|
||||
/// Returns the text associated with the term.
|
||||
///
|
||||
/// # Panics
|
||||
|
||||
@@ -2,11 +2,11 @@ use crate::schema::Facet;
|
||||
use crate::DateTime;
|
||||
use serde::de::Visitor;
|
||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
use std::fmt;
|
||||
use std::{fmt, cmp::Ordering};
|
||||
|
||||
/// Value represents the value of a any field.
|
||||
/// It is an enum over all over all of the possible field type.
|
||||
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd)]
|
||||
#[derive(Debug, Clone, PartialEq, PartialOrd)]
|
||||
pub enum Value {
|
||||
/// The str type is used for any text information.
|
||||
Str(String),
|
||||
@@ -14,6 +14,8 @@ pub enum Value {
|
||||
U64(u64),
|
||||
/// Signed 64-bits Integer `i64`
|
||||
I64(i64),
|
||||
/// 64-bits Float `f64`
|
||||
F64(f64),
|
||||
/// Signed 64-bits Date time stamp `date`
|
||||
Date(DateTime),
|
||||
/// Hierarchical Facet
|
||||
@@ -22,6 +24,40 @@ pub enum Value {
|
||||
Bytes(Vec<u8>),
|
||||
}
|
||||
|
||||
impl Eq for Value {}
|
||||
impl Ord for Value {
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
match (self,other) {
|
||||
(Value::Str(l), Value::Str(r)) => l.cmp(r),
|
||||
(Value::U64(l), Value::U64(r)) => l.cmp(r),
|
||||
(Value::I64(l), Value::I64(r)) => l.cmp(r),
|
||||
(Value::Date(l), Value::Date(r)) => l.cmp(r),
|
||||
(Value::Facet(l), Value::Facet(r)) => l.cmp(r),
|
||||
(Value::Bytes(l), Value::Bytes(r)) => l.cmp(r),
|
||||
(Value::F64(l), Value::F64(r)) => {
|
||||
match (l.is_nan(),r.is_nan()) {
|
||||
(false, false) => l.partial_cmp(r).unwrap(), // only fail on NaN
|
||||
(true, true) => Ordering::Equal,
|
||||
(true, false) => Ordering::Less, // we define NaN as less than -∞
|
||||
(false, true) => Ordering::Greater,
|
||||
}
|
||||
}
|
||||
(Value::Str(_), _) => Ordering::Less,
|
||||
(_, Value::Str(_)) => Ordering::Greater,
|
||||
(Value::U64(_), _) => Ordering::Less,
|
||||
(_, Value::U64(_)) => Ordering::Greater,
|
||||
(Value::I64(_), _) => Ordering::Less,
|
||||
(_, Value::I64(_)) => Ordering::Greater,
|
||||
(Value::F64(_), _) => Ordering::Less,
|
||||
(_, Value::F64(_)) => Ordering::Greater,
|
||||
(Value::Date(_), _) => Ordering::Less,
|
||||
(_, Value::Date(_)) => Ordering::Greater,
|
||||
(Value::Facet(_), _) => Ordering::Less,
|
||||
(_, Value::Facet(_)) => Ordering::Greater,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for Value {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
@@ -31,6 +67,7 @@ impl Serialize for Value {
|
||||
Value::Str(ref v) => serializer.serialize_str(v),
|
||||
Value::U64(u) => serializer.serialize_u64(u),
|
||||
Value::I64(u) => serializer.serialize_i64(u),
|
||||
Value::F64(u) => serializer.serialize_f64(u),
|
||||
Value::Date(ref date) => serializer.serialize_i64(date.timestamp()),
|
||||
Value::Facet(ref facet) => facet.serialize(serializer),
|
||||
Value::Bytes(ref bytes) => serializer.serialize_bytes(bytes),
|
||||
@@ -60,6 +97,10 @@ impl<'de> Deserialize<'de> for Value {
|
||||
Ok(Value::I64(v))
|
||||
}
|
||||
|
||||
fn visit_f64<E>(self, v: f64) -> Result<Self::Value, E> {
|
||||
Ok(Value::F64(v))
|
||||
}
|
||||
|
||||
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> {
|
||||
Ok(Value::Str(v.to_owned()))
|
||||
}
|
||||
@@ -75,9 +116,7 @@ impl<'de> Deserialize<'de> for Value {
|
||||
|
||||
impl Value {
|
||||
/// Returns the text value, provided the value is of the `Str` type.
|
||||
///
|
||||
/// # Panics
|
||||
/// If the value is not of type `Str`
|
||||
/// (Returns None if the value is not of the `Str` type).
|
||||
pub fn text(&self) -> Option<&str> {
|
||||
match *self {
|
||||
Value::Str(ref text) => Some(text),
|
||||
@@ -92,7 +131,7 @@ impl Value {
|
||||
pub fn u64_value(&self) -> u64 {
|
||||
match *self {
|
||||
Value::U64(ref value) => *value,
|
||||
_ => panic!("This is not a text field."),
|
||||
_ => panic!("This is not a u64 field."),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -103,10 +142,21 @@ impl Value {
|
||||
pub fn i64_value(&self) -> i64 {
|
||||
match *self {
|
||||
Value::I64(ref value) => *value,
|
||||
_ => panic!("This is not a text field."),
|
||||
_ => panic!("This is not a i64 field."),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the f64-value, provided the value is of the `F64` type.
|
||||
///
|
||||
/// # Panics
|
||||
/// If the value is not of type `F64`
|
||||
pub fn f64_value(&self) -> f64 {
|
||||
match *self {
|
||||
Value::F64(ref value) => *value,
|
||||
_ => panic!("This is not a f64 field."),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the Date-value, provided the value is of the `Date` type.
|
||||
///
|
||||
/// # Panics
|
||||
@@ -137,6 +187,12 @@ impl From<i64> for Value {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<f64> for Value {
|
||||
fn from(v: f64) -> Value {
|
||||
Value::F64(v)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<DateTime> for Value {
|
||||
fn from(date_time: DateTime) -> Value {
|
||||
Value::Date(date_time)
|
||||
@@ -163,7 +219,7 @@ impl From<Vec<u8>> for Value {
|
||||
|
||||
mod binary_serialize {
|
||||
use super::Value;
|
||||
use crate::common::BinarySerializable;
|
||||
use crate::common::{BinarySerializable, f64_to_u64, u64_to_f64};
|
||||
use crate::schema::Facet;
|
||||
use chrono::{TimeZone, Utc};
|
||||
use std::io::{self, Read, Write};
|
||||
@@ -174,6 +230,7 @@ mod binary_serialize {
|
||||
const HIERARCHICAL_FACET_CODE: u8 = 3;
|
||||
const BYTES_CODE: u8 = 4;
|
||||
const DATE_CODE: u8 = 5;
|
||||
const F64_CODE: u8 = 6;
|
||||
|
||||
impl BinarySerializable for Value {
|
||||
fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
|
||||
@@ -190,6 +247,10 @@ mod binary_serialize {
|
||||
I64_CODE.serialize(writer)?;
|
||||
val.serialize(writer)
|
||||
}
|
||||
Value::F64(ref val) => {
|
||||
F64_CODE.serialize(writer)?;
|
||||
f64_to_u64(*val).serialize(writer)
|
||||
}
|
||||
Value::Date(ref val) => {
|
||||
DATE_CODE.serialize(writer)?;
|
||||
val.timestamp().serialize(writer)
|
||||
@@ -219,6 +280,10 @@ mod binary_serialize {
|
||||
let value = i64::deserialize(reader)?;
|
||||
Ok(Value::I64(value))
|
||||
}
|
||||
F64_CODE => {
|
||||
let value = u64_to_f64(u64::deserialize(reader)?);
|
||||
Ok(Value::F64(value))
|
||||
}
|
||||
DATE_CODE => {
|
||||
let timestamp = i64::deserialize(reader)?;
|
||||
Ok(Value::Date(Utc.timestamp(timestamp, 0)))
|
||||
|
||||
@@ -14,6 +14,9 @@ lexicographical order matches the natural order of integers.
|
||||
`i64`-terms are transformed to `u64` using a continuous mapping `val ⟶ val - i64::min_value()`
|
||||
and then treated as a `u64`.
|
||||
|
||||
`f64`-terms are transformed to `u64` using a mapping that preserve order, and are then treated
|
||||
as `u64`.
|
||||
|
||||
A second datastructure makes it possible to access a [`TermInfo`](../postings/struct.TermInfo.html).
|
||||
*/
|
||||
|
||||
|
||||
Reference in New Issue
Block a user