From 2981e6c1df060ebd84a6ea964fe41865726d4344 Mon Sep 17 00:00:00 2001 From: boraarslan Date: Tue, 31 May 2022 18:02:14 +0300 Subject: [PATCH] First commit --- src/fastfield/mod.rs | 31 +++++++++++++++++++ src/indexer/segment_writer.rs | 7 +++++ src/postings/per_field_postings_writer.rs | 1 + src/postings/postings_writer.rs | 2 +- src/query/query_parser/query_parser.rs | 15 ++++++++- src/schema/field_entry.rs | 8 ++++- src/schema/field_type.rs | 37 ++++++++++++++++++++--- src/schema/term.rs | 21 +++++++++++++ src/schema/value.rs | 35 +++++++++++++++++++++ 9 files changed, 149 insertions(+), 8 deletions(-) diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index 881cbc234..c1e878c57 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -160,6 +160,37 @@ impl FastValue for f64 { } } +impl FastValue for bool { + fn from_u64(val: u64) -> Self { + match val { + 0 => false, + _ => true, + } + } + + fn to_u64(&self) -> u64 { + match self { + false => 0, + true => 1, + } + } + + fn fast_field_cardinality(field_type: &FieldType) -> Option { + match *field_type { + FieldType::Bool(ref integer_options) => integer_options.get_fastfield_cardinality(), + _ => None, + } + } + + fn as_u64(&self) -> u64 { + *self as u64 + } + + fn to_type() -> Type { + Type::Bool + } +} + impl FastValue for DateTime { fn from_u64(timestamp_u64: u64) -> Self { let unix_timestamp = i64::from_u64(timestamp_u64); diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index c1ae1c6e8..a424ca722 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -266,6 +266,13 @@ impl SegmentWriter { postings_writer.subscribe(doc_id, 0u32, term_buffer, ctx); } } + FieldType::Bool(_) => { + for value in values { + let bool_val = value.as_bool().ok_or_else(make_schema_error)?; + term_buffer.set_bool(bool_val); + postings_writer.subscribe(doc_id, 0u32, term_buffer, ctx); + } + } FieldType::Bytes(_) => { for value in values { let bytes = 
value.as_bytes().ok_or_else(make_schema_error)?; diff --git a/src/postings/per_field_postings_writer.rs b/src/postings/per_field_postings_writer.rs index 04966ab42..61d02752f 100644 --- a/src/postings/per_field_postings_writer.rs +++ b/src/postings/per_field_postings_writer.rs @@ -47,6 +47,7 @@ fn posting_writer_from_field_entry(field_entry: &FieldEntry) -> Box Box::new(SpecializedPostingsWriter::::default()), diff --git a/src/postings/postings_writer.rs b/src/postings/postings_writer.rs index debd03208..808ca80c4 100644 --- a/src/postings/postings_writer.rs +++ b/src/postings/postings_writer.rs @@ -82,7 +82,7 @@ pub(crate) fn serialize_postings( .collect(); unordered_term_mappings.insert(field, mapping); } - FieldType::U64(_) | FieldType::I64(_) | FieldType::F64(_) | FieldType::Date(_) => {} + FieldType::U64(_) | FieldType::I64(_) | FieldType::F64(_) | FieldType::Date(_) | FieldType::Bool(_) => {} FieldType::Bytes(_) => {} FieldType::JsonObject(_) => {} } diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs index d0141833d..ef7d0f392 100644 --- a/src/query/query_parser/query_parser.rs +++ b/src/query/query_parser/query_parser.rs @@ -1,7 +1,7 @@ use std::collections::HashMap; use std::num::{ParseFloatError, ParseIntError}; use std::ops::Bound; -use std::str::FromStr; +use std::str::{FromStr, ParseBoolError}; use tantivy_query_grammar::{UserInputAst, UserInputBound, UserInputLeaf, UserInputLiteral}; @@ -46,6 +46,10 @@ pub enum QueryParserError { /// is not a f64. #[error("Invalid query: Only excluding terms given")] ExpectedFloat(#[from] ParseFloatError), + /// The query contains a term for a bool field, but the value + /// is not a bool. + #[error("Expected a bool value: '{0:?}'")] + ExpectedBool(#[from] ParseBoolError), /// It is forbidden queries that are only "excluding". (e.g. 
-title:pop) #[error("Invalid query: Only excluding terms given")] AllButQueryForbidden, @@ -346,6 +350,10 @@ impl QueryParser { let val: f64 = f64::from_str(phrase)?; Ok(Term::from_field_f64(field, val)) } + FieldType::Bool(_) => { + let val: bool = bool::from_str(phrase)?; + Ok(Term::from_field_bool(field, val)) + } FieldType::Date(_) => { let dt = OffsetDateTime::parse(phrase, &Rfc3339)?; Ok(Term::from_field_date(field, DateTime::from_utc(dt))) @@ -426,6 +434,11 @@ impl QueryParser { let f64_term = Term::from_field_f64(field, val); Ok(vec![LogicalLiteral::Term(f64_term)]) } + FieldType::Bool(_) => { + let val: bool = bool::from_str(phrase)?; + let bool_term = Term::from_field_bool(field, val); + Ok(vec![LogicalLiteral::Term(bool_term)]) + } FieldType::Date(_) => { let dt = OffsetDateTime::parse(phrase, &Rfc3339)?; let dt_term = Term::from_field_date(field, DateTime::from_utc(dt)); diff --git a/src/schema/field_entry.rs b/src/schema/field_entry.rs index b49016219..863ad2c5a 100644 --- a/src/schema/field_entry.rs +++ b/src/schema/field_entry.rs @@ -49,6 +49,11 @@ impl FieldEntry { Self::new(field_name, FieldType::F64(f64_options)) } + /// Creates a new bool field entry. + pub fn new_bool(field_name: String, bool_options: NumericOptions) -> FieldEntry { + Self::new(field_name, FieldType::Bool(bool_options)) + } + /// Creates a new date field entry. 
pub fn new_date(field_name: String, date_options: NumericOptions) -> FieldEntry { Self::new(field_name, FieldType::Date(date_options)) @@ -102,7 +107,8 @@ impl FieldEntry { FieldType::U64(ref options) | FieldType::I64(ref options) | FieldType::F64(ref options) - | FieldType::Date(ref options) => options.is_stored(), + | FieldType::Date(ref options) + | FieldType::Bool(ref options) => options.is_stored(), FieldType::Str(ref options) => options.is_stored(), FieldType::Facet(ref options) => options.is_stored(), FieldType::Bytes(ref options) => options.is_stored(), diff --git a/src/schema/field_type.rs b/src/schema/field_type.rs index 39798e45e..1080d83fc 100644 --- a/src/schema/field_type.rs +++ b/src/schema/field_type.rs @@ -46,6 +46,8 @@ pub enum Type { I64 = b'i', /// `f64` F64 = b'f', + /// `bool` + Bool = b'o', /// `date(i64) timestamp` Date = b'd', /// `tantivy::schema::Facet`. Passed as a string in JSON. @@ -56,11 +58,12 @@ pub enum Type { Json = b'j', } -const ALL_TYPES: [Type; 8] = [ +const ALL_TYPES: [Type; 9] = [ Type::Str, Type::U64, Type::I64, Type::F64, + Type::Bool, Type::Date, Type::Facet, Type::Bytes, @@ -86,6 +89,7 @@ impl Type { Type::U64 => "U64", Type::I64 => "I64", Type::F64 => "F64", + Type::Bool => "Bool", Type::Date => "Date", Type::Facet => "Facet", Type::Bytes => "Bytes", @@ -101,6 +105,7 @@ impl Type { b'u' => Some(Type::U64), b'i' => Some(Type::I64), b'f' => Some(Type::F64), + b'o' => Some(Type::Bool), b'd' => Some(Type::Date), b'h' => Some(Type::Facet), b'b' => Some(Type::Bytes), @@ -125,6 +130,8 @@ pub enum FieldType { I64(NumericOptions), /// 64-bits float 64 field type configuration F64(NumericOptions), + /// Bool field type configuration + Bool(NumericOptions), /// Signed 64-bits Date 64 field type configuration, Date(NumericOptions), /// Hierachical Facet @@ -143,6 +150,7 @@ impl FieldType { FieldType::U64(_) => Type::U64, FieldType::I64(_) => Type::I64, FieldType::F64(_) => Type::F64, + FieldType::Bool(_) => Type::Bool, 
FieldType::Date(_) => Type::Date, FieldType::Facet(_) => Type::Facet, FieldType::Bytes(_) => Type::Bytes, @@ -156,7 +164,8 @@ impl FieldType { FieldType::Str(ref text_options) => text_options.get_indexing_options().is_some(), FieldType::U64(ref int_options) | FieldType::I64(ref int_options) - | FieldType::F64(ref int_options) => int_options.is_indexed(), + | FieldType::F64(ref int_options) + | FieldType::Bool(ref int_options) => int_options.is_indexed(), FieldType::Date(ref date_options) => date_options.is_indexed(), FieldType::Facet(ref _facet_options) => true, FieldType::Bytes(ref bytes_options) => bytes_options.is_indexed(), @@ -193,7 +202,8 @@ impl FieldType { FieldType::U64(ref int_options) | FieldType::I64(ref int_options) | FieldType::F64(ref int_options) - | FieldType::Date(ref int_options) => int_options.get_fastfield_cardinality().is_some(), + | FieldType::Date(ref int_options) + | FieldType::Bool(ref int_options) => int_options.get_fastfield_cardinality().is_some(), FieldType::Facet(_) => true, FieldType::JsonObject(_) => false, } @@ -209,7 +219,8 @@ impl FieldType { FieldType::U64(ref int_options) | FieldType::I64(ref int_options) | FieldType::F64(ref int_options) - | FieldType::Date(ref int_options) => int_options.fieldnorms(), + | FieldType::Date(ref int_options) + | FieldType::Bool(ref int_options) => int_options.fieldnorms(), FieldType::Facet(_) => false, FieldType::Bytes(ref bytes_options) => bytes_options.fieldnorms(), FieldType::JsonObject(ref _json_object_options) => false, @@ -232,7 +243,8 @@ impl FieldType { FieldType::U64(ref int_options) | FieldType::I64(ref int_options) | FieldType::F64(ref int_options) - | FieldType::Date(ref int_options) => { + | FieldType::Date(ref int_options) + | FieldType::Bool(ref int_options) => { if int_options.is_indexed() { Some(IndexRecordOption::Basic) } else { @@ -277,6 +289,10 @@ impl FieldType { json: JsonValue::String(field_text), }) } + FieldType::Bool(_) => Err(ValueParsingError::TypeError { + expected: 
"a boolean", + json: JsonValue::String(field_text), + }), FieldType::Facet(_) => Ok(Value::Facet(Facet::from(&field_text))), FieldType::Bytes(_) => base64::decode(&field_text) .map(Value::Bytes) @@ -318,6 +334,10 @@ impl FieldType { }) } } + FieldType::Bool(_) => Err(ValueParsingError::TypeError { + expected: "a boolean", + json: JsonValue::Number(field_val_num), + }), FieldType::Str(_) | FieldType::Facet(_) | FieldType::Bytes(_) => { Err(ValueParsingError::TypeError { expected: "a string", @@ -348,6 +368,13 @@ impl FieldType { json: JsonValue::Object(json_map), }), }, + JsonValue::Bool(json_bool_val) => match self { + FieldType::Bool(_) => Ok(Value::Bool(json_bool_val)), + _ => Err(ValueParsingError::TypeError { + expected: self.value_type().name(), + json: JsonValue::Bool(json_bool_val), + }), + }, _ => Err(ValueParsingError::TypeError { expected: self.value_type().name(), json: json.clone(), diff --git a/src/schema/term.rs b/src/schema/term.rs index 93a5806b2..ac6b8a012 100644 --- a/src/schema/term.rs +++ b/src/schema/term.rs @@ -69,6 +69,11 @@ impl Term { Term::from_fast_value(field, &val) } + /// Builds a term given a field, and a bool-value + pub fn from_field_bool(field: Field, val: bool) -> Term { + Term::from_fast_value(field, &val) + } + /// Builds a term given a field, and a DateTime value pub fn from_field_date(field: Field, val: DateTime) -> Term { Term::from_fast_value(field, &val) @@ -135,6 +140,11 @@ impl Term { self.set_fast_value(val); } + /// Sets a `bool` value in the term. + pub fn set_bool(&mut self, val: bool) { + self.set_fast_value(val); + } + /// Sets the value of a `Bytes` field. pub fn set_bytes(&mut self, bytes: &[u8]) { self.0.resize(5, 0u8); @@ -262,6 +272,14 @@ where B: AsRef<[u8]> self.get_fast_type::() } + /// Returns the `bool` value stored in a term. + /// + /// Returns None if the term is not of the bool type, or if the term byte representation + /// is invalid.
+ pub fn as_bool(&self) -> Option { + self.get_fast_type::() + } + /// Returns the `Date` value stored in a term. /// /// Returns None if the term is not of the Date type, or if the term byte representation @@ -372,6 +390,9 @@ fn debug_value_bytes(typ: Type, bytes: &[u8], f: &mut fmt::Formatter) -> fmt::Re Type::F64 => { write_opt(f, get_fast_type::(bytes))?; } + Type::Bool => { + write_opt(f, get_fast_type::(bytes))?; + } // TODO pretty print these types too. Type::Date => { write_opt(f, get_fast_type::(bytes))?; diff --git a/src/schema/value.rs b/src/schema/value.rs index fb2807958..5c3adcdca 100644 --- a/src/schema/value.rs +++ b/src/schema/value.rs @@ -22,6 +22,8 @@ pub enum Value { I64(i64), /// 64-bits Float `f64` F64(f64), + /// Bool value + Bool(bool), /// Date/time with second precision Date(DateTime), /// Facet @@ -43,6 +45,7 @@ impl Serialize for Value { Value::U64(u) => serializer.serialize_u64(u), Value::I64(u) => serializer.serialize_i64(u), Value::F64(u) => serializer.serialize_f64(u), + Value::Bool(b) => serializer.serialize_bool(b), Value::Date(ref date) => time::serde::rfc3339::serialize(&date.into_utc(), serializer), Value::Facet(ref facet) => facet.serialize(serializer), Value::Bytes(ref bytes) => serializer.serialize_bytes(bytes), @@ -75,6 +78,12 @@ impl<'de> Deserialize<'de> for Value { Ok(Value::F64(v)) } + fn visit_bool(self, v: bool) -> Result + where + E: serde::de::Error, { + Ok(Value::Bool(v)) + } + fn visit_str(self, v: &str) -> Result { Ok(Value::Str(v.to_owned())) } @@ -151,6 +160,17 @@ impl Value { } } + /// Returns the bool value, provided the value is of the `Bool` type. + /// + /// Return None if the value is not of type `Bool`. + pub fn as_bool(&self) -> Option { + if let Value::Bool(value) = self { + Some(*value) + } else { + None + } + } + /// Returns the Date-value, provided the value is of the `Date` type. /// /// Returns None if the value is not of type `Date`. 
@@ -209,6 +229,12 @@ impl From for Value { } } +impl From for Value { + fn from(b: bool) -> Self { + Value::Bool(b) + } +} + impl From for Value { fn from(dt: DateTime) -> Value { Value::Date(dt) @@ -281,6 +307,7 @@ mod binary_serialize { const F64_CODE: u8 = 6; const EXT_CODE: u8 = 7; const JSON_OBJ_CODE: u8 = 8; + const BOOL_CODE: u8 = 9; // extended types @@ -317,6 +344,10 @@ mod binary_serialize { F64_CODE.serialize(writer)?; f64_to_u64(*val).serialize(writer) } + Value::Bool(ref val) => { + BOOL_CODE.serialize(writer)?; + val.serialize(writer) + } Value::Date(ref val) => { DATE_CODE.serialize(writer)?; let DateTime { unix_timestamp } = val; @@ -357,6 +388,10 @@ mod binary_serialize { let value = u64_to_f64(u64::deserialize(reader)?); Ok(Value::F64(value)) } + BOOL_CODE => { + let value = bool::deserialize(reader)?; + Ok(Value::Bool(value)) + } DATE_CODE => { let unix_timestamp = i64::deserialize(reader)?; Ok(Value::Date(DateTime::from_unix_timestamp(unix_timestamp)))