diff --git a/src/fastfield/facet_reader.rs b/src/fastfield/facet_reader.rs index c4e170352..731242779 100644 --- a/src/fastfield/facet_reader.rs +++ b/src/fastfield/facet_reader.rs @@ -146,8 +146,11 @@ mod tests { facet_ords.extend(facet_reader.facet_ords(0u32)); assert_eq!(&facet_ords, &[0u64]); let doc = searcher.doc::(DocAddress::new(0u32, 0u32))?; - let value: Option<&Facet> = doc.get_first(facet_field).and_then(|v| v.as_facet()); - assert_eq!(value, Facet::from_text("/a/b").ok().as_ref()); + let value: Option = doc + .get_first(facet_field) + .and_then(|v| v.as_facet()) + .map(|facet| Facet::from_encoded_string(facet.to_string())); + assert_eq!(value, Facet::from_text("/a/b").ok()); Ok(()) } diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs index 8212f2b2f..2f4196078 100644 --- a/src/fastfield/writer.rs +++ b/src/fastfield/writer.rs @@ -183,8 +183,7 @@ impl FastFieldsWriter { .record_datetime(doc_id, field_name, truncated_datetime); } ReferenceValueLeaf::Facet(val) => { - self.columnar_writer - .record_str(doc_id, field_name, val.encoded_str()); + self.columnar_writer.record_str(doc_id, field_name, val); } ReferenceValueLeaf::Bytes(val) => { self.columnar_writer.record_bytes(doc_id, field_name, val); diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index 2ee2843c0..0e1be366c 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -206,8 +206,7 @@ impl SegmentWriter { // Used to help with linting and type checking. let value = value_access as D::Value<'_>; - let facet = value.as_facet().ok_or_else(make_schema_error)?; - let facet_str = facet.encoded_str(); + let facet_str = value.as_facet().ok_or_else(make_schema_error)?; let mut facet_tokenizer = facet_tokenizer.token_stream(facet_str); let mut indexing_position = IndexingPosition::default(); postings_writer.index_text( @@ -230,7 +229,7 @@ impl SegmentWriter { &mut self.per_field_text_analyzers[field.field_id() as usize]; text_analyzer.token_stream(text) } else if let Some(tok_str) = value.as_pre_tokenized_text() { - BoxTokenStream::new(PreTokenizedStream::from(tok_str.clone())) + BoxTokenStream::new(PreTokenizedStream::from(*tok_str.clone())) } else { continue; }; diff --git a/src/query/more_like_this/more_like_this.rs b/src/query/more_like_this/more_like_this.rs index 4fb692e9d..043d081df 100644 --- a/src/query/more_like_this/more_like_this.rs +++ b/src/query/more_like_this/more_like_this.rs @@ -180,7 +180,7 @@ impl MoreLikeThis { let facets: Vec<&str> = values .iter() .map(|value| { - value.as_facet().map(|f| f.encoded_str()).ok_or_else(|| { + value.as_facet().ok_or_else(|| { TantivyError::InvalidArgument("invalid field value".to_string()) }) }) @@ -220,7 +220,7 @@ impl MoreLikeThis { let mut token_stream = tokenizer.token_stream(text); token_stream.process(sink); } else if let Some(tok_str) = value.as_pre_tokenized_text() { - let mut token_stream = PreTokenizedStream::from(tok_str.clone()); + let mut token_stream = PreTokenizedStream::from(*tok_str.clone()); token_stream.process(sink); } } diff --git a/src/schema/document/de.rs b/src/schema/document/de.rs index aab2b070e..e80bff2c9 100644 --- a/src/schema/document/de.rs +++ b/src/schema/document/de.rs @@ -873,7 +873,7 @@ mod tests { ); let facet = Facet::from_text("/hello/world").unwrap(); - let result = serialize_value(ReferenceValueLeaf::Facet(&facet).into()); + let result = serialize_value(ReferenceValueLeaf::Facet(facet.encoded_str()).into()); let value = deserialize_value(result); assert_eq!(value, crate::schema::OwnedValue::Facet(facet)); @@ -881,7 +881,8 @@ mod tests { text: "hello, world".to_string(), tokens: vec![Token::default(), Token::default()], }; - let result = serialize_value(ReferenceValueLeaf::PreTokStr(&pre_tok_str).into()); + let result = + serialize_value(ReferenceValueLeaf::PreTokStr(pre_tok_str.clone().into()).into()); let value = deserialize_value(result); assert_eq!(value, crate::schema::OwnedValue::PreTokStr(pre_tok_str)); } diff --git a/src/schema/document/owned_value.rs b/src/schema/document/owned_value.rs index 48d3f8792..a70eb7d1c 100644 --- a/src/schema/document/owned_value.rs +++ b/src/schema/document/owned_value.rs @@ -65,13 +65,13 @@ impl<'a> Value<'a> for &'a OwnedValue { match self { OwnedValue::Null => ReferenceValueLeaf::Null.into(), OwnedValue::Str(val) => ReferenceValueLeaf::Str(val).into(), - OwnedValue::PreTokStr(val) => ReferenceValueLeaf::PreTokStr(val).into(), + OwnedValue::PreTokStr(val) => ReferenceValueLeaf::PreTokStr(val.clone().into()).into(), OwnedValue::U64(val) => ReferenceValueLeaf::U64(*val).into(), OwnedValue::I64(val) => ReferenceValueLeaf::I64(*val).into(), OwnedValue::F64(val) => ReferenceValueLeaf::F64(*val).into(), OwnedValue::Bool(val) => ReferenceValueLeaf::Bool(*val).into(), OwnedValue::Date(val) => ReferenceValueLeaf::Date(*val).into(), - OwnedValue::Facet(val) => ReferenceValueLeaf::Facet(val).into(), + OwnedValue::Facet(val) => ReferenceValueLeaf::Facet(val.encoded_str()).into(), OwnedValue::Bytes(val) => ReferenceValueLeaf::Bytes(val).into(), OwnedValue::IpAddr(val) => ReferenceValueLeaf::IpAddr(*val).into(), OwnedValue::Array(array) => ReferenceValue::Array(array.iter()), @@ -277,11 +277,13 @@ impl<'a, V: Value<'a>> From> for OwnedValue { ReferenceValueLeaf::I64(val) => OwnedValue::I64(val), ReferenceValueLeaf::F64(val) => OwnedValue::F64(val), ReferenceValueLeaf::Date(val) => OwnedValue::Date(val), - ReferenceValueLeaf::Facet(val) => OwnedValue::Facet(val.clone()), + ReferenceValueLeaf::Facet(val) => { + OwnedValue::Facet(Facet::from_encoded_string(val.to_string())) + } ReferenceValueLeaf::Bytes(val) => OwnedValue::Bytes(val.to_vec()), ReferenceValueLeaf::IpAddr(val) => OwnedValue::IpAddr(val), ReferenceValueLeaf::Bool(val) => OwnedValue::Bool(val), - ReferenceValueLeaf::PreTokStr(val) => OwnedValue::PreTokStr(val.clone()), + ReferenceValueLeaf::PreTokStr(val) => OwnedValue::PreTokStr(*val.clone()), }, ReferenceValue::Array(val) => { OwnedValue::Array(val.map(|v| v.as_value().into()).collect()) diff --git a/src/schema/document/se.rs b/src/schema/document/se.rs index 8acffb36b..f1eed1027 100644 --- a/src/schema/document/se.rs +++ b/src/schema/document/se.rs @@ -121,7 +121,7 @@ where W: Write ReferenceValueLeaf::Facet(val) => { self.write_type_code(type_codes::HIERARCHICAL_FACET_CODE)?; - val.serialize(self.writer) + Cow::Borrowed(val).serialize(self.writer) } ReferenceValueLeaf::Bytes(val) => { self.write_type_code(type_codes::BYTES_CODE)?; @@ -428,7 +428,7 @@ mod tests { ); let facet = Facet::from_text("/hello/world").unwrap(); - let result = serialize_value(ReferenceValueLeaf::Facet(&facet).into()); + let result = serialize_value(ReferenceValueLeaf::Facet(facet.encoded_str()).into()); let expected = binary_repr!( type_codes::HIERARCHICAL_FACET_CODE => Facet::from_text("/hello/world").unwrap(), ); @@ -441,7 +441,8 @@ mod tests { text: "hello, world".to_string(), tokens: vec![Token::default(), Token::default()], }; - let result = serialize_value(ReferenceValueLeaf::PreTokStr(&pre_tok_str).into()); + let result = + serialize_value(ReferenceValueLeaf::PreTokStr(pre_tok_str.clone().into()).into()); let expected = binary_repr!( type_codes::EXT_CODE, type_codes::TOK_STR_EXT_CODE => pre_tok_str, ); diff --git a/src/schema/document/value.rs b/src/schema/document/value.rs index ca3d3bf1b..e05649ad7 100644 --- a/src/schema/document/value.rs +++ b/src/schema/document/value.rs @@ -3,7 +3,6 @@ use std::net::Ipv6Addr; use common::DateTime; -use crate::schema::Facet; use crate::tokenizer::PreTokenizedString; /// A single field value. @@ -82,7 +81,7 @@ pub trait Value<'a>: Send + Sync + Debug { #[inline] /// If the Value is a pre-tokenized string, returns the associated string. Returns None /// otherwise. - fn as_pre_tokenized_text(&self) -> Option<&'a PreTokenizedString> { + fn as_pre_tokenized_text(&self) -> Option> { self.as_leaf().and_then(|leaf| leaf.as_pre_tokenized_text()) } @@ -94,7 +93,7 @@ pub trait Value<'a>: Send + Sync + Debug { #[inline] /// If the Value is a facet, returns the associated facet. Returns None otherwise. - fn as_facet(&self) -> Option<&'a Facet> { + fn as_facet(&self) -> Option<&'a str> { self.as_leaf().and_then(|leaf| leaf.as_facet()) } @@ -132,7 +131,7 @@ pub trait Value<'a>: Send + Sync + Debug { } /// A enum representing a leaf value for tantivy to index. -#[derive(Clone, Copy, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq)] pub enum ReferenceValueLeaf<'a> { /// A null value. Null, @@ -146,8 +145,9 @@ pub enum ReferenceValueLeaf<'a> { F64(f64), /// Date/time with nanoseconds precision Date(DateTime), - /// Facet - Facet(&'a Facet), + /// Facet string needs to match the format of + /// [Facet::encoded_str](crate::schema::Facet::encoded_str). + Facet(&'a str), /// Arbitrarily sized byte array Bytes(&'a [u8]), /// IpV6 Address. Internally there is no IpV4, it needs to be converted to `Ipv6Addr`. @@ -155,7 +155,7 @@ pub enum ReferenceValueLeaf<'a> { /// Bool value Bool(bool), /// Pre-tokenized str type, - PreTokStr(&'a PreTokenizedString), + PreTokStr(Box), } impl<'a, T: Value<'a> + ?Sized> From> for ReferenceValue<'a, T> { @@ -261,9 +261,9 @@ impl<'a> ReferenceValueLeaf<'a> { #[inline] /// If the Value is a pre-tokenized string, returns the associated string. Returns None /// otherwise. - pub fn as_pre_tokenized_text(&self) -> Option<&'a PreTokenizedString> { + pub fn as_pre_tokenized_text(&self) -> Option> { if let Self::PreTokStr(val) = self { - Some(val) + Some(val.clone()) } else { None } @@ -281,7 +281,7 @@ impl<'a> ReferenceValueLeaf<'a> { #[inline] /// If the Value is a facet, returns the associated facet. Returns None otherwise. - pub fn as_facet(&self) -> Option<&'a Facet> { + pub fn as_facet(&self) -> Option<&'a str> { if let Self::Facet(val) = self { Some(val) } else { @@ -367,7 +367,7 @@ where V: Value<'a> #[inline] /// If the Value is a pre-tokenized string, returns the associated string. Returns None /// otherwise. - pub fn as_pre_tokenized_text(&self) -> Option<&'a PreTokenizedString> { + pub fn as_pre_tokenized_text(&self) -> Option> { self.as_leaf().and_then(|leaf| leaf.as_pre_tokenized_text()) } @@ -379,7 +379,7 @@ where V: Value<'a> #[inline] /// If the Value is a facet, returns the associated facet. Returns None otherwise. - pub fn as_facet(&self) -> Option<&'a Facet> { + pub fn as_facet(&self) -> Option<&'a str> { self.as_leaf().and_then(|leaf| leaf.as_facet()) }