fix ReferenceValue API flaw (#2372)

* fix ReferenceValue API flaw

Remove the `&Facet` and `&PreTokenizedString` references from the `ReferenceValue` API,
as handing them out requires every type implementing the `Value` trait to have them stored somewhere.

Since `PreTokenizedString` is quite niche, I just copy it into a `Box`
instead of designing a reference API around it.

* fix comment link
This commit is contained in:
PSeitz
2024-05-09 06:14:42 +02:00
committed by GitHub
parent 8cd7ddc535
commit 71f3b4e4e3
8 changed files with 35 additions and 30 deletions

View File

@@ -146,8 +146,11 @@ mod tests {
facet_ords.extend(facet_reader.facet_ords(0u32));
assert_eq!(&facet_ords, &[0u64]);
let doc = searcher.doc::<TantivyDocument>(DocAddress::new(0u32, 0u32))?;
let value: Option<&Facet> = doc.get_first(facet_field).and_then(|v| v.as_facet());
assert_eq!(value, Facet::from_text("/a/b").ok().as_ref());
let value: Option<Facet> = doc
.get_first(facet_field)
.and_then(|v| v.as_facet())
.map(|facet| Facet::from_encoded_string(facet.to_string()));
assert_eq!(value, Facet::from_text("/a/b").ok());
Ok(())
}

View File

@@ -183,8 +183,7 @@ impl FastFieldsWriter {
.record_datetime(doc_id, field_name, truncated_datetime);
}
ReferenceValueLeaf::Facet(val) => {
self.columnar_writer
.record_str(doc_id, field_name, val.encoded_str());
self.columnar_writer.record_str(doc_id, field_name, val);
}
ReferenceValueLeaf::Bytes(val) => {
self.columnar_writer.record_bytes(doc_id, field_name, val);

View File

@@ -206,8 +206,7 @@ impl SegmentWriter {
// Used to help with linting and type checking.
let value = value_access as D::Value<'_>;
let facet = value.as_facet().ok_or_else(make_schema_error)?;
let facet_str = facet.encoded_str();
let facet_str = value.as_facet().ok_or_else(make_schema_error)?;
let mut facet_tokenizer = facet_tokenizer.token_stream(facet_str);
let mut indexing_position = IndexingPosition::default();
postings_writer.index_text(
@@ -230,7 +229,7 @@ impl SegmentWriter {
&mut self.per_field_text_analyzers[field.field_id() as usize];
text_analyzer.token_stream(text)
} else if let Some(tok_str) = value.as_pre_tokenized_text() {
BoxTokenStream::new(PreTokenizedStream::from(tok_str.clone()))
BoxTokenStream::new(PreTokenizedStream::from(*tok_str.clone()))
} else {
continue;
};

View File

@@ -180,7 +180,7 @@ impl MoreLikeThis {
let facets: Vec<&str> = values
.iter()
.map(|value| {
value.as_facet().map(|f| f.encoded_str()).ok_or_else(|| {
value.as_facet().ok_or_else(|| {
TantivyError::InvalidArgument("invalid field value".to_string())
})
})
@@ -220,7 +220,7 @@ impl MoreLikeThis {
let mut token_stream = tokenizer.token_stream(text);
token_stream.process(sink);
} else if let Some(tok_str) = value.as_pre_tokenized_text() {
let mut token_stream = PreTokenizedStream::from(tok_str.clone());
let mut token_stream = PreTokenizedStream::from(*tok_str.clone());
token_stream.process(sink);
}
}

View File

@@ -873,7 +873,7 @@ mod tests {
);
let facet = Facet::from_text("/hello/world").unwrap();
let result = serialize_value(ReferenceValueLeaf::Facet(&facet).into());
let result = serialize_value(ReferenceValueLeaf::Facet(facet.encoded_str()).into());
let value = deserialize_value(result);
assert_eq!(value, crate::schema::OwnedValue::Facet(facet));
@@ -881,7 +881,8 @@ mod tests {
text: "hello, world".to_string(),
tokens: vec![Token::default(), Token::default()],
};
let result = serialize_value(ReferenceValueLeaf::PreTokStr(&pre_tok_str).into());
let result =
serialize_value(ReferenceValueLeaf::PreTokStr(pre_tok_str.clone().into()).into());
let value = deserialize_value(result);
assert_eq!(value, crate::schema::OwnedValue::PreTokStr(pre_tok_str));
}

View File

@@ -65,13 +65,13 @@ impl<'a> Value<'a> for &'a OwnedValue {
match self {
OwnedValue::Null => ReferenceValueLeaf::Null.into(),
OwnedValue::Str(val) => ReferenceValueLeaf::Str(val).into(),
OwnedValue::PreTokStr(val) => ReferenceValueLeaf::PreTokStr(val).into(),
OwnedValue::PreTokStr(val) => ReferenceValueLeaf::PreTokStr(val.clone().into()).into(),
OwnedValue::U64(val) => ReferenceValueLeaf::U64(*val).into(),
OwnedValue::I64(val) => ReferenceValueLeaf::I64(*val).into(),
OwnedValue::F64(val) => ReferenceValueLeaf::F64(*val).into(),
OwnedValue::Bool(val) => ReferenceValueLeaf::Bool(*val).into(),
OwnedValue::Date(val) => ReferenceValueLeaf::Date(*val).into(),
OwnedValue::Facet(val) => ReferenceValueLeaf::Facet(val).into(),
OwnedValue::Facet(val) => ReferenceValueLeaf::Facet(val.encoded_str()).into(),
OwnedValue::Bytes(val) => ReferenceValueLeaf::Bytes(val).into(),
OwnedValue::IpAddr(val) => ReferenceValueLeaf::IpAddr(*val).into(),
OwnedValue::Array(array) => ReferenceValue::Array(array.iter()),
@@ -277,11 +277,13 @@ impl<'a, V: Value<'a>> From<ReferenceValue<'a, V>> for OwnedValue {
ReferenceValueLeaf::I64(val) => OwnedValue::I64(val),
ReferenceValueLeaf::F64(val) => OwnedValue::F64(val),
ReferenceValueLeaf::Date(val) => OwnedValue::Date(val),
ReferenceValueLeaf::Facet(val) => OwnedValue::Facet(val.clone()),
ReferenceValueLeaf::Facet(val) => {
OwnedValue::Facet(Facet::from_encoded_string(val.to_string()))
}
ReferenceValueLeaf::Bytes(val) => OwnedValue::Bytes(val.to_vec()),
ReferenceValueLeaf::IpAddr(val) => OwnedValue::IpAddr(val),
ReferenceValueLeaf::Bool(val) => OwnedValue::Bool(val),
ReferenceValueLeaf::PreTokStr(val) => OwnedValue::PreTokStr(val.clone()),
ReferenceValueLeaf::PreTokStr(val) => OwnedValue::PreTokStr(*val.clone()),
},
ReferenceValue::Array(val) => {
OwnedValue::Array(val.map(|v| v.as_value().into()).collect())

View File

@@ -121,7 +121,7 @@ where W: Write
ReferenceValueLeaf::Facet(val) => {
self.write_type_code(type_codes::HIERARCHICAL_FACET_CODE)?;
val.serialize(self.writer)
Cow::Borrowed(val).serialize(self.writer)
}
ReferenceValueLeaf::Bytes(val) => {
self.write_type_code(type_codes::BYTES_CODE)?;
@@ -428,7 +428,7 @@ mod tests {
);
let facet = Facet::from_text("/hello/world").unwrap();
let result = serialize_value(ReferenceValueLeaf::Facet(&facet).into());
let result = serialize_value(ReferenceValueLeaf::Facet(facet.encoded_str()).into());
let expected = binary_repr!(
type_codes::HIERARCHICAL_FACET_CODE => Facet::from_text("/hello/world").unwrap(),
);
@@ -441,7 +441,8 @@ mod tests {
text: "hello, world".to_string(),
tokens: vec![Token::default(), Token::default()],
};
let result = serialize_value(ReferenceValueLeaf::PreTokStr(&pre_tok_str).into());
let result =
serialize_value(ReferenceValueLeaf::PreTokStr(pre_tok_str.clone().into()).into());
let expected = binary_repr!(
type_codes::EXT_CODE, type_codes::TOK_STR_EXT_CODE => pre_tok_str,
);

View File

@@ -3,7 +3,6 @@ use std::net::Ipv6Addr;
use common::DateTime;
use crate::schema::Facet;
use crate::tokenizer::PreTokenizedString;
/// A single field value.
@@ -82,7 +81,7 @@ pub trait Value<'a>: Send + Sync + Debug {
#[inline]
/// If the Value is a pre-tokenized string, returns the associated string. Returns None
/// otherwise.
fn as_pre_tokenized_text(&self) -> Option<&'a PreTokenizedString> {
fn as_pre_tokenized_text(&self) -> Option<Box<PreTokenizedString>> {
self.as_leaf().and_then(|leaf| leaf.as_pre_tokenized_text())
}
@@ -94,7 +93,7 @@ pub trait Value<'a>: Send + Sync + Debug {
#[inline]
/// If the Value is a facet, returns the associated facet. Returns None otherwise.
fn as_facet(&self) -> Option<&'a Facet> {
fn as_facet(&self) -> Option<&'a str> {
self.as_leaf().and_then(|leaf| leaf.as_facet())
}
@@ -132,7 +131,7 @@ pub trait Value<'a>: Send + Sync + Debug {
}
/// An enum representing a leaf value for tantivy to index.
#[derive(Clone, Copy, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq)]
pub enum ReferenceValueLeaf<'a> {
/// A null value.
Null,
@@ -146,8 +145,9 @@ pub enum ReferenceValueLeaf<'a> {
F64(f64),
/// Date/time with nanoseconds precision
Date(DateTime),
/// Facet
Facet(&'a Facet),
/// Facet string needs to match the format of
/// [Facet::encoded_str](crate::schema::Facet::encoded_str).
Facet(&'a str),
/// Arbitrarily sized byte array
Bytes(&'a [u8]),
/// IpV6 Address. Internally there is no IpV4, it needs to be converted to `Ipv6Addr`.
@@ -155,7 +155,7 @@ pub enum ReferenceValueLeaf<'a> {
/// Bool value
Bool(bool),
/// Pre-tokenized string type.
PreTokStr(&'a PreTokenizedString),
PreTokStr(Box<PreTokenizedString>),
}
impl<'a, T: Value<'a> + ?Sized> From<ReferenceValueLeaf<'a>> for ReferenceValue<'a, T> {
@@ -261,9 +261,9 @@ impl<'a> ReferenceValueLeaf<'a> {
#[inline]
/// If the Value is a pre-tokenized string, returns the associated string. Returns None
/// otherwise.
pub fn as_pre_tokenized_text(&self) -> Option<&'a PreTokenizedString> {
pub fn as_pre_tokenized_text(&self) -> Option<Box<PreTokenizedString>> {
if let Self::PreTokStr(val) = self {
Some(val)
Some(val.clone())
} else {
None
}
@@ -281,7 +281,7 @@ impl<'a> ReferenceValueLeaf<'a> {
#[inline]
/// If the Value is a facet, returns the associated facet. Returns None otherwise.
pub fn as_facet(&self) -> Option<&'a Facet> {
pub fn as_facet(&self) -> Option<&'a str> {
if let Self::Facet(val) = self {
Some(val)
} else {
@@ -367,7 +367,7 @@ where V: Value<'a>
#[inline]
/// If the Value is a pre-tokenized string, returns the associated string. Returns None
/// otherwise.
pub fn as_pre_tokenized_text(&self) -> Option<&'a PreTokenizedString> {
pub fn as_pre_tokenized_text(&self) -> Option<Box<PreTokenizedString>> {
self.as_leaf().and_then(|leaf| leaf.as_pre_tokenized_text())
}
@@ -379,7 +379,7 @@ where V: Value<'a>
#[inline]
/// If the Value is a facet, returns the associated facet. Returns None otherwise.
pub fn as_facet(&self) -> Option<&'a Facet> {
pub fn as_facet(&self) -> Option<&'a str> {
self.as_leaf().and_then(|leaf| leaf.as_facet())
}