mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2025-12-23 02:29:57 +00:00
refactor Term
Fixes some issues with Term: remove duplicate calls to truncate or resize; replace magic number 5 with a constant; enforce a minimum size of 5 for metadata; fix broken truncate docs; use a constructor instead of new + set calls; normalize the constructor stack; replace an assert on internal behavior. Fixes #1585.
This commit is contained in:
@@ -260,12 +260,8 @@ pub struct JsonTermWriter<'a> {
|
||||
}
|
||||
|
||||
impl<'a> JsonTermWriter<'a> {
|
||||
pub fn from_field_and_json_path(
|
||||
field: Field,
|
||||
json_path: &str,
|
||||
term_buffer: &'a mut Term,
|
||||
) -> Self {
|
||||
term_buffer.set_field(Type::Json, field);
|
||||
pub fn from_json_path(json_path: &str, field: Field, term_buffer: &'a mut Term) -> Self {
|
||||
term_buffer.set_field_and_type(field, Type::Json);
|
||||
let mut json_term_writer = Self::wrap(term_buffer);
|
||||
for segment in json_path.split('.') {
|
||||
json_term_writer.push_path_segment(segment);
|
||||
@@ -356,8 +352,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_json_writer() {
|
||||
let field = Field::from_field_id(1);
|
||||
let mut term = Term::new();
|
||||
term.set_field(Type::Json, field);
|
||||
let mut term = Term::with_type_and_field(Type::Json, field);
|
||||
let mut json_writer = JsonTermWriter::wrap(&mut term);
|
||||
json_writer.push_path_segment("attributes");
|
||||
json_writer.push_path_segment("color");
|
||||
@@ -391,8 +386,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_string_term() {
|
||||
let field = Field::from_field_id(1);
|
||||
let mut term = Term::new();
|
||||
term.set_field(Type::Json, field);
|
||||
let mut term = Term::with_type_and_field(Type::Json, field);
|
||||
let mut json_writer = JsonTermWriter::wrap(&mut term);
|
||||
json_writer.push_path_segment("color");
|
||||
json_writer.set_str("red");
|
||||
@@ -405,8 +399,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_i64_term() {
|
||||
let field = Field::from_field_id(1);
|
||||
let mut term = Term::new();
|
||||
term.set_field(Type::Json, field);
|
||||
let mut term = Term::with_type_and_field(Type::Json, field);
|
||||
let mut json_writer = JsonTermWriter::wrap(&mut term);
|
||||
json_writer.push_path_segment("color");
|
||||
json_writer.set_fast_value(-4i64);
|
||||
@@ -419,8 +412,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_u64_term() {
|
||||
let field = Field::from_field_id(1);
|
||||
let mut term = Term::new();
|
||||
term.set_field(Type::Json, field);
|
||||
let mut term = Term::with_type_and_field(Type::Json, field);
|
||||
let mut json_writer = JsonTermWriter::wrap(&mut term);
|
||||
json_writer.push_path_segment("color");
|
||||
json_writer.set_fast_value(4u64);
|
||||
@@ -433,8 +425,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_f64_term() {
|
||||
let field = Field::from_field_id(1);
|
||||
let mut term = Term::new();
|
||||
term.set_field(Type::Json, field);
|
||||
let mut term = Term::with_type_and_field(Type::Json, field);
|
||||
let mut json_writer = JsonTermWriter::wrap(&mut term);
|
||||
json_writer.push_path_segment("color");
|
||||
json_writer.set_fast_value(4.0f64);
|
||||
@@ -447,8 +438,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_bool_term() {
|
||||
let field = Field::from_field_id(1);
|
||||
let mut term = Term::new();
|
||||
term.set_field(Type::Json, field);
|
||||
let mut term = Term::with_type_and_field(Type::Json, field);
|
||||
let mut json_writer = JsonTermWriter::wrap(&mut term);
|
||||
json_writer.push_path_segment("color");
|
||||
json_writer.set_fast_value(true);
|
||||
@@ -461,8 +451,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_push_after_set_path_segment() {
|
||||
let field = Field::from_field_id(1);
|
||||
let mut term = Term::new();
|
||||
term.set_field(Type::Json, field);
|
||||
let mut term = Term::with_type_and_field(Type::Json, field);
|
||||
let mut json_writer = JsonTermWriter::wrap(&mut term);
|
||||
json_writer.push_path_segment("attribute");
|
||||
json_writer.set_str("something");
|
||||
@@ -477,8 +466,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_pop_segment() {
|
||||
let field = Field::from_field_id(1);
|
||||
let mut term = Term::new();
|
||||
term.set_field(Type::Json, field);
|
||||
let mut term = Term::with_type_and_field(Type::Json, field);
|
||||
let mut json_writer = JsonTermWriter::wrap(&mut term);
|
||||
json_writer.push_path_segment("color");
|
||||
json_writer.push_path_segment("hue");
|
||||
@@ -493,8 +481,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_json_writer_path() {
|
||||
let field = Field::from_field_id(1);
|
||||
let mut term = Term::new();
|
||||
term.set_field(Type::Json, field);
|
||||
let mut term = Term::with_type_and_field(Type::Json, field);
|
||||
let mut json_writer = JsonTermWriter::wrap(&mut term);
|
||||
json_writer.push_path_segment("color");
|
||||
assert_eq!(json_writer.path(), b"color");
|
||||
|
||||
@@ -114,7 +114,7 @@ impl SegmentWriter {
|
||||
fast_field_writers: FastFieldsWriter::from_schema(&schema),
|
||||
doc_opstamps: Vec::with_capacity(1_000),
|
||||
per_field_text_analyzers,
|
||||
term_buffer: Term::new(),
|
||||
term_buffer: Term::with_capacity(16),
|
||||
schema,
|
||||
})
|
||||
}
|
||||
@@ -178,7 +178,7 @@ impl SegmentWriter {
|
||||
let (term_buffer, ctx) = (&mut self.term_buffer, &mut self.ctx);
|
||||
let postings_writer: &mut dyn PostingsWriter =
|
||||
self.per_field_postings_writers.get_for_field_mut(field);
|
||||
term_buffer.set_field(field_entry.field_type().value_type(), field);
|
||||
term_buffer.clear_with_field_and_type(field_entry.field_type().value_type(), field);
|
||||
|
||||
match *field_entry.field_type() {
|
||||
FieldType::Facet(_) => {
|
||||
@@ -220,7 +220,7 @@ impl SegmentWriter {
|
||||
}
|
||||
};
|
||||
|
||||
assert_eq!(term_buffer.as_slice().len(), 5);
|
||||
assert!(term_buffer.is_empty());
|
||||
postings_writer.index_text(
|
||||
doc_id,
|
||||
&mut *token_stream,
|
||||
@@ -543,8 +543,7 @@ mod tests {
|
||||
let inv_idx = segment_reader.inverted_index(json_field).unwrap();
|
||||
let term_dict = inv_idx.terms();
|
||||
|
||||
let mut term = Term::new();
|
||||
term.set_field(Type::Json, json_field);
|
||||
let mut term = Term::with_type_and_field(Type::Json, json_field);
|
||||
let mut term_stream = term_dict.stream().unwrap();
|
||||
|
||||
let mut json_term_writer = JsonTermWriter::wrap(&mut term);
|
||||
@@ -637,8 +636,7 @@ mod tests {
|
||||
let searcher = reader.searcher();
|
||||
let segment_reader = searcher.segment_reader(0u32);
|
||||
let inv_index = segment_reader.inverted_index(json_field).unwrap();
|
||||
let mut term = Term::new();
|
||||
term.set_field(Type::Json, json_field);
|
||||
let mut term = Term::with_type_and_field(Type::Json, json_field);
|
||||
let mut json_term_writer = JsonTermWriter::wrap(&mut term);
|
||||
json_term_writer.push_path_segment("mykey");
|
||||
json_term_writer.set_str("token");
|
||||
@@ -682,8 +680,7 @@ mod tests {
|
||||
let searcher = reader.searcher();
|
||||
let segment_reader = searcher.segment_reader(0u32);
|
||||
let inv_index = segment_reader.inverted_index(json_field).unwrap();
|
||||
let mut term = Term::new();
|
||||
term.set_field(Type::Json, json_field);
|
||||
let mut term = Term::with_type_and_field(Type::Json, json_field);
|
||||
let mut json_term_writer = JsonTermWriter::wrap(&mut term);
|
||||
json_term_writer.push_path_segment("mykey");
|
||||
json_term_writer.set_str("two tokens");
|
||||
@@ -728,8 +725,7 @@ mod tests {
|
||||
writer.commit().unwrap();
|
||||
let reader = index.reader().unwrap();
|
||||
let searcher = reader.searcher();
|
||||
let mut term = Term::new();
|
||||
term.set_field(Type::Json, json_field);
|
||||
let mut term = Term::with_type_and_field(Type::Json, json_field);
|
||||
let mut json_term_writer = JsonTermWriter::wrap(&mut term);
|
||||
json_term_writer.push_path_segment("mykey");
|
||||
json_term_writer.push_path_segment("field");
|
||||
|
||||
@@ -734,9 +734,8 @@ fn generate_literals_for_json_object(
|
||||
index_record_option: IndexRecordOption,
|
||||
) -> Result<Vec<LogicalLiteral>, QueryParserError> {
|
||||
let mut logical_literals = Vec::new();
|
||||
let mut term = Term::new();
|
||||
let mut json_term_writer =
|
||||
JsonTermWriter::from_field_and_json_path(field, json_path, &mut term);
|
||||
let mut term = Term::with_capacity(100);
|
||||
let mut json_term_writer = JsonTermWriter::from_json_path(json_path, field, &mut term);
|
||||
if let Some(term) = convert_to_fast_value_and_get_term(&mut json_term_writer, phrase) {
|
||||
logical_literals.push(LogicalLiteral::Term(term));
|
||||
}
|
||||
|
||||
@@ -7,18 +7,6 @@ use crate::fastfield::FastValue;
|
||||
use crate::schema::{Facet, Type};
|
||||
use crate::{DatePrecision, DateTime};
|
||||
|
||||
/// Size (in bytes) of the buffer of a fast value (u64, i64, f64, or date) term.
|
||||
/// <field> + <type byte> + <value len>
|
||||
///
|
||||
/// - <field> is a big endian encoded u32 field id
|
||||
/// - <type_byte>'s most significant bit expresses whether the term is a json term or not
|
||||
/// The remaining 7 bits are used to encode the type of the value.
|
||||
/// If this is a JSON term, the type is the type of the leaf of the json.
|
||||
///
|
||||
/// - <value> is, if this is not the json term, a binary representation specific to the type.
|
||||
/// If it is a JSON Term, then it is prepended with the path that leads to this leaf value.
|
||||
const FAST_VALUE_TERM_LEN: usize = 4 + 1 + 8;
|
||||
|
||||
/// Separates the different segments of
|
||||
/// the json path.
|
||||
pub const JSON_PATH_SEGMENT_SEP: u8 = 1u8;
|
||||
@@ -42,18 +30,50 @@ impl AsMut<Vec<u8>> for Term {
|
||||
}
|
||||
}
|
||||
|
||||
/// The number of bytes used as metadata by `Term`.
|
||||
const TERM_METADATA_LENGTH: usize = 5;
|
||||
|
||||
impl Term {
|
||||
pub(crate) fn new() -> Term {
|
||||
Term(Vec::with_capacity(100))
|
||||
pub(crate) fn with_capacity(capacity: usize) -> Term {
|
||||
let mut data = Vec::with_capacity(TERM_METADATA_LENGTH + capacity);
|
||||
data.resize(TERM_METADATA_LENGTH, 0u8);
|
||||
Term(data)
|
||||
}
|
||||
|
||||
pub(crate) fn with_type_and_field(typ: Type, field: Field) -> Term {
|
||||
let mut term = Self::with_capacity(8);
|
||||
term.set_field_and_type(field, typ);
|
||||
term
|
||||
}
|
||||
|
||||
fn with_bytes_and_field_and_payload(typ: Type, field: Field, bytes: &[u8]) -> Term {
|
||||
let mut term = Self::with_capacity(bytes.len());
|
||||
term.set_field_and_type(field, typ);
|
||||
term.0.extend_from_slice(bytes);
|
||||
term
|
||||
}
|
||||
|
||||
fn from_fast_value<T: FastValue>(field: Field, val: &T) -> Term {
|
||||
let mut term = Term(vec![0u8; FAST_VALUE_TERM_LEN]);
|
||||
term.set_field(T::to_type(), field);
|
||||
let mut term = Self::with_type_and_field(T::to_type(), field);
|
||||
term.set_field_and_type(field, T::to_type());
|
||||
term.set_u64(val.to_u64());
|
||||
term
|
||||
}
|
||||
|
||||
/// Panics if the term already contains value bytes.
|
||||
///
|
||||
/// Sets field and the type.
|
||||
pub(crate) fn set_field_and_type(&mut self, field: Field, typ: Type) {
|
||||
assert_eq!(self.0.len(), TERM_METADATA_LENGTH);
|
||||
self.0[0..4].clone_from_slice(field.field_id().to_be_bytes().as_ref());
|
||||
self.0[4] = typ.to_code();
|
||||
}
|
||||
|
||||
/// Is empty if there are no value bytes.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.0.len() == TERM_METADATA_LENGTH
|
||||
}
|
||||
|
||||
/// Builds a term given a field, and a `u64`-value
|
||||
pub fn from_field_u64(field: Field, val: u64) -> Term {
|
||||
Term::from_fast_value(field, &val)
|
||||
@@ -82,31 +102,29 @@ impl Term {
|
||||
/// Creates a `Term` given a facet.
|
||||
pub fn from_facet(field: Field, facet: &Facet) -> Term {
|
||||
let facet_encoded_str = facet.encoded_str();
|
||||
Term::create_bytes_term(Type::Facet, field, facet_encoded_str.as_bytes())
|
||||
Term::with_bytes_and_field_and_payload(Type::Facet, field, facet_encoded_str.as_bytes())
|
||||
}
|
||||
|
||||
/// Builds a term given a field, and a string value
|
||||
pub fn from_field_text(field: Field, text: &str) -> Term {
|
||||
Term::create_bytes_term(Type::Str, field, text.as_bytes())
|
||||
}
|
||||
|
||||
fn create_bytes_term(typ: Type, field: Field, bytes: &[u8]) -> Term {
|
||||
let mut term = Term(vec![0u8; 5 + bytes.len()]);
|
||||
term.set_field(typ, field);
|
||||
term.0.extend_from_slice(bytes);
|
||||
term
|
||||
Term::with_bytes_and_field_and_payload(Type::Str, field, text.as_bytes())
|
||||
}
|
||||
|
||||
/// Builds a term bytes.
|
||||
pub fn from_field_bytes(field: Field, bytes: &[u8]) -> Term {
|
||||
Term::create_bytes_term(Type::Bytes, field, bytes)
|
||||
Term::with_bytes_and_field_and_payload(Type::Bytes, field, bytes)
|
||||
}
|
||||
|
||||
pub(crate) fn set_field(&mut self, typ: Type, field: Field) {
|
||||
self.0.clear();
|
||||
self.0
|
||||
.extend_from_slice(field.field_id().to_be_bytes().as_ref());
|
||||
self.0.push(typ.to_code());
|
||||
/// Removes the value_bytes and sets the field and type code.
|
||||
pub(crate) fn clear_with_field_and_type(&mut self, typ: Type, field: Field) {
|
||||
self.truncate(TERM_METADATA_LENGTH);
|
||||
self.set_field_and_type(field, typ);
|
||||
}
|
||||
|
||||
/// Removes the value_bytes and sets the type code.
|
||||
pub fn clear_with_type(&mut self, typ: Type) {
|
||||
self.truncate(TERM_METADATA_LENGTH);
|
||||
self.0[4] = typ.to_code();
|
||||
}
|
||||
|
||||
/// Sets a u64 value in the term.
|
||||
@@ -117,12 +135,6 @@ impl Term {
|
||||
/// the natural order of the values.
|
||||
pub fn set_u64(&mut self, val: u64) {
|
||||
self.set_fast_value(val);
|
||||
self.set_bytes(val.to_be_bytes().as_ref());
|
||||
}
|
||||
|
||||
fn set_fast_value<T: FastValue>(&mut self, val: T) {
|
||||
self.0.resize(FAST_VALUE_TERM_LEN, 0u8);
|
||||
self.set_bytes(val.to_u64().to_be_bytes().as_ref());
|
||||
}
|
||||
|
||||
/// Sets a `i64` value in the term.
|
||||
@@ -145,9 +157,13 @@ impl Term {
|
||||
self.set_fast_value(val);
|
||||
}
|
||||
|
||||
fn set_fast_value<T: FastValue>(&mut self, val: T) {
|
||||
self.set_bytes(val.to_u64().to_be_bytes().as_ref());
|
||||
}
|
||||
|
||||
/// Sets the value of a `Bytes` field.
|
||||
pub fn set_bytes(&mut self, bytes: &[u8]) {
|
||||
self.0.resize(5, 0u8);
|
||||
self.0.truncate(TERM_METADATA_LENGTH);
|
||||
self.0.extend(bytes);
|
||||
}
|
||||
|
||||
@@ -156,18 +172,13 @@ impl Term {
|
||||
self.set_bytes(text.as_bytes());
|
||||
}
|
||||
|
||||
/// Removes the value_bytes and set the type code.
|
||||
pub fn clear_with_type(&mut self, typ: Type) {
|
||||
self.truncate(5);
|
||||
self.0[4] = typ.to_code();
|
||||
}
|
||||
|
||||
/// Truncate the term right after the field and the type code.
|
||||
/// Truncates the term. The new length needs to be at least 5, which is reserved for metadata.
|
||||
pub fn truncate(&mut self, len: usize) {
|
||||
assert!(len >= TERM_METADATA_LENGTH);
|
||||
self.0.truncate(len);
|
||||
}
|
||||
|
||||
/// Truncate the term right after the field and the type code.
|
||||
/// Appends value bytes to the Term.
|
||||
pub fn append_bytes(&mut self, bytes: &[u8]) {
|
||||
self.0.extend_from_slice(bytes);
|
||||
}
|
||||
@@ -293,7 +304,7 @@ where B: AsRef<[u8]>
|
||||
/// Returns `None` if the field is not of string type
|
||||
/// or if the bytes are not valid utf-8.
|
||||
pub fn as_str(&self) -> Option<&str> {
|
||||
if self.as_slice().len() < 5 {
|
||||
if self.as_slice().len() < TERM_METADATA_LENGTH {
|
||||
return None;
|
||||
}
|
||||
if self.typ() != Type::Str {
|
||||
@@ -307,7 +318,7 @@ where B: AsRef<[u8]>
|
||||
/// Returns `None` if the field is not of facet type
|
||||
/// or if the bytes are not valid utf-8.
|
||||
pub fn as_facet(&self) -> Option<Facet> {
|
||||
if self.as_slice().len() < 5 {
|
||||
if self.as_slice().len() < TERM_METADATA_LENGTH {
|
||||
return None;
|
||||
}
|
||||
if self.typ() != Type::Facet {
|
||||
@@ -321,7 +332,7 @@ where B: AsRef<[u8]>
|
||||
///
|
||||
/// Returns `None` if the field is not of bytes type.
|
||||
pub fn as_bytes(&self) -> Option<&[u8]> {
|
||||
if self.as_slice().len() < 5 {
|
||||
if self.as_slice().len() < TERM_METADATA_LENGTH {
|
||||
return None;
|
||||
}
|
||||
if self.typ() != Type::Bytes {
|
||||
@@ -337,7 +348,7 @@ where B: AsRef<[u8]>
|
||||
/// If the term is a u64, its value is encoded according
|
||||
/// to big-endian byte order.
|
||||
pub fn value_bytes(&self) -> &[u8] {
|
||||
&self.0.as_ref()[5..]
|
||||
&self.0.as_ref()[TERM_METADATA_LENGTH..]
|
||||
}
|
||||
|
||||
/// Returns the underlying `&[u8]`.
|
||||
@@ -451,6 +462,18 @@ mod tests {
|
||||
assert_eq!(term.as_str(), Some("test"))
|
||||
}
|
||||
|
||||
/// Size (in bytes) of the buffer of a fast value (u64, i64, f64, or date) term.
|
||||
/// <field> + <type byte> + <value len>
|
||||
///
|
||||
/// - <field> is a big endian encoded u32 field id
|
||||
/// - <type_byte>'s most significant bit expresses whether the term is a json term or not
|
||||
/// The remaining 7 bits are used to encode the type of the value.
|
||||
/// If this is a JSON term, the type is the type of the leaf of the json.
|
||||
///
|
||||
/// - <value> is, if this is not the json term, a binary representation specific to the type.
|
||||
/// If it is a JSON Term, then it is prepended with the path that leads to this leaf value.
|
||||
const FAST_VALUE_TERM_LEN: usize = 4 + 1 + 8;
|
||||
|
||||
#[test]
|
||||
pub fn test_term_u64() {
|
||||
let mut schema_builder = Schema::builder();
|
||||
@@ -458,7 +481,7 @@ mod tests {
|
||||
let term = Term::from_field_u64(count_field, 983u64);
|
||||
assert_eq!(term.field(), count_field);
|
||||
assert_eq!(term.typ(), Type::U64);
|
||||
assert_eq!(term.as_slice().len(), super::FAST_VALUE_TERM_LEN);
|
||||
assert_eq!(term.as_slice().len(), FAST_VALUE_TERM_LEN);
|
||||
assert_eq!(term.as_u64(), Some(983u64))
|
||||
}
|
||||
|
||||
@@ -469,7 +492,7 @@ mod tests {
|
||||
let term = Term::from_field_bool(bool_field, true);
|
||||
assert_eq!(term.field(), bool_field);
|
||||
assert_eq!(term.typ(), Type::Bool);
|
||||
assert_eq!(term.as_slice().len(), super::FAST_VALUE_TERM_LEN);
|
||||
assert_eq!(term.as_slice().len(), FAST_VALUE_TERM_LEN);
|
||||
assert_eq!(term.as_bool(), Some(true))
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user