From fcfd76ec5517e9afc25e7b73f8d47dfd25263ead Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Tue, 11 Oct 2022 19:17:31 +0800 Subject: [PATCH 1/3] refactor Term fixes some issues with Term Remove duplicate calls to truncate or resize Replace Magic Number 5 with constant Enforce minimum size of 5 for metadata Fix broken truncate docs use constructor instead new + set calls normalize constructor stack replace assert on internal behavior fixes #1585 --- src/indexer/json_term_writer.rs | 35 +++---- src/indexer/segment_writer.rs | 18 ++-- src/query/query_parser/query_parser.rs | 5 +- src/schema/term.rs | 127 +++++++++++++++---------- 4 files changed, 95 insertions(+), 90 deletions(-) diff --git a/src/indexer/json_term_writer.rs b/src/indexer/json_term_writer.rs index bee16df52..2a08afaaf 100644 --- a/src/indexer/json_term_writer.rs +++ b/src/indexer/json_term_writer.rs @@ -260,12 +260,8 @@ pub struct JsonTermWriter<'a> { } impl<'a> JsonTermWriter<'a> { - pub fn from_field_and_json_path( - field: Field, - json_path: &str, - term_buffer: &'a mut Term, - ) -> Self { - term_buffer.set_field(Type::Json, field); + pub fn from_json_path(json_path: &str, field: Field, term_buffer: &'a mut Term) -> Self { + term_buffer.set_field_and_type(field, Type::Json); let mut json_term_writer = Self::wrap(term_buffer); for segment in json_path.split('.') { json_term_writer.push_path_segment(segment); @@ -356,8 +352,7 @@ mod tests { #[test] fn test_json_writer() { let field = Field::from_field_id(1); - let mut term = Term::new(); - term.set_field(Type::Json, field); + let mut term = Term::with_type_and_field(Type::Json, field); let mut json_writer = JsonTermWriter::wrap(&mut term); json_writer.push_path_segment("attributes"); json_writer.push_path_segment("color"); @@ -391,8 +386,7 @@ mod tests { #[test] fn test_string_term() { let field = Field::from_field_id(1); - let mut term = Term::new(); - term.set_field(Type::Json, field); + let mut term = Term::with_type_and_field(Type::Json, 
field); let mut json_writer = JsonTermWriter::wrap(&mut term); json_writer.push_path_segment("color"); json_writer.set_str("red"); @@ -405,8 +399,7 @@ mod tests { #[test] fn test_i64_term() { let field = Field::from_field_id(1); - let mut term = Term::new(); - term.set_field(Type::Json, field); + let mut term = Term::with_type_and_field(Type::Json, field); let mut json_writer = JsonTermWriter::wrap(&mut term); json_writer.push_path_segment("color"); json_writer.set_fast_value(-4i64); @@ -419,8 +412,7 @@ mod tests { #[test] fn test_u64_term() { let field = Field::from_field_id(1); - let mut term = Term::new(); - term.set_field(Type::Json, field); + let mut term = Term::with_type_and_field(Type::Json, field); let mut json_writer = JsonTermWriter::wrap(&mut term); json_writer.push_path_segment("color"); json_writer.set_fast_value(4u64); @@ -433,8 +425,7 @@ mod tests { #[test] fn test_f64_term() { let field = Field::from_field_id(1); - let mut term = Term::new(); - term.set_field(Type::Json, field); + let mut term = Term::with_type_and_field(Type::Json, field); let mut json_writer = JsonTermWriter::wrap(&mut term); json_writer.push_path_segment("color"); json_writer.set_fast_value(4.0f64); @@ -447,8 +438,7 @@ mod tests { #[test] fn test_bool_term() { let field = Field::from_field_id(1); - let mut term = Term::new(); - term.set_field(Type::Json, field); + let mut term = Term::with_type_and_field(Type::Json, field); let mut json_writer = JsonTermWriter::wrap(&mut term); json_writer.push_path_segment("color"); json_writer.set_fast_value(true); @@ -461,8 +451,7 @@ mod tests { #[test] fn test_push_after_set_path_segment() { let field = Field::from_field_id(1); - let mut term = Term::new(); - term.set_field(Type::Json, field); + let mut term = Term::with_type_and_field(Type::Json, field); let mut json_writer = JsonTermWriter::wrap(&mut term); json_writer.push_path_segment("attribute"); json_writer.set_str("something"); @@ -477,8 +466,7 @@ mod tests { #[test] fn 
test_pop_segment() { let field = Field::from_field_id(1); - let mut term = Term::new(); - term.set_field(Type::Json, field); + let mut term = Term::with_type_and_field(Type::Json, field); let mut json_writer = JsonTermWriter::wrap(&mut term); json_writer.push_path_segment("color"); json_writer.push_path_segment("hue"); @@ -493,8 +481,7 @@ mod tests { #[test] fn test_json_writer_path() { let field = Field::from_field_id(1); - let mut term = Term::new(); - term.set_field(Type::Json, field); + let mut term = Term::with_type_and_field(Type::Json, field); let mut json_writer = JsonTermWriter::wrap(&mut term); json_writer.push_path_segment("color"); assert_eq!(json_writer.path(), b"color"); diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index 6b87afe35..29e9071b7 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -114,7 +114,7 @@ impl SegmentWriter { fast_field_writers: FastFieldsWriter::from_schema(&schema), doc_opstamps: Vec::with_capacity(1_000), per_field_text_analyzers, - term_buffer: Term::new(), + term_buffer: Term::with_capacity(16), schema, }) } @@ -178,7 +178,7 @@ impl SegmentWriter { let (term_buffer, ctx) = (&mut self.term_buffer, &mut self.ctx); let postings_writer: &mut dyn PostingsWriter = self.per_field_postings_writers.get_for_field_mut(field); - term_buffer.set_field(field_entry.field_type().value_type(), field); + term_buffer.clear_with_field_and_type(field_entry.field_type().value_type(), field); match *field_entry.field_type() { FieldType::Facet(_) => { @@ -220,7 +220,7 @@ impl SegmentWriter { } }; - assert_eq!(term_buffer.as_slice().len(), 5); + assert!(term_buffer.is_empty()); postings_writer.index_text( doc_id, &mut *token_stream, @@ -543,8 +543,7 @@ mod tests { let inv_idx = segment_reader.inverted_index(json_field).unwrap(); let term_dict = inv_idx.terms(); - let mut term = Term::new(); - term.set_field(Type::Json, json_field); + let mut term = Term::with_type_and_field(Type::Json, 
json_field); let mut term_stream = term_dict.stream().unwrap(); let mut json_term_writer = JsonTermWriter::wrap(&mut term); @@ -637,8 +636,7 @@ mod tests { let searcher = reader.searcher(); let segment_reader = searcher.segment_reader(0u32); let inv_index = segment_reader.inverted_index(json_field).unwrap(); - let mut term = Term::new(); - term.set_field(Type::Json, json_field); + let mut term = Term::with_type_and_field(Type::Json, json_field); let mut json_term_writer = JsonTermWriter::wrap(&mut term); json_term_writer.push_path_segment("mykey"); json_term_writer.set_str("token"); @@ -682,8 +680,7 @@ mod tests { let searcher = reader.searcher(); let segment_reader = searcher.segment_reader(0u32); let inv_index = segment_reader.inverted_index(json_field).unwrap(); - let mut term = Term::new(); - term.set_field(Type::Json, json_field); + let mut term = Term::with_type_and_field(Type::Json, json_field); let mut json_term_writer = JsonTermWriter::wrap(&mut term); json_term_writer.push_path_segment("mykey"); json_term_writer.set_str("two tokens"); @@ -728,8 +725,7 @@ mod tests { writer.commit().unwrap(); let reader = index.reader().unwrap(); let searcher = reader.searcher(); - let mut term = Term::new(); - term.set_field(Type::Json, json_field); + let mut term = Term::with_type_and_field(Type::Json, json_field); let mut json_term_writer = JsonTermWriter::wrap(&mut term); json_term_writer.push_path_segment("mykey"); json_term_writer.push_path_segment("field"); diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs index d14a09f21..355d224d0 100644 --- a/src/query/query_parser/query_parser.rs +++ b/src/query/query_parser/query_parser.rs @@ -734,9 +734,8 @@ fn generate_literals_for_json_object( index_record_option: IndexRecordOption, ) -> Result, QueryParserError> { let mut logical_literals = Vec::new(); - let mut term = Term::new(); - let mut json_term_writer = - JsonTermWriter::from_field_and_json_path(field, json_path, &mut term); 
+ let mut term = Term::with_capacity(100); + let mut json_term_writer = JsonTermWriter::from_json_path(json_path, field, &mut term); if let Some(term) = convert_to_fast_value_and_get_term(&mut json_term_writer, phrase) { logical_literals.push(LogicalLiteral::Term(term)); } diff --git a/src/schema/term.rs b/src/schema/term.rs index 9bfa7614b..a664741c0 100644 --- a/src/schema/term.rs +++ b/src/schema/term.rs @@ -7,18 +7,6 @@ use crate::fastfield::FastValue; use crate::schema::{Facet, Type}; use crate::{DatePrecision, DateTime}; -/// Size (in bytes) of the buffer of a fast value (u64, i64, f64, or date) term. -/// <field> + <type byte> + <value len> -/// -/// - <field> is a big endian encoded u32 field id -/// - <type byte>'s most significant bit expresses whether the term is a json term or not -/// The remaining 7 bits are used to encode the type of the value. -/// If this is a JSON term, the type is the type of the leaf of the json. -/// -/// - <value> is, if this is not the json term, a binary representation specific to the type. -/// If it is a JSON Term, then it is prepended with the path that leads to this leaf value. -const FAST_VALUE_TERM_LEN: usize = 4 + 1 + 8; - /// Separates the different segments of /// the json path. pub const JSON_PATH_SEGMENT_SEP: u8 = 1u8; @@ -42,18 +30,50 @@ impl AsMut<Vec<u8>> for Term { } } +/// The number of bytes used as metadata by `Term`. 
+const TERM_METADATA_LENGTH: usize = 5; + impl Term { - pub(crate) fn new() -> Term { - Term(Vec::with_capacity(100)) + pub(crate) fn with_capacity(capacity: usize) -> Term { + let mut data = Vec::with_capacity(TERM_METADATA_LENGTH + capacity); + data.resize(TERM_METADATA_LENGTH, 0u8); + Term(data) + } + + pub(crate) fn with_type_and_field(typ: Type, field: Field) -> Term { + let mut term = Self::with_capacity(8); + term.set_field_and_type(field, typ); + term + } + + fn with_bytes_and_field_and_payload(typ: Type, field: Field, bytes: &[u8]) -> Term { + let mut term = Self::with_capacity(bytes.len()); + term.set_field_and_type(field, typ); + term.0.extend_from_slice(bytes); + term } fn from_fast_value(field: Field, val: &T) -> Term { - let mut term = Term(vec![0u8; FAST_VALUE_TERM_LEN]); - term.set_field(T::to_type(), field); + let mut term = Self::with_type_and_field(T::to_type(), field); + term.set_field_and_type(field, T::to_type()); term.set_u64(val.to_u64()); term } + /// Panics when there are byte values. + /// + /// Sets field and the type. + pub(crate) fn set_field_and_type(&mut self, field: Field, typ: Type) { + assert_eq!(self.0.len(), TERM_METADATA_LENGTH); + self.0[0..4].clone_from_slice(field.field_id().to_be_bytes().as_ref()); + self.0[4] = typ.to_code(); + } + + /// Is empty if there are no value bytes. + pub fn is_empty(&self) -> bool { + self.0.len() == TERM_METADATA_LENGTH + } + /// Builds a term given a field, and a `u64`-value pub fn from_field_u64(field: Field, val: u64) -> Term { Term::from_fast_value(field, &val) @@ -82,31 +102,29 @@ impl Term { /// Creates a `Term` given a facet. 
pub fn from_facet(field: Field, facet: &Facet) -> Term { let facet_encoded_str = facet.encoded_str(); - Term::create_bytes_term(Type::Facet, field, facet_encoded_str.as_bytes()) + Term::with_bytes_and_field_and_payload(Type::Facet, field, facet_encoded_str.as_bytes()) } /// Builds a term given a field, and a string value pub fn from_field_text(field: Field, text: &str) -> Term { - Term::create_bytes_term(Type::Str, field, text.as_bytes()) - } - - fn create_bytes_term(typ: Type, field: Field, bytes: &[u8]) -> Term { - let mut term = Term(vec![0u8; 5 + bytes.len()]); - term.set_field(typ, field); - term.0.extend_from_slice(bytes); - term + Term::with_bytes_and_field_and_payload(Type::Str, field, text.as_bytes()) } /// Builds a term bytes. pub fn from_field_bytes(field: Field, bytes: &[u8]) -> Term { - Term::create_bytes_term(Type::Bytes, field, bytes) + Term::with_bytes_and_field_and_payload(Type::Bytes, field, bytes) } - pub(crate) fn set_field(&mut self, typ: Type, field: Field) { - self.0.clear(); - self.0 - .extend_from_slice(field.field_id().to_be_bytes().as_ref()); - self.0.push(typ.to_code()); + /// Removes the value_bytes and set the field and type code. + pub(crate) fn clear_with_field_and_type(&mut self, typ: Type, field: Field) { + self.truncate(TERM_METADATA_LENGTH); + self.set_field_and_type(field, typ); + } + + /// Removes the value_bytes and set the type code. + pub fn clear_with_type(&mut self, typ: Type) { + self.truncate(TERM_METADATA_LENGTH); + self.0[4] = typ.to_code(); } /// Sets a u64 value in the term. @@ -117,12 +135,6 @@ impl Term { /// the natural order of the values. pub fn set_u64(&mut self, val: u64) { self.set_fast_value(val); - self.set_bytes(val.to_be_bytes().as_ref()); - } - - fn set_fast_value(&mut self, val: T) { - self.0.resize(FAST_VALUE_TERM_LEN, 0u8); - self.set_bytes(val.to_u64().to_be_bytes().as_ref()); } /// Sets a `i64` value in the term. 
@@ -145,9 +157,13 @@ impl Term { self.set_fast_value(val); } + fn set_fast_value(&mut self, val: T) { + self.set_bytes(val.to_u64().to_be_bytes().as_ref()); + } + /// Sets the value of a `Bytes` field. pub fn set_bytes(&mut self, bytes: &[u8]) { - self.0.resize(5, 0u8); + self.0.truncate(TERM_METADATA_LENGTH); self.0.extend(bytes); } @@ -156,18 +172,13 @@ impl Term { self.set_bytes(text.as_bytes()); } - /// Removes the value_bytes and set the type code. - pub fn clear_with_type(&mut self, typ: Type) { - self.truncate(5); - self.0[4] = typ.to_code(); - } - - /// Truncate the term right after the field and the type code. + /// Truncates the term. The new length needs to be at least 5, which is reserved for metadata. pub fn truncate(&mut self, len: usize) { + assert!(len >= TERM_METADATA_LENGTH); self.0.truncate(len); } - /// Truncate the term right after the field and the type code. + /// Appends value bytes to the Term. pub fn append_bytes(&mut self, bytes: &[u8]) { self.0.extend_from_slice(bytes); } @@ -293,7 +304,7 @@ where B: AsRef<[u8]> /// Returns `None` if the field is not of string type /// or if the bytes are not valid utf-8. pub fn as_str(&self) -> Option<&str> { - if self.as_slice().len() < 5 { + if self.as_slice().len() < TERM_METADATA_LENGTH { return None; } if self.typ() != Type::Str { @@ -307,7 +318,7 @@ where B: AsRef<[u8]> /// Returns `None` if the field is not of facet type /// or if the bytes are not valid utf-8. pub fn as_facet(&self) -> Option { - if self.as_slice().len() < 5 { + if self.as_slice().len() < TERM_METADATA_LENGTH { return None; } if self.typ() != Type::Facet { @@ -321,7 +332,7 @@ where B: AsRef<[u8]> /// /// Returns `None` if the field is not of bytes type. 
pub fn as_bytes(&self) -> Option<&[u8]> { - if self.as_slice().len() < 5 { + if self.as_slice().len() < TERM_METADATA_LENGTH { return None; } if self.typ() != Type::Bytes { @@ -337,7 +348,7 @@ where B: AsRef<[u8]> /// If the term is a u64, its value is encoded according /// to `byteorder::LittleEndian`. pub fn value_bytes(&self) -> &[u8] { - &self.0.as_ref()[5..] + &self.0.as_ref()[TERM_METADATA_LENGTH..] } /// Returns the underlying `&[u8]`. @@ -451,6 +462,18 @@ mod tests { assert_eq!(term.as_str(), Some("test")) } + /// Size (in bytes) of the buffer of a fast value (u64, i64, f64, or date) term. + /// <field> + <type byte> + <value len> + /// + /// - <field> is a big endian encoded u32 field id + /// - <type byte>'s most significant bit expresses whether the term is a json term or not + /// The remaining 7 bits are used to encode the type of the value. + /// If this is a JSON term, the type is the type of the leaf of the json. + /// + /// - <value> is, if this is not the json term, a binary representation specific to the type. + /// If it is a JSON Term, then it is prepended with the path that leads to this leaf value. 
+ const FAST_VALUE_TERM_LEN: usize = 4 + 1 + 8; + #[test] pub fn test_term_u64() { let mut schema_builder = Schema::builder(); @@ -458,7 +481,7 @@ mod tests { let term = Term::from_field_u64(count_field, 983u64); assert_eq!(term.field(), count_field); assert_eq!(term.typ(), Type::U64); - assert_eq!(term.as_slice().len(), super::FAST_VALUE_TERM_LEN); + assert_eq!(term.as_slice().len(), FAST_VALUE_TERM_LEN); assert_eq!(term.as_u64(), Some(983u64)) } @@ -469,7 +492,7 @@ mod tests { let term = Term::from_field_bool(bool_field, true); assert_eq!(term.field(), bool_field); assert_eq!(term.typ(), Type::Bool); - assert_eq!(term.as_slice().len(), super::FAST_VALUE_TERM_LEN); + assert_eq!(term.as_slice().len(), FAST_VALUE_TERM_LEN); assert_eq!(term.as_bool(), Some(true)) } } From 8d75e451bd18740e3f309dcdc7e6f6f05244dd95 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Fri, 14 Oct 2022 13:37:29 +0800 Subject: [PATCH 2/3] fix truncate, remove mutable access from term --- src/indexer/json_term_writer.rs | 37 ++++++++++++++----------- src/postings/postings_writer.rs | 6 ++-- src/query/query_parser/query_parser.rs | 3 +- src/schema/term.rs | 38 +++++++++++++------------- 4 files changed, 45 insertions(+), 39 deletions(-) diff --git a/src/indexer/json_term_writer.rs b/src/indexer/json_term_writer.rs index 2a08afaaf..68e0955c2 100644 --- a/src/indexer/json_term_writer.rs +++ b/src/indexer/json_term_writer.rs @@ -242,10 +242,12 @@ pub(crate) fn set_string_and_get_terms( ) -> Vec<(usize, Term)> { let mut positions_and_terms = Vec::<(usize, Term)>::new(); json_term_writer.close_path_and_set_type(Type::Str); - let term_num_bytes = json_term_writer.term_buffer.as_slice().len(); + let term_num_bytes = json_term_writer.term_buffer.len_bytes(); let mut token_stream = text_analyzer.token_stream(value); token_stream.process(&mut |token| { - json_term_writer.term_buffer.truncate(term_num_bytes); + json_term_writer + .term_buffer + .truncate_value_bytes(term_num_bytes); json_term_writer 
.term_buffer .append_bytes(token.text.as_bytes()); @@ -260,7 +262,11 @@ pub struct JsonTermWriter<'a> { } impl<'a> JsonTermWriter<'a> { - pub fn from_json_path(json_path: &str, field: Field, term_buffer: &'a mut Term) -> Self { + pub fn from_field_and_json_path( + field: Field, + json_path: &str, + term_buffer: &'a mut Term, + ) -> Self { term_buffer.set_field_and_type(field, Type::Json); let mut json_term_writer = Self::wrap(term_buffer); for segment in json_path.split('.') { @@ -272,7 +278,7 @@ impl<'a> JsonTermWriter<'a> { pub fn wrap(term_buffer: &'a mut Term) -> Self { term_buffer.clear_with_type(Type::Json); let mut path_stack = Vec::with_capacity(10); - path_stack.push(5); + path_stack.push(0); Self { term_buffer, path_stack, @@ -281,28 +287,28 @@ impl<'a> JsonTermWriter<'a> { fn trim_to_end_of_path(&mut self) { let end_of_path = *self.path_stack.last().unwrap(); - self.term_buffer.truncate(end_of_path); + self.term_buffer.truncate_value_bytes(end_of_path); } pub fn close_path_and_set_type(&mut self, typ: Type) { self.trim_to_end_of_path(); - let buffer = self.term_buffer.as_mut(); + let buffer = self.term_buffer.value_bytes_mut(); let buffer_len = buffer.len(); buffer[buffer_len - 1] = JSON_END_OF_PATH; - buffer.push(typ.to_code()); + self.term_buffer.append_bytes(&[typ.to_code()]); } pub fn push_path_segment(&mut self, segment: &str) { // the path stack should never be empty. 
self.trim_to_end_of_path(); - let buffer = self.term_buffer.as_mut(); + let buffer = self.term_buffer.value_bytes_mut(); let buffer_len = buffer.len(); if self.path_stack.len() > 1 { buffer[buffer_len - 1] = JSON_PATH_SEGMENT_SEP; } - buffer.extend(segment.as_bytes()); - buffer.push(JSON_PATH_SEGMENT_SEP); - self.path_stack.push(buffer.len()); + self.term_buffer.append_bytes(segment.as_bytes()); + self.term_buffer.append_bytes(&[JSON_PATH_SEGMENT_SEP]); + self.path_stack.push(self.term_buffer.len_bytes()); } pub fn pop_path_segment(&mut self) { @@ -314,8 +320,8 @@ impl<'a> JsonTermWriter<'a> { /// Returns the json path of the term being currently built. #[cfg(test)] pub(crate) fn path(&self) -> &[u8] { - let end_of_path = self.path_stack.last().cloned().unwrap_or(6); - &self.term().as_slice()[5..end_of_path - 1] + let end_of_path = self.path_stack.last().cloned().unwrap_or(1); + &self.term().value_bytes()[..end_of_path - 1] } pub fn set_fast_value(&mut self, val: T) { @@ -328,14 +334,13 @@ impl<'a> JsonTermWriter<'a> { val.to_u64() }; self.term_buffer - .as_mut() - .extend_from_slice(value.to_be_bytes().as_slice()); + .append_bytes(value.to_be_bytes().as_slice()); } #[cfg(test)] pub(crate) fn set_str(&mut self, text: &str) { self.close_path_and_set_type(Type::Str); - self.term_buffer.as_mut().extend_from_slice(text.as_bytes()); + self.term_buffer.append_bytes(text.as_bytes()); } pub fn term(&self) -> &Term { diff --git a/src/postings/postings_writer.rs b/src/postings/postings_writer.rs index 552964a2d..5581479aa 100644 --- a/src/postings/postings_writer.rs +++ b/src/postings/postings_writer.rs @@ -153,7 +153,7 @@ pub(crate) trait PostingsWriter: Send + Sync { indexing_position: &mut IndexingPosition, mut term_id_fast_field_writer_opt: Option<&mut MultiValuedFastFieldWriter>, ) { - let end_of_path_idx = term_buffer.as_slice().len(); + let end_of_path_idx = term_buffer.len_bytes(); let mut num_tokens = 0; let mut end_position = 0; token_stream.process(&mut |token: 
&Token| { @@ -167,7 +167,7 @@ pub(crate) trait PostingsWriter: Send + Sync { ); return; } - term_buffer.truncate(end_of_path_idx); + term_buffer.truncate_value_bytes(end_of_path_idx); term_buffer.append_bytes(token.text.as_bytes()); let start_position = indexing_position.end_position + token.position as u32; end_position = start_position + token.position_length as u32; @@ -181,7 +181,7 @@ pub(crate) trait PostingsWriter: Send + Sync { indexing_position.end_position = end_position + POSITION_GAP; indexing_position.num_tokens += num_tokens; - term_buffer.truncate(end_of_path_idx); + term_buffer.truncate_value_bytes(end_of_path_idx); } fn total_num_tokens(&self) -> u64; diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs index 355d224d0..729ac10ce 100644 --- a/src/query/query_parser/query_parser.rs +++ b/src/query/query_parser/query_parser.rs @@ -735,7 +735,8 @@ fn generate_literals_for_json_object( ) -> Result, QueryParserError> { let mut logical_literals = Vec::new(); let mut term = Term::with_capacity(100); - let mut json_term_writer = JsonTermWriter::from_json_path(json_path, field, &mut term); + let mut json_term_writer = + JsonTermWriter::from_field_and_json_path(field, json_path, &mut term); if let Some(term) = convert_to_fast_value_and_get_term(&mut json_term_writer, phrase) { logical_literals.push(LogicalLiteral::Term(term)); } diff --git a/src/schema/term.rs b/src/schema/term.rs index a664741c0..00264e7ed 100644 --- a/src/schema/term.rs +++ b/src/schema/term.rs @@ -24,12 +24,6 @@ pub const JSON_END_OF_PATH: u8 = 0u8; pub struct Term>(B) where B: AsRef<[u8]>; -impl AsMut> for Term { - fn as_mut(&mut self) -> &mut Vec { - &mut self.0 - } -} - /// The number of bytes used as metadata by `Term`. 
const TERM_METADATA_LENGTH: usize = 5; @@ -55,16 +49,16 @@ impl Term { fn from_fast_value(field: Field, val: &T) -> Term { let mut term = Self::with_type_and_field(T::to_type(), field); - term.set_field_and_type(field, T::to_type()); term.set_u64(val.to_u64()); term } - /// Panics when there are byte values. + /// Panics when the term is not empty... ie: some value is set. + /// Use `clear_with_field_and_type` in that case. /// /// Sets field and the type. pub(crate) fn set_field_and_type(&mut self, field: Field, typ: Type) { - assert_eq!(self.0.len(), TERM_METADATA_LENGTH); + assert!(self.is_empty()); self.0[0..4].clone_from_slice(field.field_id().to_be_bytes().as_ref()); self.0[4] = typ.to_code(); } @@ -173,11 +167,26 @@ impl Term { } /// Truncates the term. The new length needs to be at least 5, which is reserved for metadata. - pub fn truncate(&mut self, len: usize) { + fn truncate(&mut self, len: usize) { assert!(len >= TERM_METADATA_LENGTH); self.0.truncate(len); } + /// Truncates the value bytes of the term. Value and field type stays the same. + pub fn truncate_value_bytes(&mut self, len: usize) { + self.0.truncate(len + TERM_METADATA_LENGTH); + } + + /// Returns the value bytes as mutable slice + pub fn value_bytes_mut(&mut self) -> &mut [u8] { + &mut self.0[TERM_METADATA_LENGTH..] + } + + /// The length of the bytes. + pub fn len_bytes(&self) -> usize { + self.0.len() - TERM_METADATA_LENGTH + } + /// Appends value bytes to the Term. pub fn append_bytes(&mut self, bytes: &[u8]) { self.0.extend_from_slice(bytes); @@ -304,9 +313,6 @@ where B: AsRef<[u8]> /// Returns `None` if the field is not of string type /// or if the bytes are not valid utf-8. pub fn as_str(&self) -> Option<&str> { - if self.as_slice().len() < TERM_METADATA_LENGTH { - return None; - } if self.typ() != Type::Str { return None; } @@ -318,9 +324,6 @@ where B: AsRef<[u8]> /// Returns `None` if the field is not of facet type /// or if the bytes are not valid utf-8. 
pub fn as_facet(&self) -> Option { - if self.as_slice().len() < TERM_METADATA_LENGTH { - return None; - } if self.typ() != Type::Facet { return None; } @@ -332,9 +335,6 @@ where B: AsRef<[u8]> /// /// Returns `None` if the field is not of bytes type. pub fn as_bytes(&self) -> Option<&[u8]> { - if self.as_slice().len() < TERM_METADATA_LENGTH { - return None; - } if self.typ() != Type::Bytes { return None; } From 024e53a99c2ec6029ce72811841e6e68dbd0cae1 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Fri, 14 Oct 2022 15:19:45 +0800 Subject: [PATCH 3/3] remove truncate --- src/schema/term.rs | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/schema/term.rs b/src/schema/term.rs index 00264e7ed..8a398818a 100644 --- a/src/schema/term.rs +++ b/src/schema/term.rs @@ -111,13 +111,13 @@ impl Term { /// Removes the value_bytes and set the field and type code. pub(crate) fn clear_with_field_and_type(&mut self, typ: Type, field: Field) { - self.truncate(TERM_METADATA_LENGTH); + self.truncate_value_bytes(0); self.set_field_and_type(field, typ); } /// Removes the value_bytes and set the type code. pub fn clear_with_type(&mut self, typ: Type) { - self.truncate(TERM_METADATA_LENGTH); + self.truncate_value_bytes(0); self.0[4] = typ.to_code(); } @@ -157,7 +157,7 @@ impl Term { /// Sets the value of a `Bytes` field. pub fn set_bytes(&mut self, bytes: &[u8]) { - self.0.truncate(TERM_METADATA_LENGTH); + self.truncate_value_bytes(0); self.0.extend(bytes); } @@ -166,12 +166,6 @@ impl Term { self.set_bytes(text.as_bytes()); } - /// Truncates the term. The new length needs to be at least 5, which is reserved for metadata. - fn truncate(&mut self, len: usize) { - assert!(len >= TERM_METADATA_LENGTH); - self.0.truncate(len); - } - /// Truncates the value bytes of the term. Value and field type stays the same. pub fn truncate_value_bytes(&mut self, len: usize) { self.0.truncate(len + TERM_METADATA_LENGTH);