mirror of https://github.com/quickwit-oss/tantivy.git
synced 2025-12-23 02:29:57 +00:00

refactor Term (#2006)

* refactor Term
  - add ValueBytes for serialized term values
  - add missing debug for ip
  - skip unnecessary json path validation
  - remove code duplication
  - add DATE_TIME_PRECISION_INDEXED constant
  - add missing Term clarification
  - remove weird value_bytes_mut() API
* fix naming
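At the API level, the refactor separates raw-byte access from typed access: the full serialized term is exposed as serialized_term(), the value portion (without the field id) as serialized_value_bytes(), and typed reads go through the ValueBytes view returned by Term::value(). A minimal sketch of how call sites change, assuming a plain text field (the field name is illustrative, not taken from this diff):

    use tantivy::schema::{Schema, TEXT};
    use tantivy::Term;

    fn main() {
        let mut schema_builder = Schema::builder();
        let title = schema_builder.add_text_field("title", TEXT);
        let _schema = schema_builder.build();

        let term = Term::from_field_text(title, "hello");

        // Before this commit: term.as_slice() / term.value_bytes() / term.as_str().
        // After it: explicitly named raw accessors, plus a typed ValueBytes view.
        let _whole_term = term.serialized_term();          // field id + type code + value
        let _value_only = term.serialized_value_bytes();   // value bytes, no field id
        assert_eq!(term.value().as_str(), Some("hello"));
    }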
@@ -1,5 +1,5 @@
 //! SIMD filtering of a vector as described in the following blog post.
-//! https://quickwit.io/blog/filtering%20a%20vector%20with%20simd%20instructions%20avx-2%20and%20avx-512
+//! <https://quickwit.io/blog/filtering%20a%20vector%20with%20simd%20instructions%20avx-2%20and%20avx-512>
 use std::arch::x86_64::{
     __m256i as DataType, _mm256_add_epi32 as op_add, _mm256_cmpgt_epi32 as op_greater,
     _mm256_lddqu_si256 as load_unaligned, _mm256_or_si256 as op_or, _mm256_set1_epi32 as set1,

@@ -61,7 +61,7 @@ impl InvertedIndexReader {

     /// Returns the term info associated with the term.
     pub fn get_term_info(&self, term: &Term) -> io::Result<Option<TermInfo>> {
-        self.termdict.get(term.value_bytes())
+        self.termdict.get(term.serialized_value_bytes())
     }

     /// Return the term dictionary datastructure.

@@ -203,7 +203,7 @@ impl InvertedIndexReader {
 #[cfg(feature = "quickwit")]
 impl InvertedIndexReader {
     pub(crate) async fn get_term_info_async(&self, term: &Term) -> io::Result<Option<TermInfo>> {
-        self.termdict.get_async(term.value_bytes()).await
+        self.termdict.get_async(term.serialized_value_bytes()).await
     }

     /// Returns a block postings given a `Term`.

@@ -5,12 +5,12 @@ use rustc_hash::FxHashMap;

 use crate::fastfield::FastValue;
 use crate::postings::{IndexingContext, IndexingPosition, PostingsWriter};
-use crate::schema::term::{JSON_END_OF_PATH, JSON_PATH_SEGMENT_SEP, JSON_PATH_SEGMENT_SEP_STR};
-use crate::schema::{Field, Type};
+use crate::schema::term::{JSON_PATH_SEGMENT_SEP, JSON_PATH_SEGMENT_SEP_STR};
+use crate::schema::{Field, Type, DATE_TIME_PRECISION_INDEXED};
 use crate::time::format_description::well_known::Rfc3339;
 use crate::time::{OffsetDateTime, UtcOffset};
 use crate::tokenizer::TextAnalyzer;
-use crate::{DatePrecision, DateTime, DocId, Term};
+use crate::{DateTime, DocId, Term};

 /// This object is a map storing the last position for a given path for the current document
 /// being indexed.
@@ -59,7 +59,7 @@ struct IndexingPositionsPerPath {
 impl IndexingPositionsPerPath {
     fn get_position(&mut self, term: &Term) -> &mut IndexingPosition {
         self.positions_per_path
-            .entry(murmurhash2(term.as_slice()))
+            .entry(murmurhash2(term.serialized_term()))
             .or_insert_with(Default::default)
     }
 }

@@ -257,6 +257,9 @@ pub(crate) fn set_string_and_get_terms(
     positions_and_terms
 }

+/// Writes a value of a JSON field to a `Term`.
+/// The Term format is as follows:
+/// [JSON_TYPE][JSON_PATH][JSON_END_OF_PATH][VALUE_BYTES]
 pub struct JsonTermWriter<'a> {
     term_buffer: &'a mut Term,
     path_stack: Vec<usize>,
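The layout that this new doc comment describes shows up literally in the tests later in this diff: for field id 1, path "color" and string value "red", the serialized term is b"\x00\x00\x00\x01jcolor\x00sred". A small standalone sketch of how those bytes decompose; the local JSON_END_OF_PATH constant mirrors the one defined in schema::term, and the byte string is copied from the test below:

    // Illustrative decomposition of a serialized JSON term.
    const JSON_END_OF_PATH: u8 = 0u8; // mirrors schema::term::JSON_END_OF_PATH

    fn main() {
        let serialized: &[u8] = b"\x00\x00\x00\x01jcolor\x00sred";
        let (field_id, rest) = serialized.split_at(4); // 4-byte big-endian field id
        assert_eq!(u32::from_be_bytes(field_id.try_into().unwrap()), 1);
        assert_eq!(rest[0], b'j'); // type code: Json
        let end_of_path = rest.iter().position(|&b| b == JSON_END_OF_PATH).unwrap();
        assert_eq!(&rest[1..end_of_path], b"color"); // JSON path
        assert_eq!(rest[end_of_path + 1], b's'); // value type code: Str
        assert_eq!(&rest[end_of_path + 2..], b"red"); // value bytes
    }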
@@ -355,27 +358,23 @@ impl<'a> JsonTermWriter<'a> {

     pub fn close_path_and_set_type(&mut self, typ: Type) {
         self.trim_to_end_of_path();
-        let buffer = self.term_buffer.value_bytes_mut();
-        let buffer_len = buffer.len();
-        buffer[buffer_len - 1] = JSON_END_OF_PATH;
+        self.term_buffer.set_json_path_end();
         self.term_buffer.append_bytes(&[typ.to_code()]);
     }

     pub fn push_path_segment(&mut self, segment: &str) {
         // the path stack should never be empty.
         self.trim_to_end_of_path();
-        let buffer = self.term_buffer.value_bytes_mut();
-        let buffer_len = buffer.len();

         if self.path_stack.len() > 1 {
-            buffer[buffer_len - 1] = JSON_PATH_SEGMENT_SEP;
+            self.term_buffer.set_json_path_separator();
         }
         let appended_segment = self.term_buffer.append_bytes(segment.as_bytes());
         if self.expand_dots_enabled {
             // We need to replace `.` by JSON_PATH_SEGMENT_SEP.
             replace_in_place(b'.', JSON_PATH_SEGMENT_SEP, appended_segment);
         }
-        self.term_buffer.push_byte(JSON_PATH_SEGMENT_SEP);
+        self.term_buffer.add_json_path_separator();
         self.path_stack.push(self.term_buffer.len_bytes());
     }
@@ -389,14 +388,14 @@ impl<'a> JsonTermWriter<'a> {
     #[cfg(test)]
     pub(crate) fn path(&self) -> &[u8] {
         let end_of_path = self.path_stack.last().cloned().unwrap_or(1);
-        &self.term().value_bytes()[..end_of_path - 1]
+        &self.term().serialized_value_bytes()[..end_of_path - 1]
     }

     pub(crate) fn set_fast_value<T: FastValue>(&mut self, val: T) {
         self.close_path_and_set_type(T::to_type());
         let value = if T::to_type() == Type::Date {
             DateTime::from_u64(val.to_u64())
-                .truncate(DatePrecision::Seconds)
+                .truncate(DATE_TIME_PRECISION_INDEXED)
                 .to_u64()
         } else {
             val.to_u64()
@@ -431,12 +430,12 @@ mod tests {
         json_writer.set_str("red");
         assert_eq!(
             format!("{:?}", json_writer.term()),
-            "Term(type=Json, field=1, path=attributes.color, vtype=Str, \"red\")"
+            "Term(field=1, type=Json, path=attributes.color, type=Str, \"red\")"
         );
         json_writer.set_str("blue");
         assert_eq!(
             format!("{:?}", json_writer.term()),
-            "Term(type=Json, field=1, path=attributes.color, vtype=Str, \"blue\")"
+            "Term(field=1, type=Json, path=attributes.color, type=Str, \"blue\")"
         );
         json_writer.pop_path_segment();
         json_writer.push_path_segment("dimensions");

@@ -444,14 +443,14 @@ mod tests {
         json_writer.set_fast_value(400i64);
         assert_eq!(
             format!("{:?}", json_writer.term()),
-            "Term(type=Json, field=1, path=attributes.dimensions.width, vtype=I64, 400)"
+            "Term(field=1, type=Json, path=attributes.dimensions.width, type=I64, 400)"
         );
         json_writer.pop_path_segment();
         json_writer.push_path_segment("height");
         json_writer.set_fast_value(300i64);
         assert_eq!(
             format!("{:?}", json_writer.term()),
-            "Term(type=Json, field=1, path=attributes.dimensions.height, vtype=I64, 300)"
+            "Term(field=1, type=Json, path=attributes.dimensions.height, type=I64, 300)"
         );
     }
@@ -463,7 +462,7 @@ mod tests {
|
||||
json_writer.push_path_segment("color");
|
||||
json_writer.set_str("red");
|
||||
assert_eq!(
|
||||
json_writer.term().as_slice(),
|
||||
json_writer.term().serialized_term(),
|
||||
b"\x00\x00\x00\x01jcolor\x00sred"
|
||||
)
|
||||
}
|
||||
@@ -476,7 +475,7 @@ mod tests {
|
||||
json_writer.push_path_segment("color");
|
||||
json_writer.set_fast_value(-4i64);
|
||||
assert_eq!(
|
||||
json_writer.term().as_slice(),
|
||||
json_writer.term().serialized_term(),
|
||||
b"\x00\x00\x00\x01jcolor\x00i\x7f\xff\xff\xff\xff\xff\xff\xfc"
|
||||
)
|
||||
}
|
||||
@@ -489,7 +488,7 @@ mod tests {
|
||||
json_writer.push_path_segment("color");
|
||||
json_writer.set_fast_value(4u64);
|
||||
assert_eq!(
|
||||
json_writer.term().as_slice(),
|
||||
json_writer.term().serialized_term(),
|
||||
b"\x00\x00\x00\x01jcolor\x00u\x00\x00\x00\x00\x00\x00\x00\x04"
|
||||
)
|
||||
}
|
||||
@@ -502,7 +501,7 @@ mod tests {
|
||||
json_writer.push_path_segment("color");
|
||||
json_writer.set_fast_value(4.0f64);
|
||||
assert_eq!(
|
||||
json_writer.term().as_slice(),
|
||||
json_writer.term().serialized_term(),
|
||||
b"\x00\x00\x00\x01jcolor\x00f\xc0\x10\x00\x00\x00\x00\x00\x00"
|
||||
)
|
||||
}
|
||||
@@ -515,7 +514,7 @@ mod tests {
|
||||
json_writer.push_path_segment("color");
|
||||
json_writer.set_fast_value(true);
|
||||
assert_eq!(
|
||||
json_writer.term().as_slice(),
|
||||
json_writer.term().serialized_term(),
|
||||
b"\x00\x00\x00\x01jcolor\x00o\x00\x00\x00\x00\x00\x00\x00\x01"
|
||||
)
|
||||
}
|
||||
@@ -530,7 +529,7 @@ mod tests {
|
||||
json_writer.push_path_segment("color");
|
||||
json_writer.set_str("red");
|
||||
assert_eq!(
|
||||
json_writer.term().as_slice(),
|
||||
json_writer.term().serialized_term(),
|
||||
b"\x00\x00\x00\x01jattribute\x01color\x00sred"
|
||||
)
|
||||
}
|
||||
@@ -545,7 +544,7 @@ mod tests {
|
||||
json_writer.pop_path_segment();
|
||||
json_writer.set_str("red");
|
||||
assert_eq!(
|
||||
json_writer.term().as_slice(),
|
||||
json_writer.term().serialized_term(),
|
||||
b"\x00\x00\x00\x01jcolor\x00sred"
|
||||
)
|
||||
}
|
||||
|
||||
@@ -14,7 +14,7 @@
 //! Fields have to be declared as `FAST` in the schema.
 //! Currently supported fields are: u64, i64, f64, bytes, ip and text.
 //!
-//! Fast fields are stored in with [different codecs](fastfield_codecs). The best codec is detected
+//! Fast fields are stored in with [different codecs](columnar). The best codec is detected
 //! automatically, when serializing.
 //!
 //! Read access performance is comparable to that of an array lookup.
@@ -12,10 +12,10 @@ use crate::postings::{
     compute_table_memory_size, serialize_postings, IndexingContext, IndexingPosition,
     PerFieldPostingsWriter, PostingsWriter,
 };
-use crate::schema::{FieldEntry, FieldType, Schema, Term, Value};
+use crate::schema::{FieldEntry, FieldType, Schema, Term, Value, DATE_TIME_PRECISION_INDEXED};
 use crate::store::{StoreReader, StoreWriter};
 use crate::tokenizer::{FacetTokenizer, PreTokenizedStream, TextAnalyzer, Tokenizer};
-use crate::{DatePrecision, DocId, Document, Opstamp, SegmentComponent};
+use crate::{DocId, Document, Opstamp, SegmentComponent};

 /// Computes the initial size of the hash table.
 ///
@@ -246,7 +246,8 @@ impl SegmentWriter {
                 for value in values {
                     num_vals += 1;
                     let date_val = value.as_date().ok_or_else(make_schema_error)?;
-                    term_buffer.set_u64(date_val.truncate(DatePrecision::Seconds).to_u64());
+                    term_buffer
+                        .set_u64(date_val.truncate(DATE_TIME_PRECISION_INDEXED).to_u64());
                     postings_writer.subscribe(doc_id, 0u32, term_buffer, ctx);
                 }
                 if field_entry.has_fieldnorms() {
@@ -551,14 +552,20 @@ mod tests {
|
||||
json_term_writer.push_path_segment("bool");
|
||||
json_term_writer.set_fast_value(true);
|
||||
assert!(term_stream.advance());
|
||||
assert_eq!(term_stream.key(), json_term_writer.term().value_bytes());
|
||||
assert_eq!(
|
||||
term_stream.key(),
|
||||
json_term_writer.term().serialized_value_bytes()
|
||||
);
|
||||
|
||||
json_term_writer.pop_path_segment();
|
||||
json_term_writer.push_path_segment("complexobject");
|
||||
json_term_writer.push_path_segment("field.with.dot");
|
||||
json_term_writer.set_fast_value(1i64);
|
||||
assert!(term_stream.advance());
|
||||
assert_eq!(term_stream.key(), json_term_writer.term().value_bytes());
|
||||
assert_eq!(
|
||||
term_stream.key(),
|
||||
json_term_writer.term().serialized_value_bytes()
|
||||
);
|
||||
|
||||
json_term_writer.pop_path_segment();
|
||||
json_term_writer.pop_path_segment();
|
||||
@@ -567,55 +574,85 @@ mod tests {
|
||||
OffsetDateTime::parse("1985-04-12T23:20:50.52Z", &Rfc3339).unwrap(),
|
||||
));
|
||||
assert!(term_stream.advance());
|
||||
assert_eq!(term_stream.key(), json_term_writer.term().value_bytes());
|
||||
assert_eq!(
|
||||
term_stream.key(),
|
||||
json_term_writer.term().serialized_value_bytes()
|
||||
);
|
||||
|
||||
json_term_writer.pop_path_segment();
|
||||
json_term_writer.push_path_segment("float");
|
||||
json_term_writer.set_fast_value(-0.2f64);
|
||||
assert!(term_stream.advance());
|
||||
assert_eq!(term_stream.key(), json_term_writer.term().value_bytes());
|
||||
assert_eq!(
|
||||
term_stream.key(),
|
||||
json_term_writer.term().serialized_value_bytes()
|
||||
);
|
||||
|
||||
json_term_writer.pop_path_segment();
|
||||
json_term_writer.push_path_segment("my_arr");
|
||||
json_term_writer.set_fast_value(2i64);
|
||||
assert!(term_stream.advance());
|
||||
assert_eq!(term_stream.key(), json_term_writer.term().value_bytes());
|
||||
assert_eq!(
|
||||
term_stream.key(),
|
||||
json_term_writer.term().serialized_value_bytes()
|
||||
);
|
||||
|
||||
json_term_writer.set_fast_value(3i64);
|
||||
assert!(term_stream.advance());
|
||||
assert_eq!(term_stream.key(), json_term_writer.term().value_bytes());
|
||||
assert_eq!(
|
||||
term_stream.key(),
|
||||
json_term_writer.term().serialized_value_bytes()
|
||||
);
|
||||
|
||||
json_term_writer.set_fast_value(4i64);
|
||||
assert!(term_stream.advance());
|
||||
assert_eq!(term_stream.key(), json_term_writer.term().value_bytes());
|
||||
assert_eq!(
|
||||
term_stream.key(),
|
||||
json_term_writer.term().serialized_value_bytes()
|
||||
);
|
||||
|
||||
json_term_writer.push_path_segment("my_key");
|
||||
json_term_writer.set_str("tokens");
|
||||
assert!(term_stream.advance());
|
||||
assert_eq!(term_stream.key(), json_term_writer.term().value_bytes());
|
||||
assert_eq!(
|
||||
term_stream.key(),
|
||||
json_term_writer.term().serialized_value_bytes()
|
||||
);
|
||||
|
||||
json_term_writer.set_str("two");
|
||||
assert!(term_stream.advance());
|
||||
assert_eq!(term_stream.key(), json_term_writer.term().value_bytes());
|
||||
assert_eq!(
|
||||
term_stream.key(),
|
||||
json_term_writer.term().serialized_value_bytes()
|
||||
);
|
||||
|
||||
json_term_writer.pop_path_segment();
|
||||
json_term_writer.pop_path_segment();
|
||||
json_term_writer.push_path_segment("signed");
|
||||
json_term_writer.set_fast_value(-2i64);
|
||||
assert!(term_stream.advance());
|
||||
assert_eq!(term_stream.key(), json_term_writer.term().value_bytes());
|
||||
assert_eq!(
|
||||
term_stream.key(),
|
||||
json_term_writer.term().serialized_value_bytes()
|
||||
);
|
||||
|
||||
json_term_writer.pop_path_segment();
|
||||
json_term_writer.push_path_segment("toto");
|
||||
json_term_writer.set_str("titi");
|
||||
assert!(term_stream.advance());
|
||||
assert_eq!(term_stream.key(), json_term_writer.term().value_bytes());
|
||||
assert_eq!(
|
||||
term_stream.key(),
|
||||
json_term_writer.term().serialized_value_bytes()
|
||||
);
|
||||
|
||||
json_term_writer.pop_path_segment();
|
||||
json_term_writer.push_path_segment("unsigned");
|
||||
json_term_writer.set_fast_value(1i64);
|
||||
assert!(term_stream.advance());
|
||||
assert_eq!(term_stream.key(), json_term_writer.term().value_bytes());
|
||||
assert_eq!(
|
||||
term_stream.key(),
|
||||
json_term_writer.term().serialized_value_bytes()
|
||||
);
|
||||
assert!(!term_stream.advance());
|
||||
}
|
||||
|
||||
|
||||
@@ -6,7 +6,6 @@ use crate::indexer::doc_id_mapping::DocIdMapping;
 use crate::postings::postings_writer::SpecializedPostingsWriter;
 use crate::postings::recorder::{BufferLender, DocIdRecorder, Recorder};
 use crate::postings::{FieldSerializer, IndexingContext, IndexingPosition, PostingsWriter};
-use crate::schema::term::as_json_path_type_value_bytes;
 use crate::schema::Type;
 use crate::tokenizer::TokenStream;
 use crate::{DocId, Term};
@@ -61,8 +60,8 @@ impl<Rec: Recorder> PostingsWriter for JsonPostingsWriter<Rec> {
     ) -> io::Result<()> {
         let mut buffer_lender = BufferLender::default();
         for (term, addr) in term_addrs {
             // TODO optimization opportunity here.
-            if let Some((_, typ, _)) = as_json_path_type_value_bytes(term.value_bytes()) {
+            if let Some(json_value) = term.value().as_json_value_bytes() {
+                let typ = json_value.typ();
                 if typ == Type::Str {
                     SpecializedPostingsWriter::<Rec>::serialize_one_term(
                         term,
@@ -171,7 +171,7 @@ impl<Rec: Recorder> SpecializedPostingsWriter<Rec> {
     ) -> io::Result<()> {
         let recorder: Rec = ctx.term_index.read(addr);
         let term_doc_freq = recorder.term_doc_freq().unwrap_or(0u32);
-        serializer.new_term(term.value_bytes(), term_doc_freq)?;
+        serializer.new_term(term.serialized_value_bytes(), term_doc_freq)?;
         recorder.serialize(&ctx.arena, doc_id_map, serializer, buffer_lender);
         serializer.close_term()?;
         Ok(())
@@ -180,10 +180,10 @@ impl<Rec: Recorder> SpecializedPostingsWriter<Rec> {

 impl<Rec: Recorder> PostingsWriter for SpecializedPostingsWriter<Rec> {
     fn subscribe(&mut self, doc: DocId, position: u32, term: &Term, ctx: &mut IndexingContext) {
-        debug_assert!(term.as_slice().len() >= 4);
+        debug_assert!(term.serialized_term().len() >= 4);
         self.total_num_tokens += 1;
         let (term_index, arena) = (&mut ctx.term_index, &mut ctx.arena);
-        term_index.mutate_or_create(term.as_slice(), |opt_recorder: Option<Rec>| {
+        term_index.mutate_or_create(term.serialized_term(), |opt_recorder: Option<Rec>| {
             if let Some(mut recorder) = opt_recorder {
                 let current_doc = recorder.current_doc();
                 if current_doc != doc {
@@ -131,7 +131,8 @@ impl FuzzyTermQuery {
             LevenshteinAutomatonBuilder::new(self.distance, self.transposition_cost_one)
         });

-        let term_text = self.term.as_str().ok_or_else(|| {
+        let term_value = self.term.value();
+        let term_text = term_value.as_str().ok_or_else(|| {
             InvalidArgument("The fuzzy term query requires a string term.".to_string())
         })?;
         let automaton = if self.prefix {
@@ -138,14 +138,15 @@ impl Query for PhrasePrefixQuery {
|
||||
Ok(Box::new(phrase_weight))
|
||||
} else {
|
||||
// There are no prefix. Let's just match the suffix.
|
||||
let end_term = if let Some(end_value) = prefix_end(self.prefix.1.value_bytes()) {
|
||||
let mut end_term = Term::with_capacity(end_value.len());
|
||||
end_term.set_field_and_type(self.field, self.prefix.1.typ());
|
||||
end_term.append_bytes(&end_value);
|
||||
Bound::Excluded(end_term)
|
||||
} else {
|
||||
Bound::Unbounded
|
||||
};
|
||||
let end_term =
|
||||
if let Some(end_value) = prefix_end(self.prefix.1.serialized_value_bytes()) {
|
||||
let mut end_term = Term::with_capacity(end_value.len());
|
||||
end_term.set_field_and_type(self.field, self.prefix.1.typ());
|
||||
end_term.append_bytes(&end_value);
|
||||
Bound::Excluded(end_term)
|
||||
} else {
|
||||
Bound::Unbounded
|
||||
};
|
||||
|
||||
let mut range_query = RangeQuery::new_term_bounds(
|
||||
enable_scoring
|
||||
|
||||
@@ -78,8 +78,11 @@ impl PhrasePrefixWeight {
|
||||
}
|
||||
|
||||
let inv_index = reader.inverted_index(self.prefix.1.field())?;
|
||||
let mut stream = inv_index.terms().range().ge(self.prefix.1.value_bytes());
|
||||
if let Some(end) = prefix_end(self.prefix.1.value_bytes()) {
|
||||
let mut stream = inv_index
|
||||
.terms()
|
||||
.range()
|
||||
.ge(self.prefix.1.serialized_value_bytes());
|
||||
if let Some(end) = prefix_end(self.prefix.1.serialized_value_bytes()) {
|
||||
stream = stream.lt(&end);
|
||||
}
|
||||
|
||||
|
||||
@@ -952,7 +952,7 @@ mod test {
|
||||
let query = query_parser.parse_query("facet:/root/branch/leaf").unwrap();
|
||||
assert_eq!(
|
||||
format!("{:?}", query),
|
||||
r#"TermQuery(Term(type=Facet, field=11, "/root/branch/leaf"))"#
|
||||
r#"TermQuery(Term(field=11, type=Facet, Facet(/root/branch/leaf)))"#
|
||||
);
|
||||
}
|
||||
|
||||
@@ -965,7 +965,7 @@ mod test {
|
||||
let query = query_parser.parse_query("text:hello").unwrap();
|
||||
assert_eq!(
|
||||
format!("{:?}", query),
|
||||
r#"Boost(query=TermQuery(Term(type=Str, field=1, "hello")), boost=2)"#
|
||||
r#"Boost(query=TermQuery(Term(field=1, type=Str, "hello")), boost=2)"#
|
||||
);
|
||||
}
|
||||
|
||||
@@ -988,7 +988,7 @@ mod test {
|
||||
let query = query_parser.parse_query("text:hello^2").unwrap();
|
||||
assert_eq!(
|
||||
format!("{:?}", query),
|
||||
r#"Boost(query=Boost(query=TermQuery(Term(type=Str, field=1, "hello")), boost=2), boost=2)"#
|
||||
r#"Boost(query=Boost(query=TermQuery(Term(field=1, type=Str, "hello")), boost=2), boost=2)"#
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1027,7 +1027,7 @@ mod test {
|
||||
pub fn test_parse_query_untokenized() {
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"nottokenized:\"wordone wordtwo\"",
|
||||
r#"Term(type=Str, field=7, "wordone wordtwo")"#,
|
||||
r#"Term(field=7, type=Str, "wordone wordtwo")"#,
|
||||
false,
|
||||
);
|
||||
}
|
||||
@@ -1070,7 +1070,7 @@ mod test {
|
||||
.is_ok());
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"unsigned:2324",
|
||||
"Term(type=U64, field=3, 2324)",
|
||||
"Term(field=3, type=U64, 2324)",
|
||||
false,
|
||||
);
|
||||
|
||||
@@ -1097,7 +1097,7 @@ mod test {
|
||||
fn test_parse_bytes() {
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"bytes:YnVidQ==",
|
||||
"Term(type=Bytes, field=12, [98, 117, 98, 117])",
|
||||
"Term(field=12, type=Bytes, [98, 117, 98, 117])",
|
||||
false,
|
||||
);
|
||||
}
|
||||
@@ -1124,7 +1124,7 @@ mod test {
|
||||
fn test_json_field() {
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"json.titi:hello",
|
||||
"Term(type=Json, field=14, path=titi, vtype=Str, \"hello\")",
|
||||
"Term(field=14, type=Json, path=titi, type=Str, \"hello\")",
|
||||
false,
|
||||
);
|
||||
}
|
||||
@@ -1136,7 +1136,9 @@ mod test {
|
||||
let LogicalLiteral::Term(term) = *literal else {
|
||||
panic!();
|
||||
};
|
||||
std::str::from_utf8(term.value_bytes()).unwrap().to_string()
|
||||
std::str::from_utf8(term.serialized_value_bytes())
|
||||
.unwrap()
|
||||
.to_string()
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -1155,17 +1157,17 @@ mod test {
|
||||
fn test_json_field_possibly_a_number() {
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"json.titi:5",
|
||||
r#"(Term(type=Json, field=14, path=titi, vtype=U64, 5) Term(type=Json, field=14, path=titi, vtype=Str, "5"))"#,
|
||||
r#"(Term(field=14, type=Json, path=titi, type=U64, 5) Term(field=14, type=Json, path=titi, type=Str, "5"))"#,
|
||||
true,
|
||||
);
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"json.titi:-5",
|
||||
r#"(Term(type=Json, field=14, path=titi, vtype=I64, -5) Term(type=Json, field=14, path=titi, vtype=Str, "5"))"#, //< Yes this is a bit weird after going through the tokenizer we lose the "-".
|
||||
r#"(Term(field=14, type=Json, path=titi, type=I64, -5) Term(field=14, type=Json, path=titi, type=Str, "5"))"#, //< Yes this is a bit weird after going through the tokenizer we lose the "-".
|
||||
true,
|
||||
);
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"json.titi:-5.2",
|
||||
r#"(Term(type=Json, field=14, path=titi, vtype=F64, -5.2) "[(0, Term(type=Json, field=14, path=titi, vtype=Str, "5")), (1, Term(type=Json, field=14, path=titi, vtype=Str, "2"))]")"#,
|
||||
r#"(Term(field=14, type=Json, path=titi, type=F64, -5.2) "[(0, Term(field=14, type=Json, path=titi, type=Str, "5")), (1, Term(field=14, type=Json, path=titi, type=Str, "2"))]")"#,
|
||||
true,
|
||||
);
|
||||
}
|
||||
@@ -1174,7 +1176,7 @@ mod test {
|
||||
fn test_json_field_possibly_a_date() {
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
r#"json.date:"2019-10-12T07:20:50.52Z""#,
|
||||
r#"(Term(type=Json, field=14, path=date, vtype=Date, 2019-10-12T07:20:50Z) "[(0, Term(type=Json, field=14, path=date, vtype=Str, "2019")), (1, Term(type=Json, field=14, path=date, vtype=Str, "10")), (2, Term(type=Json, field=14, path=date, vtype=Str, "12t07")), (3, Term(type=Json, field=14, path=date, vtype=Str, "20")), (4, Term(type=Json, field=14, path=date, vtype=Str, "50")), (5, Term(type=Json, field=14, path=date, vtype=Str, "52z"))]")"#,
|
||||
r#"(Term(field=14, type=Json, path=date, type=Date, 2019-10-12T07:20:50Z) "[(0, Term(field=14, type=Json, path=date, type=Str, "2019")), (1, Term(field=14, type=Json, path=date, type=Str, "10")), (2, Term(field=14, type=Json, path=date, type=Str, "12t07")), (3, Term(field=14, type=Json, path=date, type=Str, "20")), (4, Term(field=14, type=Json, path=date, type=Str, "50")), (5, Term(field=14, type=Json, path=date, type=Str, "52z"))]")"#,
|
||||
true,
|
||||
);
|
||||
}
|
||||
@@ -1183,7 +1185,7 @@ mod test {
|
||||
fn test_json_field_possibly_a_bool() {
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"json.titi:true",
|
||||
r#"(Term(type=Json, field=14, path=titi, vtype=Bool, true) Term(type=Json, field=14, path=titi, vtype=Str, "true"))"#,
|
||||
r#"(Term(field=14, type=Json, path=titi, type=Bool, true) Term(field=14, type=Json, path=titi, type=Str, "true"))"#,
|
||||
true,
|
||||
);
|
||||
}
|
||||
@@ -1212,8 +1214,8 @@ mod test {
|
||||
fn test_json_default() {
|
||||
test_query_to_logical_ast_with_default_json(
|
||||
"titi:4",
|
||||
"(Term(type=Json, field=14, path=titi, vtype=U64, 4) Term(type=Json, field=14, \
|
||||
path=titi, vtype=Str, \"4\"))",
|
||||
"(Term(field=14, type=Json, path=titi, type=U64, 4) Term(field=14, type=Json, \
|
||||
path=titi, type=Str, \"4\"))",
|
||||
false,
|
||||
);
|
||||
}
|
||||
@@ -1223,7 +1225,7 @@ mod test {
|
||||
for conjunction in [false, true] {
|
||||
test_query_to_logical_ast_with_default_json(
|
||||
"text:4",
|
||||
r#"Term(type=Str, field=1, "4")"#,
|
||||
r#"Term(field=1, type=Str, "4")"#,
|
||||
conjunction,
|
||||
);
|
||||
}
|
||||
@@ -1234,7 +1236,7 @@ mod test {
|
||||
for conjunction in [false, true] {
|
||||
test_query_to_logical_ast_with_default_json(
|
||||
"json:4",
|
||||
r#"(Term(type=Json, field=14, path=, vtype=U64, 4) Term(type=Json, field=14, path=, vtype=Str, "4"))"#,
|
||||
r#"(Term(field=14, type=Json, path=, type=U64, 4) Term(field=14, type=Json, path=, type=Str, "4"))"#,
|
||||
conjunction,
|
||||
);
|
||||
}
|
||||
@@ -1244,7 +1246,7 @@ mod test {
|
||||
fn test_parse_bytes_phrase() {
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"bytes:\"YnVidQ==\"",
|
||||
"Term(type=Bytes, field=12, [98, 117, 98, 117])",
|
||||
"Term(field=12, type=Bytes, [98, 117, 98, 117])",
|
||||
false,
|
||||
);
|
||||
}
|
||||
@@ -1260,12 +1262,12 @@ mod test {
|
||||
fn test_parse_query_to_ast_ab_c() {
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"(+title:a +title:b) title:c",
|
||||
r#"((+Term(type=Str, field=0, "a") +Term(type=Str, field=0, "b")) Term(type=Str, field=0, "c"))"#,
|
||||
r#"((+Term(field=0, type=Str, "a") +Term(field=0, type=Str, "b")) Term(field=0, type=Str, "c"))"#,
|
||||
false,
|
||||
);
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"(+title:a +title:b) title:c",
|
||||
r#"(+(+Term(type=Str, field=0, "a") +Term(type=Str, field=0, "b")) +Term(type=Str, field=0, "c"))"#,
|
||||
r#"(+(+Term(field=0, type=Str, "a") +Term(field=0, type=Str, "b")) +Term(field=0, type=Str, "c"))"#,
|
||||
true,
|
||||
);
|
||||
}
|
||||
@@ -1274,17 +1276,17 @@ mod test {
|
||||
pub fn test_parse_query_to_ast_single_term() {
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"title:toto",
|
||||
r#"Term(type=Str, field=0, "toto")"#,
|
||||
r#"Term(field=0, type=Str, "toto")"#,
|
||||
false,
|
||||
);
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"+title:toto",
|
||||
r#"Term(type=Str, field=0, "toto")"#,
|
||||
r#"Term(field=0, type=Str, "toto")"#,
|
||||
false,
|
||||
);
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"+title:toto -titi",
|
||||
r#"(+Term(type=Str, field=0, "toto") -(Term(type=Str, field=0, "titi") Term(type=Str, field=1, "titi")))"#,
|
||||
r#"(+Term(field=0, type=Str, "toto") -(Term(field=0, type=Str, "titi") Term(field=1, type=Str, "titi")))"#,
|
||||
false,
|
||||
);
|
||||
}
|
||||
@@ -1301,12 +1303,12 @@ mod test {
|
||||
pub fn test_parse_query_to_ast_two_terms() {
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"title:a b",
|
||||
r#"(Term(type=Str, field=0, "a") (Term(type=Str, field=0, "b") Term(type=Str, field=1, "b")))"#,
|
||||
r#"(Term(field=0, type=Str, "a") (Term(field=0, type=Str, "b") Term(field=1, type=Str, "b")))"#,
|
||||
false,
|
||||
);
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
r#"title:"a b""#,
|
||||
r#""[(0, Term(type=Str, field=0, "a")), (1, Term(type=Str, field=0, "b"))]""#,
|
||||
r#""[(0, Term(field=0, type=Str, "a")), (1, Term(field=0, type=Str, "b"))]""#,
|
||||
false,
|
||||
);
|
||||
}
|
||||
@@ -1329,37 +1331,37 @@ mod test {
|
||||
pub fn test_parse_query_to_ast_ranges() {
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"title:[a TO b]",
|
||||
r#"(Included(Term(type=Str, field=0, "a")) TO Included(Term(type=Str, field=0, "b")))"#,
|
||||
r#"(Included(Term(field=0, type=Str, "a")) TO Included(Term(field=0, type=Str, "b")))"#,
|
||||
false,
|
||||
);
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"title:{titi TO toto}",
|
||||
r#"(Excluded(Term(type=Str, field=0, "titi")) TO Excluded(Term(type=Str, field=0, "toto")))"#,
|
||||
r#"(Excluded(Term(field=0, type=Str, "titi")) TO Excluded(Term(field=0, type=Str, "toto")))"#,
|
||||
false,
|
||||
);
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"title:{* TO toto}",
|
||||
r#"(Unbounded TO Excluded(Term(type=Str, field=0, "toto")))"#,
|
||||
r#"(Unbounded TO Excluded(Term(field=0, type=Str, "toto")))"#,
|
||||
false,
|
||||
);
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"title:{titi TO *}",
|
||||
r#"(Excluded(Term(type=Str, field=0, "titi")) TO Unbounded)"#,
|
||||
r#"(Excluded(Term(field=0, type=Str, "titi")) TO Unbounded)"#,
|
||||
false,
|
||||
);
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"signed:{-5 TO 3}",
|
||||
r#"(Excluded(Term(type=I64, field=2, -5)) TO Excluded(Term(type=I64, field=2, 3)))"#,
|
||||
r#"(Excluded(Term(field=2, type=I64, -5)) TO Excluded(Term(field=2, type=I64, 3)))"#,
|
||||
false,
|
||||
);
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"float:{-1.5 TO 1.5}",
|
||||
r#"(Excluded(Term(type=F64, field=10, -1.5)) TO Excluded(Term(type=F64, field=10, 1.5)))"#,
|
||||
r#"(Excluded(Term(field=10, type=F64, -1.5)) TO Excluded(Term(field=10, type=F64, 1.5)))"#,
|
||||
false,
|
||||
);
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"u64_ff:[7 TO 77]",
|
||||
r#"(Included(Term(type=U64, field=18, 7)) TO Included(Term(type=U64, field=18, 77)))"#,
|
||||
r#"(Included(Term(field=18, type=U64, 7)) TO Included(Term(field=18, type=U64, 77)))"#,
|
||||
false,
|
||||
);
|
||||
}
|
||||
@@ -1462,12 +1464,12 @@ mod test {
|
||||
);
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
r#"date:"2010-11-21T09:55:06.000000000+02:00""#,
|
||||
r#"Term(type=Date, field=9, 2010-11-21T07:55:06Z)"#,
|
||||
r#"Term(field=9, type=Date, 2010-11-21T07:55:06Z)"#,
|
||||
true,
|
||||
);
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
r#"date:"1985-04-12T23:20:50.52Z""#,
|
||||
r#"Term(type=Date, field=9, 1985-04-12T23:20:50Z)"#,
|
||||
r#"Term(field=9, type=Date, 1985-04-12T23:20:50Z)"#,
|
||||
true,
|
||||
);
|
||||
}
|
||||
@@ -1508,27 +1510,27 @@ mod test {
|
||||
pub fn test_parse_query_to_ast_conjunction() {
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"title:toto",
|
||||
r#"Term(type=Str, field=0, "toto")"#,
|
||||
r#"Term(field=0, type=Str, "toto")"#,
|
||||
true,
|
||||
);
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"+title:toto",
|
||||
r#"Term(type=Str, field=0, "toto")"#,
|
||||
r#"Term(field=0, type=Str, "toto")"#,
|
||||
true,
|
||||
);
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"+title:toto -titi",
|
||||
r#"(+Term(type=Str, field=0, "toto") -(Term(type=Str, field=0, "titi") Term(type=Str, field=1, "titi")))"#,
|
||||
r#"(+Term(field=0, type=Str, "toto") -(Term(field=0, type=Str, "titi") Term(field=1, type=Str, "titi")))"#,
|
||||
true,
|
||||
);
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"title:a b",
|
||||
r#"(+Term(type=Str, field=0, "a") +(Term(type=Str, field=0, "b") Term(type=Str, field=1, "b")))"#,
|
||||
r#"(+Term(field=0, type=Str, "a") +(Term(field=0, type=Str, "b") Term(field=1, type=Str, "b")))"#,
|
||||
true,
|
||||
);
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"title:\"a b\"",
|
||||
r#""[(0, Term(type=Str, field=0, "a")), (1, Term(type=Str, field=0, "b"))]""#,
|
||||
r#""[(0, Term(field=0, type=Str, "a")), (1, Term(field=0, type=Str, "b"))]""#,
|
||||
true,
|
||||
);
|
||||
}
|
||||
@@ -1537,7 +1539,7 @@ mod test {
|
||||
pub fn test_query_parser_hyphen() {
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"title:www-form-encoded",
|
||||
r#""[(0, Term(type=Str, field=0, "www")), (1, Term(type=Str, field=0, "form")), (2, Term(type=Str, field=0, "encoded"))]""#,
|
||||
r#""[(0, Term(field=0, type=Str, "www")), (1, Term(field=0, type=Str, "form")), (2, Term(field=0, type=Str, "encoded"))]""#,
|
||||
false,
|
||||
);
|
||||
}
|
||||
@@ -1547,7 +1549,7 @@ mod test {
|
||||
for &default_conjunction in &[false, true] {
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"title:a AND title:b",
|
||||
r#"(+Term(type=Str, field=0, "a") +Term(type=Str, field=0, "b"))"#,
|
||||
r#"(+Term(field=0, type=Str, "a") +Term(field=0, type=Str, "b"))"#,
|
||||
default_conjunction,
|
||||
);
|
||||
}
|
||||
@@ -1558,7 +1560,7 @@ mod test {
|
||||
for &default_conjunction in &[false, true] {
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"title:a OR title:b",
|
||||
r#"(Term(type=Str, field=0, "a") Term(type=Str, field=0, "b"))"#,
|
||||
r#"(Term(field=0, type=Str, "a") Term(field=0, type=Str, "b"))"#,
|
||||
default_conjunction,
|
||||
);
|
||||
}
|
||||
@@ -1573,7 +1575,7 @@ mod test {
|
||||
let query = query_parser.parse_query(r#"a\.b:hello"#).unwrap();
|
||||
assert_eq!(
|
||||
format!("{:?}", query),
|
||||
"TermQuery(Term(type=Str, field=0, \"hello\"))"
|
||||
"TermQuery(Term(field=0, type=Str, \"hello\"))"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1614,17 +1616,17 @@ mod test {
|
||||
pub fn test_phrase_slop() {
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"\"a b\"~0",
|
||||
r#"("[(0, Term(type=Str, field=0, "a")), (1, Term(type=Str, field=0, "b"))]" "[(0, Term(type=Str, field=1, "a")), (1, Term(type=Str, field=1, "b"))]")"#,
|
||||
r#"("[(0, Term(field=0, type=Str, "a")), (1, Term(field=0, type=Str, "b"))]" "[(0, Term(field=1, type=Str, "a")), (1, Term(field=1, type=Str, "b"))]")"#,
|
||||
false,
|
||||
);
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"\"a b\"~2",
|
||||
r#"("[(0, Term(type=Str, field=0, "a")), (1, Term(type=Str, field=0, "b"))]"~2 "[(0, Term(type=Str, field=1, "a")), (1, Term(type=Str, field=1, "b"))]"~2)"#,
|
||||
r#"("[(0, Term(field=0, type=Str, "a")), (1, Term(field=0, type=Str, "b"))]"~2 "[(0, Term(field=1, type=Str, "a")), (1, Term(field=1, type=Str, "b"))]"~2)"#,
|
||||
false,
|
||||
);
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"title:\"a b~4\"~2",
|
||||
r#""[(0, Term(type=Str, field=0, "a")), (1, Term(type=Str, field=0, "b")), (2, Term(type=Str, field=0, "4"))]"~2"#,
|
||||
r#""[(0, Term(field=0, type=Str, "a")), (1, Term(field=0, type=Str, "b")), (2, Term(field=0, type=Str, "4"))]"~2"#,
|
||||
false,
|
||||
);
|
||||
}
|
||||
@@ -1633,23 +1635,23 @@ mod test {
|
||||
pub fn test_term_set_query() {
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"title: IN [a b cd]",
|
||||
r#"IN [Term(type=Str, field=0, "a"), Term(type=Str, field=0, "b"), Term(type=Str, field=0, "cd")]"#,
|
||||
r#"IN [Term(field=0, type=Str, "a"), Term(field=0, type=Str, "b"), Term(field=0, type=Str, "cd")]"#,
|
||||
false,
|
||||
);
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"bytes: IN [AA== ABA= ABCD]",
|
||||
r#"IN [Term(type=Bytes, field=12, [0]), Term(type=Bytes, field=12, [0, 16]), Term(type=Bytes, field=12, [0, 16, 131])]"#,
|
||||
r#"IN [Term(field=12, type=Bytes, [0]), Term(field=12, type=Bytes, [0, 16]), Term(field=12, type=Bytes, [0, 16, 131])]"#,
|
||||
false,
|
||||
);
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"signed: IN [1 2 -3]",
|
||||
r#"IN [Term(type=I64, field=2, 1), Term(type=I64, field=2, 2), Term(type=I64, field=2, -3)]"#,
|
||||
r#"IN [Term(field=2, type=I64, 1), Term(field=2, type=I64, 2), Term(field=2, type=I64, -3)]"#,
|
||||
false,
|
||||
);
|
||||
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"float: IN [1.1 2.2 -3.3]",
|
||||
r#"IN [Term(type=F64, field=10, 1.1), Term(type=F64, field=10, 2.2), Term(type=F64, field=10, -3.3)]"#,
|
||||
r#"IN [Term(field=10, type=F64, 1.1), Term(field=10, type=F64, 2.2), Term(field=10, type=F64, -3.3)]"#,
|
||||
false,
|
||||
);
|
||||
}
|
||||
@@ -1667,9 +1669,9 @@ mod test {
|
||||
let query = query_parser.parse_query("abc").unwrap();
|
||||
assert_eq!(
|
||||
format!("{:?}", query),
|
||||
"BooleanQuery { subqueries: [(Should, FuzzyTermQuery { term: Term(type=Str, \
|
||||
field=0, \"abc\"), distance: 1, transposition_cost_one: true, prefix: false }), \
|
||||
(Should, TermQuery(Term(type=Str, field=1, \"abc\")))] }"
|
||||
"BooleanQuery { subqueries: [(Should, FuzzyTermQuery { term: Term(field=0, \
|
||||
type=Str, \"abc\"), distance: 1, transposition_cost_one: true, prefix: false }), \
|
||||
(Should, TermQuery(Term(field=1, type=Str, \"abc\")))] }"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1684,8 +1686,8 @@ mod test {
|
||||
let query = query_parser.parse_query("abc").unwrap();
|
||||
assert_eq!(
|
||||
format!("{:?}", query),
|
||||
"BooleanQuery { subqueries: [(Should, TermQuery(Term(type=Str, field=0, \
|
||||
\"abc\"))), (Should, FuzzyTermQuery { term: Term(type=Str, field=1, \"abc\"), \
|
||||
"BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \
|
||||
\"abc\"))), (Should, FuzzyTermQuery { term: Term(field=1, type=Str, \"abc\"), \
|
||||
distance: 2, transposition_cost_one: false, prefix: true })] }"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -85,7 +85,7 @@ impl RangeQuery {
|
||||
left_bound: &Bound<Term>,
|
||||
right_bound: &Bound<Term>,
|
||||
) -> RangeQuery {
|
||||
let verify_and_unwrap_term = |val: &Term| val.value_bytes().to_owned();
|
||||
let verify_and_unwrap_term = |val: &Term| val.serialized_value_bytes().to_owned();
|
||||
RangeQuery {
|
||||
field,
|
||||
value_type,
|
||||
@@ -121,7 +121,7 @@ impl RangeQuery {
|
||||
) -> RangeQuery {
|
||||
let make_term_val = |val: &i64| {
|
||||
Term::from_field_i64(Field::from_field_id(0), *val)
|
||||
.value_bytes()
|
||||
.serialized_value_bytes()
|
||||
.to_owned()
|
||||
};
|
||||
RangeQuery {
|
||||
@@ -159,7 +159,7 @@ impl RangeQuery {
|
||||
) -> RangeQuery {
|
||||
let make_term_val = |val: &f64| {
|
||||
Term::from_field_f64(Field::from_field_id(0), *val)
|
||||
.value_bytes()
|
||||
.serialized_value_bytes()
|
||||
.to_owned()
|
||||
};
|
||||
RangeQuery {
|
||||
@@ -185,7 +185,7 @@ impl RangeQuery {
|
||||
) -> RangeQuery {
|
||||
let make_term_val = |val: &u64| {
|
||||
Term::from_field_u64(Field::from_field_id(0), *val)
|
||||
.value_bytes()
|
||||
.serialized_value_bytes()
|
||||
.to_owned()
|
||||
};
|
||||
RangeQuery {
|
||||
@@ -208,7 +208,7 @@ impl RangeQuery {
|
||||
) -> RangeQuery {
|
||||
let make_term_val = |val: &Ipv6Addr| {
|
||||
Term::from_field_ip_addr(Field::from_field_id(0), *val)
|
||||
.value_bytes()
|
||||
.serialized_value_bytes()
|
||||
.to_owned()
|
||||
};
|
||||
RangeQuery {
|
||||
@@ -246,7 +246,7 @@ impl RangeQuery {
|
||||
) -> RangeQuery {
|
||||
let make_term_val = |val: &DateTime| {
|
||||
Term::from_field_date(Field::from_field_id(0), *val)
|
||||
.value_bytes()
|
||||
.serialized_value_bytes()
|
||||
.to_owned()
|
||||
};
|
||||
RangeQuery {
|
||||
|
||||
@@ -47,8 +47,12 @@ impl TermSetQuery {
|
||||
// In practice this won't fail because:
|
||||
// - we are writing to memory, so no IoError
|
||||
// - Terms are ordered
|
||||
let map = Map::from_iter(sorted_terms.iter().map(|key| (key.value_bytes(), 0)))
|
||||
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
|
||||
let map = Map::from_iter(
|
||||
sorted_terms
|
||||
.iter()
|
||||
.map(|key| (key.serialized_value_bytes(), 0)),
|
||||
)
|
||||
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
|
||||
|
||||
sub_queries.push((
|
||||
Occur::Should,
|
||||
|
||||
@@ -175,7 +175,7 @@ mod tests {
|
||||
);
|
||||
assert_eq!(
|
||||
format!("{:?}", term_query),
|
||||
r#"TermQuery(Term(type=Str, field=1, "hello"))"#
|
||||
r#"TermQuery(Term(field=1, type=Str, "hello"))"#
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -5,6 +5,9 @@ use serde::{Deserialize, Serialize};

 use crate::schema::flags::{FastFlag, IndexedFlag, SchemaFlagList, StoredFlag};

+/// The precision of the indexed date/time values in the inverted index.
+pub const DATE_TIME_PRECISION_INDEXED: DatePrecision = DatePrecision::Seconds;
+
 /// Defines how DateTime field should be handled by tantivy.
 #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
 pub struct DateOptions {

@@ -85,7 +88,8 @@ impl DateOptions {
         self
     }

-    /// Sets the precision for this DateTime field.
+    /// Sets the precision for this DateTime field on the fast field.
+    /// Indexed precision is always [`DATE_TIME_PRECISION_INDEXED`].
     ///
     /// Internal storage precision, used to optimize storage
     /// compression on fast fields.
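In other words, the precision configured on a DateOptions only affects the fast-field (columnar) representation; terms written to the inverted index are always truncated to DATE_TIME_PRECISION_INDEXED, i.e. seconds. A rough sketch of that split from the schema side, assuming the usual DateOptions builder API (the field name and precision choice are illustrative):

    use tantivy::schema::{DateOptions, Schema, DATE_TIME_PRECISION_INDEXED};
    use tantivy::DatePrecision;

    fn main() {
        // Fast-field storage can keep finer precision per field...
        let opts = DateOptions::default().set_precision(DatePrecision::Milliseconds);
        let mut schema_builder = Schema::builder();
        schema_builder.add_date_field("created_at", opts);
        let _schema = schema_builder.build();

        // ...while the indexed precision is fixed by this constant (seconds).
        let _indexed_precision: DatePrecision = DATE_TIME_PRECISION_INDEXED;
    }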
@@ -129,7 +129,7 @@ mod value;
 use columnar::ColumnType;

 pub use self::bytes_options::BytesOptions;
-pub use self::date_time_options::{DateOptions, DatePrecision};
+pub use self::date_time_options::{DateOptions, DatePrecision, DATE_TIME_PRECISION_INDEXED};
 pub use self::document::Document;
 pub(crate) use self::facet::FACET_SEP_BYTE;
 pub use self::facet::{Facet, FacetParseError};

@@ -147,7 +147,7 @@ pub use self::named_field_document::NamedFieldDocument;
 pub use self::numeric_options::IntOptions;
 pub use self::numeric_options::NumericOptions;
 pub use self::schema::{DocParsingError, Schema, SchemaBuilder};
-pub use self::term::Term;
+pub use self::term::{Term, ValueBytes, JSON_END_OF_PATH};
 pub use self::text_options::{TextFieldIndexing, TextOptions, STRING, TEXT};
 pub use self::value::Value;
@@ -5,10 +5,11 @@ use std::{fmt, str};

 use columnar::MonotonicallyMappableToU128;

+use super::date_time_options::DATE_TIME_PRECISION_INDEXED;
 use super::Field;
 use crate::fastfield::FastValue;
 use crate::schema::{Facet, Type};
-use crate::{DatePrecision, DateTime};
+use crate::DateTime;

 /// Separates the different segments of a json path.
 pub const JSON_PATH_SEGMENT_SEP: u8 = 1u8;

@@ -20,8 +21,12 @@ pub const JSON_PATH_SEGMENT_SEP_STR: &str =
 pub const JSON_END_OF_PATH: u8 = 0u8;

 /// Term represents the value that the token can take.
+/// It's a serialized representation over different types.
 ///
-/// It actually wraps a `Vec<u8>`.
+/// It actually wraps a `Vec<u8>`. The first 5 bytes are metadata.
+/// 4 bytes are the field id, and the last byte is the type.
+///
+/// The serialized value `ValueBytes` is considered everything after the 4 first bytes (term id).
 #[derive(Clone)]
 pub struct Term<B = Vec<u8>>(B)
 where B: AsRef<[u8]>;
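The "first 5 bytes are metadata" layout described above can be checked directly against the accessors this commit introduces. A hedged sketch; the field and value simply mirror the u64 test further down in this file's diff:

    use tantivy::schema::{Schema, INDEXED};
    use tantivy::Term;

    fn main() {
        let mut schema_builder = Schema::builder();
        let count = schema_builder.add_u64_field("count", INDEXED);
        let _schema = schema_builder.build();

        let term = Term::from_field_u64(count, 983u64);
        let bytes = term.serialized_term();

        // 4-byte big-endian field id, 1-byte type code, then the value bytes.
        assert_eq!(bytes.len(), 4 + 1 + 8);
        assert_eq!(u32::from_be_bytes(bytes[..4].try_into().unwrap()), count.field_id());
        assert_eq!(bytes[4], b'u'); // type code for u64, as in the byte-level tests above
        assert_eq!(term.serialized_value_bytes(), &bytes[5..]);
        assert_eq!(term.value().as_u64(), Some(983u64));
    }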
@@ -100,7 +105,7 @@ impl Term {

     /// Builds a term given a field, and a `DateTime` value
     pub fn from_field_date(field: Field, val: DateTime) -> Term {
-        Term::from_fast_value(field, &val.truncate(DatePrecision::Seconds))
+        Term::from_fast_value(field, &val.truncate(DATE_TIME_PRECISION_INDEXED))
     }

     /// Creates a `Term` given a facet.

@@ -186,11 +191,6 @@ impl Term {
         self.0.truncate(len + TERM_METADATA_LENGTH);
     }

-    /// Returns the value bytes as mutable slice
-    pub fn value_bytes_mut(&mut self) -> &mut [u8] {
-        &mut self.0[TERM_METADATA_LENGTH..]
-    }
-
     /// The length of the bytes.
     pub fn len_bytes(&self) -> usize {
         self.0.len() - TERM_METADATA_LENGTH
@@ -206,44 +206,25 @@ impl Term {
|
||||
&mut self.0[len_before..]
|
||||
}
|
||||
|
||||
/// Appends a single byte to the term.
|
||||
/// Appends a JSON_PATH_SEGMENT_SEP to the term.
|
||||
/// Only used for JSON type.
|
||||
#[inline]
|
||||
pub fn push_byte(&mut self, byte: u8) {
|
||||
self.0.push(byte);
|
||||
pub fn add_json_path_separator(&mut self) {
|
||||
self.0.push(JSON_PATH_SEGMENT_SEP);
|
||||
}
|
||||
}
|
||||
|
||||
impl<B> Ord for Term<B>
|
||||
where B: AsRef<[u8]>
|
||||
{
|
||||
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
|
||||
self.as_slice().cmp(other.as_slice())
|
||||
/// Sets the current end to JSON_END_OF_PATH.
|
||||
/// Only used for JSON type.
|
||||
#[inline]
|
||||
pub fn set_json_path_end(&mut self) {
|
||||
let buffer_len = self.0.len();
|
||||
self.0[buffer_len - 1] = JSON_END_OF_PATH;
|
||||
}
|
||||
}
|
||||
|
||||
impl<B> PartialOrd for Term<B>
|
||||
where B: AsRef<[u8]>
|
||||
{
|
||||
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
impl<B> PartialEq for Term<B>
|
||||
where B: AsRef<[u8]>
|
||||
{
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.as_slice() == other.as_slice()
|
||||
}
|
||||
}
|
||||
|
||||
impl<B> Eq for Term<B> where B: AsRef<[u8]> {}
|
||||
|
||||
impl<B> Hash for Term<B>
|
||||
where B: AsRef<[u8]>
|
||||
{
|
||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||
self.0.as_ref().hash(state)
|
||||
/// Sets the current end to JSON_PATH_SEGMENT_SEP.
|
||||
/// Only used for JSON type.
|
||||
#[inline]
|
||||
pub fn set_json_path_separator(&mut self) {
|
||||
let buffer_len = self.0.len();
|
||||
self.0[buffer_len - 1] = JSON_PATH_SEGMENT_SEP;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -255,11 +236,68 @@ where B: AsRef<[u8]>
|
||||
Term(data)
|
||||
}
|
||||
|
||||
/// Return the type of the term.
|
||||
pub fn typ(&self) -> Type {
|
||||
self.value().typ()
|
||||
}
|
||||
|
||||
/// Returns the field.
|
||||
pub fn field(&self) -> Field {
|
||||
let field_id_bytes: [u8; 4] = (&self.0.as_ref()[..4]).try_into().unwrap();
|
||||
Field::from_field_id(u32::from_be_bytes(field_id_bytes))
|
||||
}
|
||||
|
||||
/// Returns the serialized representation of the value.
|
||||
/// (this does neither include the field id nor the value type.)
|
||||
///
|
||||
/// If the term is a string, its value is utf-8 encoded.
|
||||
/// If the term is a u64, its value is encoded according
|
||||
/// to `byteorder::BigEndian`.
|
||||
pub fn serialized_value_bytes(&self) -> &[u8] {
|
||||
&self.0.as_ref()[TERM_METADATA_LENGTH..]
|
||||
}
|
||||
|
||||
/// Returns the value of the term.
|
||||
/// address or JSON path + value. (this does not include the field.)
|
||||
pub fn value(&self) -> ValueBytes<&[u8]> {
|
||||
ValueBytes::wrap(&self.0.as_ref()[4..])
|
||||
}
|
||||
|
||||
/// Returns the serialized representation of Term.
|
||||
/// This includes field_id, value type and value.
|
||||
///
|
||||
/// Do NOT rely on this byte representation in the index.
|
||||
/// This value is likely to change in the future.
|
||||
pub fn serialized_term(&self) -> &[u8] {
|
||||
self.0.as_ref()
|
||||
}
|
||||
}
|
||||
|
||||
/// ValueBytes represents a serialized value.
|
||||
/// The value can be of any type of [`Type`] (e.g. string, u64, f64, bool, date, JSON).
|
||||
/// The serialized representation matches the lexographical order of the type.
|
||||
///
|
||||
/// The `ValueBytes` format is as follow:
|
||||
/// `[type code: u8][serialized value]`
|
||||
///
|
||||
/// For JSON `ValueBytes` equals to:
|
||||
/// `[type code=JSON][JSON path][JSON_END_OF_PATH][ValueBytes]`
|
||||
///
|
||||
/// The nested ValueBytes in JSON is never of type JSON. (there's no recursion)
|
||||
#[derive(Clone)]
|
||||
pub struct ValueBytes<B>(B)
|
||||
where B: AsRef<[u8]>;
|
||||
|
||||
impl<B> ValueBytes<B>
|
||||
where B: AsRef<[u8]>
|
||||
{
|
||||
/// Wraps a object holding bytes
|
||||
pub fn wrap(data: B) -> ValueBytes<B> {
|
||||
ValueBytes(data)
|
||||
}
|
||||
|
||||
fn typ_code(&self) -> u8 {
|
||||
*self
|
||||
.as_slice()
|
||||
.get(4)
|
||||
.expect("the byte representation is too short")
|
||||
self.0.as_ref()[0]
|
||||
}
|
||||
|
||||
/// Return the type of the term.
|
||||
@@ -267,13 +305,6 @@ where B: AsRef<[u8]>
|
||||
Type::from_code(self.typ_code()).expect("The term has an invalid type code")
|
||||
}
|
||||
|
||||
/// Returns the field.
|
||||
pub fn field(&self) -> Field {
|
||||
let mut field_id_bytes = [0u8; 4];
|
||||
field_id_bytes.copy_from_slice(&self.0.as_ref()[..4]);
|
||||
Field::from_field_id(u32::from_be_bytes(field_id_bytes))
|
||||
}
|
||||
|
||||
/// Returns the `u64` value stored in a term.
|
||||
///
|
||||
/// Returns `None` if the term is not of the u64 type, or if the term byte representation
|
||||
@@ -286,13 +317,8 @@ where B: AsRef<[u8]>
|
||||
if self.typ() != T::to_type() {
|
||||
return None;
|
||||
}
|
||||
let mut value_bytes = [0u8; 8];
|
||||
let bytes = self.value_bytes();
|
||||
if bytes.len() != 8 {
|
||||
return None;
|
||||
}
|
||||
value_bytes.copy_from_slice(self.value_bytes());
|
||||
let value_u64 = u64::from_be_bytes(value_bytes);
|
||||
let value_bytes = self.value_bytes();
|
||||
let value_u64 = u64::from_be_bytes(value_bytes.try_into().ok()?);
|
||||
Some(T::from_u64(value_u64))
|
||||
}
|
||||
|
||||
@@ -361,23 +387,133 @@ where B: AsRef<[u8]>
|
||||
Some(self.value_bytes())
|
||||
}
|
||||
|
||||
/// Returns the serialized value of the term.
|
||||
/// (this does not include the field.)
|
||||
///
|
||||
/// If the term is a string, its value is utf-8 encoded.
|
||||
/// If the term is a u64, its value is encoded according
|
||||
/// to `byteorder::BigEndian`.
|
||||
pub fn value_bytes(&self) -> &[u8] {
|
||||
&self.0.as_ref()[TERM_METADATA_LENGTH..]
|
||||
/// Returns a `Ipv6Addr` value from the term.
|
||||
pub fn as_ip_addr(&self) -> Option<Ipv6Addr> {
|
||||
if self.typ() != Type::IpAddr {
|
||||
return None;
|
||||
}
|
||||
let ip_u128 = u128::from_be_bytes(self.value_bytes().try_into().ok()?);
|
||||
Some(Ipv6Addr::from_u128(ip_u128))
|
||||
}
|
||||
|
||||
/// Returns the underlying `&[u8]`.
|
||||
/// Returns the json path (without non-human friendly separators),
|
||||
/// and the encoded ValueBytes after the json path.
|
||||
///
|
||||
/// Returns `None` if the value is not JSON.
|
||||
pub(crate) fn as_json(&self) -> Option<(&str, ValueBytes<&[u8]>)> {
|
||||
if self.typ() != Type::Json {
|
||||
return None;
|
||||
}
|
||||
let bytes = self.value_bytes();
|
||||
|
||||
let pos = bytes.iter().cloned().position(|b| b == JSON_END_OF_PATH)?;
|
||||
let (json_path_bytes, term) = bytes.split_at(pos);
|
||||
let json_path = str::from_utf8(json_path_bytes).ok()?;
|
||||
Some((json_path, ValueBytes::wrap(&term[1..])))
|
||||
}
|
||||
|
||||
/// Returns the encoded ValueBytes after the json path.
|
||||
///
|
||||
/// Returns `None` if the value is not JSON.
|
||||
pub(crate) fn as_json_value_bytes(&self) -> Option<ValueBytes<&[u8]>> {
|
||||
if self.typ() != Type::Json {
|
||||
return None;
|
||||
}
|
||||
let bytes = self.value_bytes();
|
||||
let pos = bytes.iter().cloned().position(|b| b == JSON_END_OF_PATH)?;
|
||||
Some(ValueBytes::wrap(&bytes[pos + 1..]))
|
||||
}
|
||||
|
||||
/// Returns the serialized value of ValueBytes without the type.
|
||||
fn value_bytes(&self) -> &[u8] {
|
||||
&self.0.as_ref()[1..]
|
||||
}
|
||||
|
||||
/// Returns the serialized representation of Term.
|
||||
///
|
||||
/// Do NOT rely on this byte representation in the index.
|
||||
/// This value is likely to change in the future.
|
||||
pub fn as_slice(&self) -> &[u8] {
|
||||
pub fn as_serialized(&self) -> &[u8] {
|
||||
self.0.as_ref()
|
||||
}
|
||||
|
||||
fn debug_value_bytes(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
let typ = self.typ();
|
||||
write!(f, "type={typ:?}, ")?;
|
||||
match typ {
|
||||
Type::Str => {
|
||||
let s = self.as_str();
|
||||
write_opt(f, s)?;
|
||||
}
|
||||
Type::U64 => {
|
||||
write_opt(f, self.as_u64())?;
|
||||
}
|
||||
Type::I64 => {
|
||||
write_opt(f, self.as_i64())?;
|
||||
}
|
||||
Type::F64 => {
|
||||
write_opt(f, self.as_f64())?;
|
||||
}
|
||||
Type::Bool => {
|
||||
write_opt(f, self.as_bool())?;
|
||||
}
|
||||
// TODO pretty print these types too.
|
||||
Type::Date => {
|
||||
write_opt(f, self.as_date())?;
|
||||
}
|
||||
Type::Facet => {
|
||||
write_opt(f, self.as_facet())?;
|
||||
}
|
||||
Type::Bytes => {
|
||||
write_opt(f, self.as_bytes())?;
|
||||
}
|
||||
Type::Json => {
|
||||
if let Some((path, sub_value_bytes)) = self.as_json() {
|
||||
let path_pretty = path.replace(JSON_PATH_SEGMENT_SEP_STR, ".");
|
||||
write!(f, "path={path_pretty}, ")?;
|
||||
sub_value_bytes.debug_value_bytes(f)?;
|
||||
}
|
||||
}
|
||||
Type::IpAddr => {
|
||||
write_opt(f, self.as_ip_addr())?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<B> Ord for Term<B>
|
||||
where B: AsRef<[u8]>
|
||||
{
|
||||
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
|
||||
self.serialized_term().cmp(other.serialized_term())
|
||||
}
|
||||
}
|
||||
|
||||
impl<B> PartialOrd for Term<B>
|
||||
where B: AsRef<[u8]>
|
||||
{
|
||||
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
impl<B> PartialEq for Term<B>
|
||||
where B: AsRef<[u8]>
|
||||
{
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.serialized_term() == other.serialized_term()
|
||||
}
|
||||
}
|
||||
|
||||
impl<B> Eq for Term<B> where B: AsRef<[u8]> {}
|
||||
|
||||
impl<B> Hash for Term<B>
|
||||
where B: AsRef<[u8]>
|
||||
{
|
||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||
self.0.as_ref().hash(state)
|
||||
}
|
||||
}
|
||||
|
||||
fn write_opt<T: std::fmt::Debug>(f: &mut fmt::Formatter, val_opt: Option<T>) -> fmt::Result {
|
||||
@@ -387,80 +523,14 @@ fn write_opt<T: std::fmt::Debug>(f: &mut fmt::Formatter, val_opt: Option<T>) ->
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn as_str(value_bytes: &[u8]) -> Option<&str> {
|
||||
std::str::from_utf8(value_bytes).ok()
|
||||
}
|
||||
|
||||
fn get_fast_type<T: FastValue>(bytes: &[u8]) -> Option<T> {
|
||||
let value_u64 = u64::from_be_bytes(bytes.try_into().ok()?);
|
||||
Some(T::from_u64(value_u64))
|
||||
}
|
||||
|
||||
/// Returns the json path (without non-human friendly separators, the type of the value, and the
|
||||
/// value bytes). Returns `None` if the value is not JSON or is not valid.
|
||||
pub(crate) fn as_json_path_type_value_bytes(bytes: &[u8]) -> Option<(&str, Type, &[u8])> {
|
||||
let pos = bytes.iter().cloned().position(|b| b == JSON_END_OF_PATH)?;
|
||||
let json_path = str::from_utf8(&bytes[..pos]).ok()?;
|
||||
let type_code = *bytes.get(pos + 1)?;
|
||||
let typ = Type::from_code(type_code)?;
|
||||
Some((json_path, typ, &bytes[pos + 2..]))
|
||||
}
|
||||
|
||||
fn debug_value_bytes(typ: Type, bytes: &[u8], f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match typ {
|
||||
Type::Str => {
|
||||
let s = as_str(bytes);
|
||||
write_opt(f, s)?;
|
||||
}
|
||||
Type::U64 => {
|
||||
write_opt(f, get_fast_type::<u64>(bytes))?;
|
||||
}
|
||||
Type::I64 => {
|
||||
write_opt(f, get_fast_type::<i64>(bytes))?;
|
||||
}
|
||||
Type::F64 => {
|
||||
write_opt(f, get_fast_type::<f64>(bytes))?;
|
||||
}
|
||||
Type::Bool => {
|
||||
write_opt(f, get_fast_type::<bool>(bytes))?;
|
||||
}
|
||||
// TODO pretty print these types too.
|
||||
Type::Date => {
|
||||
write_opt(f, get_fast_type::<DateTime>(bytes))?;
|
||||
}
|
||||
Type::Facet => {
|
||||
let facet_str = str::from_utf8(bytes)
|
||||
.ok()
|
||||
.map(ToString::to_string)
|
||||
.map(Facet::from_encoded_string)
|
||||
.map(|facet| facet.to_path_string());
|
||||
write_opt(f, facet_str)?;
|
||||
}
|
||||
Type::Bytes => {
|
||||
write_opt(f, Some(bytes))?;
|
||||
}
|
||||
Type::Json => {
|
||||
if let Some((path, typ, bytes)) = as_json_path_type_value_bytes(bytes) {
|
||||
let path_pretty = path.replace(JSON_PATH_SEGMENT_SEP_STR, ".");
|
||||
write!(f, "path={path_pretty}, vtype={typ:?}, ")?;
|
||||
debug_value_bytes(typ, bytes, f)?;
|
||||
}
|
||||
}
|
||||
Type::IpAddr => {
|
||||
write!(f, "")?; // TODO change once we actually have IP address terms.
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
impl<B> fmt::Debug for Term<B>
|
||||
where B: AsRef<[u8]>
|
||||
{
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
let field_id = self.field().field_id();
|
||||
let typ = self.typ();
|
||||
write!(f, "Term(type={typ:?}, field={field_id}, ")?;
|
||||
debug_value_bytes(typ, self.value_bytes(), f)?;
|
||||
write!(f, "Term(field={field_id}, ")?;
|
||||
let value_bytes = ValueBytes::wrap(&self.0.as_ref()[4..]);
|
||||
value_bytes.debug_value_bytes(f)?;
|
||||
write!(f, ")",)?;
|
||||
Ok(())
|
||||
}
|
||||
@@ -479,7 +549,7 @@ mod tests {
|
||||
let term = Term::from_field_text(title_field, "test");
|
||||
assert_eq!(term.field(), title_field);
|
||||
assert_eq!(term.typ(), Type::Str);
|
||||
assert_eq!(term.as_str(), Some("test"))
|
||||
assert_eq!(term.value().as_str(), Some("test"))
|
||||
}
|
||||
|
||||
/// Size (in bytes) of the buffer of a fast value (u64, i64, f64, or date) term.
|
||||
@@ -501,8 +571,8 @@ mod tests {
|
||||
let term = Term::from_field_u64(count_field, 983u64);
|
||||
assert_eq!(term.field(), count_field);
|
||||
assert_eq!(term.typ(), Type::U64);
|
||||
assert_eq!(term.as_slice().len(), FAST_VALUE_TERM_LEN);
|
||||
assert_eq!(term.as_u64(), Some(983u64))
|
||||
assert_eq!(term.serialized_term().len(), FAST_VALUE_TERM_LEN);
|
||||
assert_eq!(term.value().as_u64(), Some(983u64))
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -512,7 +582,7 @@ mod tests {
|
||||
let term = Term::from_field_bool(bool_field, true);
|
||||
assert_eq!(term.field(), bool_field);
|
||||
assert_eq!(term.typ(), Type::Bool);
|
||||
assert_eq!(term.as_slice().len(), FAST_VALUE_TERM_LEN);
|
||||
assert_eq!(term.as_bool(), Some(true))
|
||||
assert_eq!(term.serialized_term().len(), FAST_VALUE_TERM_LEN);
|
||||
assert_eq!(term.value().as_bool(), Some(true))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -310,7 +310,8 @@ impl SnippetGenerator {
         });
         let mut terms_text: BTreeMap<String, Score> = Default::default();
         for term in terms {
-            let term_str = if let Some(term_str) = term.as_str() {
+            let term_value = term.value();
+            let term_str = if let Some(term_str) = term_value.as_str() {
                 term_str
             } else {
                 continue;