mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-03 00:50:41 +00:00
@@ -57,14 +57,15 @@ impl BytesFastFieldWriter {
|
||||
|
||||
/// Shift to the next document and add all of the
|
||||
/// matching field values present in the document.
|
||||
pub fn add_document(&mut self, doc: &Document) {
|
||||
pub fn add_document(&mut self, doc: &Document) -> crate::Result<()> {
|
||||
self.next_doc();
|
||||
for field_value in doc.get_all(self.field) {
|
||||
if let Value::Bytes(ref bytes) = field_value {
|
||||
self.vals.extend_from_slice(bytes);
|
||||
return;
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Register the bytes associated with a document.
|
||||
|
||||
@@ -34,6 +34,7 @@ pub use self::multivalued::{
|
||||
pub use self::readers::FastFieldReaders;
|
||||
pub(crate) use self::readers::{type_and_cardinality, FastType};
|
||||
pub use self::serializer::{Column, CompositeFastFieldSerializer};
|
||||
use self::writer::unexpected_value;
|
||||
pub use self::writer::{FastFieldsWriter, IntFastFieldWriter};
|
||||
use crate::schema::{Type, Value};
|
||||
use crate::{DateTime, DocId};
|
||||
@@ -120,15 +121,16 @@ impl FastValue for DateTime {
|
||||
}
|
||||
}
|
||||
|
||||
fn value_to_u64(value: &Value) -> u64 {
|
||||
match value {
|
||||
fn value_to_u64(value: &Value) -> crate::Result<u64> {
|
||||
let value = match value {
|
||||
Value::U64(val) => val.to_u64(),
|
||||
Value::I64(val) => val.to_u64(),
|
||||
Value::F64(val) => val.to_u64(),
|
||||
Value::Bool(val) => val.to_u64(),
|
||||
Value::Date(val) => val.to_u64(),
|
||||
_ => panic!("Expected a u64/i64/f64/bool/date field, got {:?} ", value),
|
||||
}
|
||||
_ => return Err(unexpected_value("u64/i64/f64/bool/date", value)),
|
||||
};
|
||||
Ok(value)
|
||||
}
|
||||
|
||||
/// The fast field type
|
||||
@@ -202,9 +204,15 @@ mod tests {
|
||||
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
|
||||
let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>13u64));
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>14u64));
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>2u64));
|
||||
fast_field_writers
|
||||
.add_document(&doc!(*FIELD=>13u64))
|
||||
.unwrap();
|
||||
fast_field_writers
|
||||
.add_document(&doc!(*FIELD=>14u64))
|
||||
.unwrap();
|
||||
fast_field_writers
|
||||
.add_document(&doc!(*FIELD=>2u64))
|
||||
.unwrap();
|
||||
fast_field_writers
|
||||
.serialize(&mut serializer, &HashMap::new(), None)
|
||||
.unwrap();
|
||||
@@ -229,15 +237,33 @@ mod tests {
|
||||
let write: WritePtr = directory.open_write(Path::new("test"))?;
|
||||
let mut serializer = CompositeFastFieldSerializer::from_write(write)?;
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>4u64));
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>14_082_001u64));
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>3_052u64));
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>9_002u64));
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>15_001u64));
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>777u64));
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>1_002u64));
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>1_501u64));
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>215u64));
|
||||
fast_field_writers
|
||||
.add_document(&doc!(*FIELD=>4u64))
|
||||
.unwrap();
|
||||
fast_field_writers
|
||||
.add_document(&doc!(*FIELD=>14_082_001u64))
|
||||
.unwrap();
|
||||
fast_field_writers
|
||||
.add_document(&doc!(*FIELD=>3_052u64))
|
||||
.unwrap();
|
||||
fast_field_writers
|
||||
.add_document(&doc!(*FIELD=>9_002u64))
|
||||
.unwrap();
|
||||
fast_field_writers
|
||||
.add_document(&doc!(*FIELD=>15_001u64))
|
||||
.unwrap();
|
||||
fast_field_writers
|
||||
.add_document(&doc!(*FIELD=>777u64))
|
||||
.unwrap();
|
||||
fast_field_writers
|
||||
.add_document(&doc!(*FIELD=>1_002u64))
|
||||
.unwrap();
|
||||
fast_field_writers
|
||||
.add_document(&doc!(*FIELD=>1_501u64))
|
||||
.unwrap();
|
||||
fast_field_writers
|
||||
.add_document(&doc!(*FIELD=>215u64))
|
||||
.unwrap();
|
||||
fast_field_writers.serialize(&mut serializer, &HashMap::new(), None)?;
|
||||
serializer.close()?;
|
||||
}
|
||||
@@ -273,7 +299,9 @@ mod tests {
|
||||
let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
|
||||
for _ in 0..10_000 {
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>100_000u64));
|
||||
fast_field_writers
|
||||
.add_document(&doc!(*FIELD=>100_000u64))
|
||||
.unwrap();
|
||||
}
|
||||
fast_field_writers
|
||||
.serialize(&mut serializer, &HashMap::new(), None)
|
||||
@@ -306,9 +334,13 @@ mod tests {
|
||||
let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
|
||||
// forcing the amplitude to be high
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>0u64));
|
||||
fast_field_writers
|
||||
.add_document(&doc!(*FIELD=>0u64))
|
||||
.unwrap();
|
||||
for i in 0u64..10_000u64 {
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>5_000_000_000_000_000_000u64 + i));
|
||||
fast_field_writers
|
||||
.add_document(&doc!(*FIELD=>5_000_000_000_000_000_000u64 + i))
|
||||
.unwrap();
|
||||
}
|
||||
fast_field_writers
|
||||
.serialize(&mut serializer, &HashMap::new(), None)
|
||||
@@ -350,7 +382,7 @@ mod tests {
|
||||
for i in -100i64..10_000i64 {
|
||||
let mut doc = Document::default();
|
||||
doc.add_i64(i64_field, i);
|
||||
fast_field_writers.add_document(&doc);
|
||||
fast_field_writers.add_document(&doc).unwrap();
|
||||
}
|
||||
fast_field_writers
|
||||
.serialize(&mut serializer, &HashMap::new(), None)
|
||||
@@ -395,7 +427,7 @@ mod tests {
|
||||
let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema);
|
||||
let doc = Document::default();
|
||||
fast_field_writers.add_document(&doc);
|
||||
fast_field_writers.add_document(&doc).unwrap();
|
||||
fast_field_writers
|
||||
.serialize(&mut serializer, &HashMap::new(), None)
|
||||
.unwrap();
|
||||
@@ -438,7 +470,7 @@ mod tests {
|
||||
let mut serializer = CompositeFastFieldSerializer::from_write(write)?;
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
|
||||
for &x in &permutation {
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>x));
|
||||
fast_field_writers.add_document(&doc!(*FIELD=>x)).unwrap();
|
||||
}
|
||||
fast_field_writers.serialize(&mut serializer, &HashMap::new(), None)?;
|
||||
serializer.close()?;
|
||||
@@ -788,10 +820,14 @@ mod tests {
|
||||
let write: WritePtr = directory.open_write(path).unwrap();
|
||||
let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema);
|
||||
fast_field_writers.add_document(&doc!(field=>true));
|
||||
fast_field_writers.add_document(&doc!(field=>false));
|
||||
fast_field_writers.add_document(&doc!(field=>true));
|
||||
fast_field_writers.add_document(&doc!(field=>false));
|
||||
fast_field_writers.add_document(&doc!(field=>true)).unwrap();
|
||||
fast_field_writers
|
||||
.add_document(&doc!(field=>false))
|
||||
.unwrap();
|
||||
fast_field_writers.add_document(&doc!(field=>true)).unwrap();
|
||||
fast_field_writers
|
||||
.add_document(&doc!(field=>false))
|
||||
.unwrap();
|
||||
fast_field_writers
|
||||
.serialize(&mut serializer, &HashMap::new(), None)
|
||||
.unwrap();
|
||||
@@ -825,8 +861,10 @@ mod tests {
|
||||
let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema);
|
||||
for _ in 0..50 {
|
||||
fast_field_writers.add_document(&doc!(field=>true));
|
||||
fast_field_writers.add_document(&doc!(field=>false));
|
||||
fast_field_writers.add_document(&doc!(field=>true)).unwrap();
|
||||
fast_field_writers
|
||||
.add_document(&doc!(field=>false))
|
||||
.unwrap();
|
||||
}
|
||||
fast_field_writers
|
||||
.serialize(&mut serializer, &HashMap::new(), None)
|
||||
@@ -860,7 +898,7 @@ mod tests {
|
||||
let mut serializer = CompositeFastFieldSerializer::from_write(write)?;
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema);
|
||||
let doc = Document::default();
|
||||
fast_field_writers.add_document(&doc);
|
||||
fast_field_writers.add_document(&doc).unwrap();
|
||||
fast_field_writers.serialize(&mut serializer, &HashMap::new(), None)?;
|
||||
serializer.close()?;
|
||||
}
|
||||
@@ -886,7 +924,7 @@ mod tests {
|
||||
CompositeFastFieldSerializer::from_write_with_codec(write, codec_types).unwrap();
|
||||
let mut fast_field_writers = FastFieldsWriter::from_schema(schema);
|
||||
for doc in docs {
|
||||
fast_field_writers.add_document(doc);
|
||||
fast_field_writers.add_document(doc).unwrap();
|
||||
}
|
||||
fast_field_writers
|
||||
.serialize(&mut serializer, &HashMap::new(), None)
|
||||
|
||||
@@ -6,6 +6,7 @@ use fastfield_codecs::{
|
||||
use fnv::FnvHashMap;
|
||||
|
||||
use super::get_fastfield_codecs_for_multivalue;
|
||||
use crate::fastfield::writer::unexpected_value;
|
||||
use crate::fastfield::{value_to_u64, CompositeFastFieldSerializer, FastFieldType};
|
||||
use crate::indexer::doc_id_mapping::DocIdMapping;
|
||||
use crate::postings::UnorderedTermId;
|
||||
@@ -81,11 +82,11 @@ impl MultiValuedFastFieldWriter {
|
||||
|
||||
/// Shift to the next document and adds
|
||||
/// all of the matching field values present in the document.
|
||||
pub fn add_document(&mut self, doc: &Document) {
|
||||
pub fn add_document(&mut self, doc: &Document) -> crate::Result<()> {
|
||||
self.next_doc();
|
||||
// facets/texts are indexed in the `SegmentWriter` as we encode their unordered id.
|
||||
if self.fast_field_type.is_storing_term_ids() {
|
||||
return;
|
||||
return Ok(());
|
||||
}
|
||||
for field_value in doc.field_values() {
|
||||
if field_value.field == self.field {
|
||||
@@ -94,11 +95,12 @@ impl MultiValuedFastFieldWriter {
|
||||
(Some(precision), Value::Date(date_val)) => {
|
||||
date_val.truncate(precision).to_u64()
|
||||
}
|
||||
_ => value_to_u64(value),
|
||||
_ => value_to_u64(value)?,
|
||||
};
|
||||
self.add_val(value_u64);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns an iterator over values per doc_id in ascending doc_id order.
|
||||
@@ -312,18 +314,19 @@ impl MultiValueU128FastFieldWriter {
|
||||
|
||||
/// Shift to the next document and adds
|
||||
/// all of the matching field values present in the document.
|
||||
pub fn add_document(&mut self, doc: &Document) {
|
||||
pub fn add_document(&mut self, doc: &Document) -> crate::Result<()> {
|
||||
self.next_doc();
|
||||
for field_value in doc.field_values() {
|
||||
if field_value.field == self.field {
|
||||
let value = field_value.value();
|
||||
let ip_addr = value
|
||||
.as_ip_addr()
|
||||
.unwrap_or_else(|| panic!("expected and ip, but got {:?}", value));
|
||||
.ok_or_else(|| unexpected_value("ip", value))?;
|
||||
let ip_addr_u128 = ip_addr.to_u128();
|
||||
self.add_val(ip_addr_u128);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns an iterator over values per doc_id in ascending doc_id order.
|
||||
|
||||
@@ -25,6 +25,13 @@ pub struct FastFieldsWriter {
|
||||
bytes_value_writers: Vec<BytesFastFieldWriter>,
|
||||
}
|
||||
|
||||
pub(crate) fn unexpected_value(expected: &str, actual: &Value) -> crate::TantivyError {
|
||||
crate::TantivyError::SchemaError(format!(
|
||||
"Expected a {:?} in fast field, but got {:?}",
|
||||
expected, actual
|
||||
))
|
||||
}
|
||||
|
||||
fn fast_field_default_value(field_entry: &FieldEntry) -> u64 {
|
||||
match *field_entry.field_type() {
|
||||
FieldType::I64(_) | FieldType::Date(_) => common::i64_to_u64(0i64),
|
||||
@@ -222,25 +229,26 @@ impl FastFieldsWriter {
|
||||
.find(|field_writer| field_writer.field() == field)
|
||||
}
|
||||
/// Indexes all of the fastfields of a new document.
|
||||
pub fn add_document(&mut self, doc: &Document) {
|
||||
pub fn add_document(&mut self, doc: &Document) -> crate::Result<()> {
|
||||
for field_writer in &mut self.term_id_writers {
|
||||
field_writer.add_document(doc);
|
||||
field_writer.add_document(doc)?;
|
||||
}
|
||||
for field_writer in &mut self.single_value_writers {
|
||||
field_writer.add_document(doc);
|
||||
field_writer.add_document(doc)?;
|
||||
}
|
||||
for field_writer in &mut self.multi_values_writers {
|
||||
field_writer.add_document(doc);
|
||||
field_writer.add_document(doc)?;
|
||||
}
|
||||
for field_writer in &mut self.bytes_value_writers {
|
||||
field_writer.add_document(doc);
|
||||
field_writer.add_document(doc)?;
|
||||
}
|
||||
for field_writer in &mut self.u128_value_writers {
|
||||
field_writer.add_document(doc);
|
||||
field_writer.add_document(doc)?;
|
||||
}
|
||||
for field_writer in &mut self.u128_multi_value_writers {
|
||||
field_writer.add_document(doc);
|
||||
field_writer.add_document(doc)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Serializes all of the `FastFieldWriter`s by pushing them in
|
||||
@@ -324,13 +332,10 @@ impl U128FastFieldWriter {
|
||||
///
|
||||
/// Extract the value associated to the fast field for
|
||||
/// this document.
|
||||
pub fn add_document(&mut self, doc: &Document) {
|
||||
pub fn add_document(&mut self, doc: &Document) -> crate::Result<()> {
|
||||
match doc.get_first(self.field) {
|
||||
Some(v) => {
|
||||
let ip_addr = v
|
||||
.as_ip_addr()
|
||||
.unwrap_or_else(|| panic!("expected and ip, but got {:?}", v));
|
||||
|
||||
let ip_addr = v.as_ip_addr().ok_or_else(|| unexpected_value("ip", v))?;
|
||||
let value = ip_addr.to_u128();
|
||||
self.add_val(value);
|
||||
}
|
||||
@@ -339,6 +344,7 @@ impl U128FastFieldWriter {
|
||||
}
|
||||
};
|
||||
self.val_count += 1;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Push the fast fields value to the `FastFieldWriter`.
|
||||
@@ -465,14 +471,14 @@ impl IntFastFieldWriter {
|
||||
/// only the first one is taken in account.
|
||||
///
|
||||
/// Values on text fast fields are skipped.
|
||||
pub fn add_document(&mut self, doc: &Document) {
|
||||
pub fn add_document(&mut self, doc: &Document) -> crate::Result<()> {
|
||||
match doc.get_first(self.field) {
|
||||
Some(v) => {
|
||||
let value = match (self.precision_opt, v) {
|
||||
(Some(precision), Value::Date(date_val)) => {
|
||||
date_val.truncate(precision).to_u64()
|
||||
}
|
||||
_ => super::value_to_u64(v),
|
||||
_ => super::value_to_u64(v)?,
|
||||
};
|
||||
self.add_val(value);
|
||||
}
|
||||
@@ -480,6 +486,7 @@ impl IntFastFieldWriter {
|
||||
self.add_val(self.val_if_missing);
|
||||
}
|
||||
};
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// get iterator over the data
|
||||
|
||||
@@ -306,7 +306,7 @@ impl SegmentWriter {
|
||||
pub fn add_document(&mut self, add_operation: AddOperation) -> crate::Result<()> {
|
||||
let doc = add_operation.document;
|
||||
self.doc_opstamps.push(add_operation.opstamp);
|
||||
self.fast_field_writers.add_document(&doc);
|
||||
self.fast_field_writers.add_document(&doc)?;
|
||||
self.index_document(&doc)?;
|
||||
let doc_writer = self.segment_serializer.get_store_writer();
|
||||
doc_writer.store(&doc, &self.schema)?;
|
||||
|
||||
Reference in New Issue
Block a user