return Error instead panic in fastfields

fixes #1572
This commit is contained in:
Pascal Seitz
2022-10-11 14:15:19 +08:00
parent 8b69aab0fc
commit 9cb8cfbea8
5 changed files with 101 additions and 52 deletions

View File

@@ -57,14 +57,15 @@ impl BytesFastFieldWriter {
/// Shift to the next document and add all of the
/// matching field values present in the document.
pub fn add_document(&mut self, doc: &Document) {
pub fn add_document(&mut self, doc: &Document) -> crate::Result<()> {
self.next_doc();
for field_value in doc.get_all(self.field) {
if let Value::Bytes(ref bytes) = field_value {
self.vals.extend_from_slice(bytes);
return;
return Ok(());
}
}
Ok(())
}
/// Register the bytes associated with a document.

View File

@@ -34,6 +34,7 @@ pub use self::multivalued::{
pub use self::readers::FastFieldReaders;
pub(crate) use self::readers::{type_and_cardinality, FastType};
pub use self::serializer::{Column, CompositeFastFieldSerializer};
use self::writer::unexpected_value;
pub use self::writer::{FastFieldsWriter, IntFastFieldWriter};
use crate::schema::{Type, Value};
use crate::{DateTime, DocId};
@@ -120,15 +121,16 @@ impl FastValue for DateTime {
}
}
fn value_to_u64(value: &Value) -> u64 {
match value {
fn value_to_u64(value: &Value) -> crate::Result<u64> {
let value = match value {
Value::U64(val) => val.to_u64(),
Value::I64(val) => val.to_u64(),
Value::F64(val) => val.to_u64(),
Value::Bool(val) => val.to_u64(),
Value::Date(val) => val.to_u64(),
_ => panic!("Expected a u64/i64/f64/bool/date field, got {:?} ", value),
}
_ => return Err(unexpected_value("u64/i64/f64/bool/date", value)),
};
Ok(value)
}
/// The fast field type
@@ -202,9 +204,15 @@ mod tests {
let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap();
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
fast_field_writers.add_document(&doc!(*FIELD=>13u64));
fast_field_writers.add_document(&doc!(*FIELD=>14u64));
fast_field_writers.add_document(&doc!(*FIELD=>2u64));
fast_field_writers
.add_document(&doc!(*FIELD=>13u64))
.unwrap();
fast_field_writers
.add_document(&doc!(*FIELD=>14u64))
.unwrap();
fast_field_writers
.add_document(&doc!(*FIELD=>2u64))
.unwrap();
fast_field_writers
.serialize(&mut serializer, &HashMap::new(), None)
.unwrap();
@@ -229,15 +237,33 @@ mod tests {
let write: WritePtr = directory.open_write(Path::new("test"))?;
let mut serializer = CompositeFastFieldSerializer::from_write(write)?;
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
fast_field_writers.add_document(&doc!(*FIELD=>4u64));
fast_field_writers.add_document(&doc!(*FIELD=>14_082_001u64));
fast_field_writers.add_document(&doc!(*FIELD=>3_052u64));
fast_field_writers.add_document(&doc!(*FIELD=>9_002u64));
fast_field_writers.add_document(&doc!(*FIELD=>15_001u64));
fast_field_writers.add_document(&doc!(*FIELD=>777u64));
fast_field_writers.add_document(&doc!(*FIELD=>1_002u64));
fast_field_writers.add_document(&doc!(*FIELD=>1_501u64));
fast_field_writers.add_document(&doc!(*FIELD=>215u64));
fast_field_writers
.add_document(&doc!(*FIELD=>4u64))
.unwrap();
fast_field_writers
.add_document(&doc!(*FIELD=>14_082_001u64))
.unwrap();
fast_field_writers
.add_document(&doc!(*FIELD=>3_052u64))
.unwrap();
fast_field_writers
.add_document(&doc!(*FIELD=>9_002u64))
.unwrap();
fast_field_writers
.add_document(&doc!(*FIELD=>15_001u64))
.unwrap();
fast_field_writers
.add_document(&doc!(*FIELD=>777u64))
.unwrap();
fast_field_writers
.add_document(&doc!(*FIELD=>1_002u64))
.unwrap();
fast_field_writers
.add_document(&doc!(*FIELD=>1_501u64))
.unwrap();
fast_field_writers
.add_document(&doc!(*FIELD=>215u64))
.unwrap();
fast_field_writers.serialize(&mut serializer, &HashMap::new(), None)?;
serializer.close()?;
}
@@ -273,7 +299,9 @@ mod tests {
let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap();
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
for _ in 0..10_000 {
fast_field_writers.add_document(&doc!(*FIELD=>100_000u64));
fast_field_writers
.add_document(&doc!(*FIELD=>100_000u64))
.unwrap();
}
fast_field_writers
.serialize(&mut serializer, &HashMap::new(), None)
@@ -306,9 +334,13 @@ mod tests {
let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap();
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
// forcing the amplitude to be high
fast_field_writers.add_document(&doc!(*FIELD=>0u64));
fast_field_writers
.add_document(&doc!(*FIELD=>0u64))
.unwrap();
for i in 0u64..10_000u64 {
fast_field_writers.add_document(&doc!(*FIELD=>5_000_000_000_000_000_000u64 + i));
fast_field_writers
.add_document(&doc!(*FIELD=>5_000_000_000_000_000_000u64 + i))
.unwrap();
}
fast_field_writers
.serialize(&mut serializer, &HashMap::new(), None)
@@ -350,7 +382,7 @@ mod tests {
for i in -100i64..10_000i64 {
let mut doc = Document::default();
doc.add_i64(i64_field, i);
fast_field_writers.add_document(&doc);
fast_field_writers.add_document(&doc).unwrap();
}
fast_field_writers
.serialize(&mut serializer, &HashMap::new(), None)
@@ -395,7 +427,7 @@ mod tests {
let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap();
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema);
let doc = Document::default();
fast_field_writers.add_document(&doc);
fast_field_writers.add_document(&doc).unwrap();
fast_field_writers
.serialize(&mut serializer, &HashMap::new(), None)
.unwrap();
@@ -438,7 +470,7 @@ mod tests {
let mut serializer = CompositeFastFieldSerializer::from_write(write)?;
let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA);
for &x in &permutation {
fast_field_writers.add_document(&doc!(*FIELD=>x));
fast_field_writers.add_document(&doc!(*FIELD=>x)).unwrap();
}
fast_field_writers.serialize(&mut serializer, &HashMap::new(), None)?;
serializer.close()?;
@@ -788,10 +820,14 @@ mod tests {
let write: WritePtr = directory.open_write(path).unwrap();
let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap();
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema);
fast_field_writers.add_document(&doc!(field=>true));
fast_field_writers.add_document(&doc!(field=>false));
fast_field_writers.add_document(&doc!(field=>true));
fast_field_writers.add_document(&doc!(field=>false));
fast_field_writers.add_document(&doc!(field=>true)).unwrap();
fast_field_writers
.add_document(&doc!(field=>false))
.unwrap();
fast_field_writers.add_document(&doc!(field=>true)).unwrap();
fast_field_writers
.add_document(&doc!(field=>false))
.unwrap();
fast_field_writers
.serialize(&mut serializer, &HashMap::new(), None)
.unwrap();
@@ -825,8 +861,10 @@ mod tests {
let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap();
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema);
for _ in 0..50 {
fast_field_writers.add_document(&doc!(field=>true));
fast_field_writers.add_document(&doc!(field=>false));
fast_field_writers.add_document(&doc!(field=>true)).unwrap();
fast_field_writers
.add_document(&doc!(field=>false))
.unwrap();
}
fast_field_writers
.serialize(&mut serializer, &HashMap::new(), None)
@@ -860,7 +898,7 @@ mod tests {
let mut serializer = CompositeFastFieldSerializer::from_write(write)?;
let mut fast_field_writers = FastFieldsWriter::from_schema(&schema);
let doc = Document::default();
fast_field_writers.add_document(&doc);
fast_field_writers.add_document(&doc).unwrap();
fast_field_writers.serialize(&mut serializer, &HashMap::new(), None)?;
serializer.close()?;
}
@@ -886,7 +924,7 @@ mod tests {
CompositeFastFieldSerializer::from_write_with_codec(write, codec_types).unwrap();
let mut fast_field_writers = FastFieldsWriter::from_schema(schema);
for doc in docs {
fast_field_writers.add_document(doc);
fast_field_writers.add_document(doc).unwrap();
}
fast_field_writers
.serialize(&mut serializer, &HashMap::new(), None)

View File

@@ -6,6 +6,7 @@ use fastfield_codecs::{
use fnv::FnvHashMap;
use super::get_fastfield_codecs_for_multivalue;
use crate::fastfield::writer::unexpected_value;
use crate::fastfield::{value_to_u64, CompositeFastFieldSerializer, FastFieldType};
use crate::indexer::doc_id_mapping::DocIdMapping;
use crate::postings::UnorderedTermId;
@@ -81,11 +82,11 @@ impl MultiValuedFastFieldWriter {
/// Shift to the next document and adds
/// all of the matching field values present in the document.
pub fn add_document(&mut self, doc: &Document) {
pub fn add_document(&mut self, doc: &Document) -> crate::Result<()> {
self.next_doc();
// facets/texts are indexed in the `SegmentWriter` as we encode their unordered id.
if self.fast_field_type.is_storing_term_ids() {
return;
return Ok(());
}
for field_value in doc.field_values() {
if field_value.field == self.field {
@@ -94,11 +95,12 @@ impl MultiValuedFastFieldWriter {
(Some(precision), Value::Date(date_val)) => {
date_val.truncate(precision).to_u64()
}
_ => value_to_u64(value),
_ => value_to_u64(value)?,
};
self.add_val(value_u64);
}
}
Ok(())
}
/// Returns an iterator over values per doc_id in ascending doc_id order.
@@ -312,18 +314,19 @@ impl MultiValueU128FastFieldWriter {
/// Shift to the next document and adds
/// all of the matching field values present in the document.
pub fn add_document(&mut self, doc: &Document) {
pub fn add_document(&mut self, doc: &Document) -> crate::Result<()> {
self.next_doc();
for field_value in doc.field_values() {
if field_value.field == self.field {
let value = field_value.value();
let ip_addr = value
.as_ip_addr()
.unwrap_or_else(|| panic!("expected and ip, but got {:?}", value));
.ok_or_else(|| unexpected_value("ip", value))?;
let ip_addr_u128 = ip_addr.to_u128();
self.add_val(ip_addr_u128);
}
}
Ok(())
}
/// Returns an iterator over values per doc_id in ascending doc_id order.

View File

@@ -25,6 +25,13 @@ pub struct FastFieldsWriter {
bytes_value_writers: Vec<BytesFastFieldWriter>,
}
pub(crate) fn unexpected_value(expected: &str, actual: &Value) -> crate::TantivyError {
crate::TantivyError::SchemaError(format!(
"Expected a {:?} in fast field, but got {:?}",
expected, actual
))
}
fn fast_field_default_value(field_entry: &FieldEntry) -> u64 {
match *field_entry.field_type() {
FieldType::I64(_) | FieldType::Date(_) => common::i64_to_u64(0i64),
@@ -222,25 +229,26 @@ impl FastFieldsWriter {
.find(|field_writer| field_writer.field() == field)
}
/// Indexes all of the fastfields of a new document.
pub fn add_document(&mut self, doc: &Document) {
pub fn add_document(&mut self, doc: &Document) -> crate::Result<()> {
for field_writer in &mut self.term_id_writers {
field_writer.add_document(doc);
field_writer.add_document(doc)?;
}
for field_writer in &mut self.single_value_writers {
field_writer.add_document(doc);
field_writer.add_document(doc)?;
}
for field_writer in &mut self.multi_values_writers {
field_writer.add_document(doc);
field_writer.add_document(doc)?;
}
for field_writer in &mut self.bytes_value_writers {
field_writer.add_document(doc);
field_writer.add_document(doc)?;
}
for field_writer in &mut self.u128_value_writers {
field_writer.add_document(doc);
field_writer.add_document(doc)?;
}
for field_writer in &mut self.u128_multi_value_writers {
field_writer.add_document(doc);
field_writer.add_document(doc)?;
}
Ok(())
}
/// Serializes all of the `FastFieldWriter`s by pushing them in
@@ -324,13 +332,10 @@ impl U128FastFieldWriter {
///
/// Extract the value associated to the fast field for
/// this document.
pub fn add_document(&mut self, doc: &Document) {
pub fn add_document(&mut self, doc: &Document) -> crate::Result<()> {
match doc.get_first(self.field) {
Some(v) => {
let ip_addr = v
.as_ip_addr()
.unwrap_or_else(|| panic!("expected and ip, but got {:?}", v));
let ip_addr = v.as_ip_addr().ok_or_else(|| unexpected_value("ip", v))?;
let value = ip_addr.to_u128();
self.add_val(value);
}
@@ -339,6 +344,7 @@ impl U128FastFieldWriter {
}
};
self.val_count += 1;
Ok(())
}
/// Push the fast fields value to the `FastFieldWriter`.
@@ -465,14 +471,14 @@ impl IntFastFieldWriter {
/// only the first one is taken in account.
///
/// Values on text fast fields are skipped.
pub fn add_document(&mut self, doc: &Document) {
pub fn add_document(&mut self, doc: &Document) -> crate::Result<()> {
match doc.get_first(self.field) {
Some(v) => {
let value = match (self.precision_opt, v) {
(Some(precision), Value::Date(date_val)) => {
date_val.truncate(precision).to_u64()
}
_ => super::value_to_u64(v),
_ => super::value_to_u64(v)?,
};
self.add_val(value);
}
@@ -480,6 +486,7 @@ impl IntFastFieldWriter {
self.add_val(self.val_if_missing);
}
};
Ok(())
}
/// get iterator over the data

View File

@@ -306,7 +306,7 @@ impl SegmentWriter {
pub fn add_document(&mut self, add_operation: AddOperation) -> crate::Result<()> {
let doc = add_operation.document;
self.doc_opstamps.push(add_operation.opstamp);
self.fast_field_writers.add_document(&doc);
self.fast_field_writers.add_document(&doc)?;
self.index_document(&doc)?;
let doc_writer = self.segment_serializer.get_store_writer();
doc_writer.store(&doc, &self.schema)?;