From 9cb8cfbea8c205f289ed776daf23fd01079a71b3 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Tue, 11 Oct 2022 14:15:19 +0800 Subject: [PATCH] return Error instead panic in fastfields fixes #1572 --- src/fastfield/bytes/writer.rs | 5 +- src/fastfield/mod.rs | 98 ++++++++++++++++++++--------- src/fastfield/multivalued/writer.rs | 13 ++-- src/fastfield/writer.rs | 35 ++++++----- src/indexer/segment_writer.rs | 2 +- 5 files changed, 101 insertions(+), 52 deletions(-) diff --git a/src/fastfield/bytes/writer.rs b/src/fastfield/bytes/writer.rs index dcda5c3d7..b616d4e46 100644 --- a/src/fastfield/bytes/writer.rs +++ b/src/fastfield/bytes/writer.rs @@ -57,14 +57,15 @@ impl BytesFastFieldWriter { /// Shift to the next document and add all of the /// matching field values present in the document. - pub fn add_document(&mut self, doc: &Document) { + pub fn add_document(&mut self, doc: &Document) -> crate::Result<()> { self.next_doc(); for field_value in doc.get_all(self.field) { if let Value::Bytes(ref bytes) = field_value { self.vals.extend_from_slice(bytes); - return; + return Ok(()); } } + Ok(()) } /// Register the bytes associated with a document. diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index c825ee85c..7a43f9ed4 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -34,6 +34,7 @@ pub use self::multivalued::{ pub use self::readers::FastFieldReaders; pub(crate) use self::readers::{type_and_cardinality, FastType}; pub use self::serializer::{Column, CompositeFastFieldSerializer}; +use self::writer::unexpected_value; pub use self::writer::{FastFieldsWriter, IntFastFieldWriter}; use crate::schema::{Type, Value}; use crate::{DateTime, DocId}; @@ -120,15 +121,16 @@ impl FastValue for DateTime { } } -fn value_to_u64(value: &Value) -> u64 { - match value { +fn value_to_u64(value: &Value) -> crate::Result { + let value = match value { Value::U64(val) => val.to_u64(), Value::I64(val) => val.to_u64(), Value::F64(val) => val.to_u64(), Value::Bool(val) => val.to_u64(), Value::Date(val) => val.to_u64(), - _ => panic!("Expected a u64/i64/f64/bool/date field, got {:?} ", value), - } + _ => return Err(unexpected_value("u64/i64/f64/bool/date", value)), + }; + Ok(value) } /// The fast field type @@ -202,9 +204,15 @@ mod tests { let write: WritePtr = directory.open_write(Path::new("test")).unwrap(); let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap(); let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA); - fast_field_writers.add_document(&doc!(*FIELD=>13u64)); - fast_field_writers.add_document(&doc!(*FIELD=>14u64)); - fast_field_writers.add_document(&doc!(*FIELD=>2u64)); + fast_field_writers + .add_document(&doc!(*FIELD=>13u64)) + .unwrap(); + fast_field_writers + .add_document(&doc!(*FIELD=>14u64)) + .unwrap(); + fast_field_writers + .add_document(&doc!(*FIELD=>2u64)) + .unwrap(); fast_field_writers .serialize(&mut serializer, &HashMap::new(), None) .unwrap(); @@ -229,15 +237,33 @@ mod tests { let write: WritePtr = directory.open_write(Path::new("test"))?; let mut serializer = CompositeFastFieldSerializer::from_write(write)?; let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA); - fast_field_writers.add_document(&doc!(*FIELD=>4u64)); - fast_field_writers.add_document(&doc!(*FIELD=>14_082_001u64)); - fast_field_writers.add_document(&doc!(*FIELD=>3_052u64)); - fast_field_writers.add_document(&doc!(*FIELD=>9_002u64)); - fast_field_writers.add_document(&doc!(*FIELD=>15_001u64)); - fast_field_writers.add_document(&doc!(*FIELD=>777u64)); - fast_field_writers.add_document(&doc!(*FIELD=>1_002u64)); - fast_field_writers.add_document(&doc!(*FIELD=>1_501u64)); - fast_field_writers.add_document(&doc!(*FIELD=>215u64)); + fast_field_writers + .add_document(&doc!(*FIELD=>4u64)) + .unwrap(); + fast_field_writers + .add_document(&doc!(*FIELD=>14_082_001u64)) + .unwrap(); + fast_field_writers + .add_document(&doc!(*FIELD=>3_052u64)) + .unwrap(); + fast_field_writers + .add_document(&doc!(*FIELD=>9_002u64)) + .unwrap(); + fast_field_writers + .add_document(&doc!(*FIELD=>15_001u64)) + .unwrap(); + fast_field_writers + .add_document(&doc!(*FIELD=>777u64)) + .unwrap(); + fast_field_writers + .add_document(&doc!(*FIELD=>1_002u64)) + .unwrap(); + fast_field_writers + .add_document(&doc!(*FIELD=>1_501u64)) + .unwrap(); + fast_field_writers + .add_document(&doc!(*FIELD=>215u64)) + .unwrap(); fast_field_writers.serialize(&mut serializer, &HashMap::new(), None)?; serializer.close()?; } @@ -273,7 +299,9 @@ mod tests { let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap(); let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA); for _ in 0..10_000 { - fast_field_writers.add_document(&doc!(*FIELD=>100_000u64)); + fast_field_writers + .add_document(&doc!(*FIELD=>100_000u64)) + .unwrap(); } fast_field_writers .serialize(&mut serializer, &HashMap::new(), None) @@ -306,9 +334,13 @@ mod tests { let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap(); let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA); // forcing the amplitude to be high - fast_field_writers.add_document(&doc!(*FIELD=>0u64)); + fast_field_writers + .add_document(&doc!(*FIELD=>0u64)) + .unwrap(); for i in 0u64..10_000u64 { - fast_field_writers.add_document(&doc!(*FIELD=>5_000_000_000_000_000_000u64 + i)); + fast_field_writers + .add_document(&doc!(*FIELD=>5_000_000_000_000_000_000u64 + i)) + .unwrap(); } fast_field_writers .serialize(&mut serializer, &HashMap::new(), None) @@ -350,7 +382,7 @@ mod tests { for i in -100i64..10_000i64 { let mut doc = Document::default(); doc.add_i64(i64_field, i); - fast_field_writers.add_document(&doc); + fast_field_writers.add_document(&doc).unwrap(); } fast_field_writers .serialize(&mut serializer, &HashMap::new(), None) @@ -395,7 +427,7 @@ mod tests { let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap(); let mut fast_field_writers = FastFieldsWriter::from_schema(&schema); let doc = Document::default(); - fast_field_writers.add_document(&doc); + fast_field_writers.add_document(&doc).unwrap(); fast_field_writers .serialize(&mut serializer, &HashMap::new(), None) .unwrap(); @@ -438,7 +470,7 @@ mod tests { let mut serializer = CompositeFastFieldSerializer::from_write(write)?; let mut fast_field_writers = FastFieldsWriter::from_schema(&SCHEMA); for &x in &permutation { - fast_field_writers.add_document(&doc!(*FIELD=>x)); + fast_field_writers.add_document(&doc!(*FIELD=>x)).unwrap(); } fast_field_writers.serialize(&mut serializer, &HashMap::new(), None)?; serializer.close()?; @@ -788,10 +820,14 @@ mod tests { let write: WritePtr = directory.open_write(path).unwrap(); let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap(); let mut fast_field_writers = FastFieldsWriter::from_schema(&schema); - fast_field_writers.add_document(&doc!(field=>true)); - fast_field_writers.add_document(&doc!(field=>false)); - fast_field_writers.add_document(&doc!(field=>true)); - fast_field_writers.add_document(&doc!(field=>false)); + fast_field_writers.add_document(&doc!(field=>true)).unwrap(); + fast_field_writers + .add_document(&doc!(field=>false)) + .unwrap(); + fast_field_writers.add_document(&doc!(field=>true)).unwrap(); + fast_field_writers + .add_document(&doc!(field=>false)) + .unwrap(); fast_field_writers .serialize(&mut serializer, &HashMap::new(), None) .unwrap(); @@ -825,8 +861,10 @@ mod tests { let mut serializer = CompositeFastFieldSerializer::from_write(write).unwrap(); let mut fast_field_writers = FastFieldsWriter::from_schema(&schema); for _ in 0..50 { - fast_field_writers.add_document(&doc!(field=>true)); - fast_field_writers.add_document(&doc!(field=>false)); + fast_field_writers.add_document(&doc!(field=>true)).unwrap(); + fast_field_writers + .add_document(&doc!(field=>false)) + .unwrap(); } fast_field_writers .serialize(&mut serializer, &HashMap::new(), None) @@ -860,7 +898,7 @@ mod tests { let mut serializer = CompositeFastFieldSerializer::from_write(write)?; let mut fast_field_writers = FastFieldsWriter::from_schema(&schema); let doc = Document::default(); - fast_field_writers.add_document(&doc); + fast_field_writers.add_document(&doc).unwrap(); fast_field_writers.serialize(&mut serializer, &HashMap::new(), None)?; serializer.close()?; } @@ -886,7 +924,7 @@ mod tests { CompositeFastFieldSerializer::from_write_with_codec(write, codec_types).unwrap(); let mut fast_field_writers = FastFieldsWriter::from_schema(schema); for doc in docs { - fast_field_writers.add_document(doc); + fast_field_writers.add_document(doc).unwrap(); } fast_field_writers .serialize(&mut serializer, &HashMap::new(), None) diff --git a/src/fastfield/multivalued/writer.rs b/src/fastfield/multivalued/writer.rs index 127416f59..84cc0aa3b 100644 --- a/src/fastfield/multivalued/writer.rs +++ b/src/fastfield/multivalued/writer.rs @@ -6,6 +6,7 @@ use fastfield_codecs::{ use fnv::FnvHashMap; use super::get_fastfield_codecs_for_multivalue; +use crate::fastfield::writer::unexpected_value; use crate::fastfield::{value_to_u64, CompositeFastFieldSerializer, FastFieldType}; use crate::indexer::doc_id_mapping::DocIdMapping; use crate::postings::UnorderedTermId; @@ -81,11 +82,11 @@ impl MultiValuedFastFieldWriter { /// Shift to the next document and adds /// all of the matching field values present in the document. - pub fn add_document(&mut self, doc: &Document) { + pub fn add_document(&mut self, doc: &Document) -> crate::Result<()> { self.next_doc(); // facets/texts are indexed in the `SegmentWriter` as we encode their unordered id. if self.fast_field_type.is_storing_term_ids() { - return; + return Ok(()); } for field_value in doc.field_values() { if field_value.field == self.field { @@ -94,11 +95,12 @@ impl MultiValuedFastFieldWriter { (Some(precision), Value::Date(date_val)) => { date_val.truncate(precision).to_u64() } - _ => value_to_u64(value), + _ => value_to_u64(value)?, }; self.add_val(value_u64); } } + Ok(()) } /// Returns an iterator over values per doc_id in ascending doc_id order. @@ -312,18 +314,19 @@ impl MultiValueU128FastFieldWriter { /// Shift to the next document and adds /// all of the matching field values present in the document. - pub fn add_document(&mut self, doc: &Document) { + pub fn add_document(&mut self, doc: &Document) -> crate::Result<()> { self.next_doc(); for field_value in doc.field_values() { if field_value.field == self.field { let value = field_value.value(); let ip_addr = value .as_ip_addr() - .unwrap_or_else(|| panic!("expected and ip, but got {:?}", value)); + .ok_or_else(|| unexpected_value("ip", value))?; let ip_addr_u128 = ip_addr.to_u128(); self.add_val(ip_addr_u128); } } + Ok(()) } /// Returns an iterator over values per doc_id in ascending doc_id order. diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs index 4e1fe50bc..8478008e9 100644 --- a/src/fastfield/writer.rs +++ b/src/fastfield/writer.rs @@ -25,6 +25,13 @@ pub struct FastFieldsWriter { bytes_value_writers: Vec, } +pub(crate) fn unexpected_value(expected: &str, actual: &Value) -> crate::TantivyError { + crate::TantivyError::SchemaError(format!( + "Expected a {:?} in fast field, but got {:?}", + expected, actual + )) +} + fn fast_field_default_value(field_entry: &FieldEntry) -> u64 { match *field_entry.field_type() { FieldType::I64(_) | FieldType::Date(_) => common::i64_to_u64(0i64), @@ -222,25 +229,26 @@ impl FastFieldsWriter { .find(|field_writer| field_writer.field() == field) } /// Indexes all of the fastfields of a new document. - pub fn add_document(&mut self, doc: &Document) { + pub fn add_document(&mut self, doc: &Document) -> crate::Result<()> { for field_writer in &mut self.term_id_writers { - field_writer.add_document(doc); + field_writer.add_document(doc)?; } for field_writer in &mut self.single_value_writers { - field_writer.add_document(doc); + field_writer.add_document(doc)?; } for field_writer in &mut self.multi_values_writers { - field_writer.add_document(doc); + field_writer.add_document(doc)?; } for field_writer in &mut self.bytes_value_writers { - field_writer.add_document(doc); + field_writer.add_document(doc)?; } for field_writer in &mut self.u128_value_writers { - field_writer.add_document(doc); + field_writer.add_document(doc)?; } for field_writer in &mut self.u128_multi_value_writers { - field_writer.add_document(doc); + field_writer.add_document(doc)?; } + Ok(()) } /// Serializes all of the `FastFieldWriter`s by pushing them in @@ -324,13 +332,10 @@ impl U128FastFieldWriter { /// /// Extract the value associated to the fast field for /// this document. - pub fn add_document(&mut self, doc: &Document) { + pub fn add_document(&mut self, doc: &Document) -> crate::Result<()> { match doc.get_first(self.field) { Some(v) => { - let ip_addr = v - .as_ip_addr() - .unwrap_or_else(|| panic!("expected and ip, but got {:?}", v)); - + let ip_addr = v.as_ip_addr().ok_or_else(|| unexpected_value("ip", v))?; let value = ip_addr.to_u128(); self.add_val(value); } @@ -339,6 +344,7 @@ impl U128FastFieldWriter { } }; self.val_count += 1; + Ok(()) } /// Push the fast fields value to the `FastFieldWriter`. @@ -465,14 +471,14 @@ impl IntFastFieldWriter { /// only the first one is taken in account. /// /// Values on text fast fields are skipped. - pub fn add_document(&mut self, doc: &Document) { + pub fn add_document(&mut self, doc: &Document) -> crate::Result<()> { match doc.get_first(self.field) { Some(v) => { let value = match (self.precision_opt, v) { (Some(precision), Value::Date(date_val)) => { date_val.truncate(precision).to_u64() } - _ => super::value_to_u64(v), + _ => super::value_to_u64(v)?, }; self.add_val(value); } @@ -480,6 +486,7 @@ impl IntFastFieldWriter { self.add_val(self.val_if_missing); } }; + Ok(()) } /// get iterator over the data diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index 1cb7916ec..812aa78b6 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -306,7 +306,7 @@ impl SegmentWriter { pub fn add_document(&mut self, add_operation: AddOperation) -> crate::Result<()> { let doc = add_operation.document; self.doc_opstamps.push(add_operation.opstamp); - self.fast_field_writers.add_document(&doc); + self.fast_field_writers.add_document(&doc)?; self.index_document(&doc)?; let doc_writer = self.segment_serializer.get_store_writer(); doc_writer.store(&doc, &self.schema)?;