From 570dcb67cf89ff358a5578d22f71f9d64e7c17ea Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Thu, 3 Dec 2020 14:01:29 +0900 Subject: [PATCH] Small refactoring --- src/collector/facet_collector.rs | 9 ++++++--- src/collector/top_score_collector.rs | 2 +- src/core/index.rs | 16 ++++++++-------- src/fastfield/facet_reader.rs | 8 +++++--- src/positions/serializer.rs | 14 +++++++------- src/postings/mod.rs | 7 ++++--- src/query/automaton_weight.rs | 3 +++ src/schema/facet.rs | 1 + src/termdict/fst_termdict/termdict.rs | 7 +++---- src/termdict/tests.rs | 2 +- 10 files changed, 39 insertions(+), 30 deletions(-) diff --git a/src/collector/facet_collector.rs b/src/collector/facet_collector.rs index 6d91ef5d2..6c9404e7f 100644 --- a/src/collector/facet_collector.rs +++ b/src/collector/facet_collector.rs @@ -368,9 +368,12 @@ impl SegmentCollector for FacetSegmentCollector { } let mut facet = vec![]; let facet_ord = self.collapse_facet_ords[collapsed_facet_ord]; - facet_dict.ord_to_term(facet_ord as u64, &mut facet); - // TODO - facet_counts.insert(Facet::from_encoded(facet).unwrap(), count); + // TODO handle errors. + if facet_dict.ord_to_term(facet_ord as u64, &mut facet).is_ok() { + if let Ok(facet) = Facet::from_encoded(facet) { + facet_counts.insert(facet, count); + } + } } FacetCounts { facet_counts } } diff --git a/src/collector/top_score_collector.rs b/src/collector/top_score_collector.rs index f1af215ee..85f857b68 100644 --- a/src/collector/top_score_collector.rs +++ b/src/collector/top_score_collector.rs @@ -728,7 +728,7 @@ mod tests { } #[test] - fn test_top_collector_not_at_capacity() { + fn test_top_collector_not_at_capacity_without_offset() { let index = make_index(); let field = index.schema().get_field("text").unwrap(); let query_parser = QueryParser::for_index(&index, vec![field]); diff --git a/src/core/index.rs b/src/core/index.rs index 5dacfcbf5..12ef5e37f 100644 --- a/src/core/index.rs +++ b/src/core/index.rs @@ -511,28 +511,28 @@ mod tests { } #[test] - fn test_index_manual_policy_mmap() { + fn test_index_manual_policy_mmap() -> crate::Result<()> { let schema = throw_away_schema(); let field = schema.get_field("num_likes").unwrap(); - let mut index = Index::create_from_tempdir(schema).unwrap(); - let mut writer = index.writer_for_tests().unwrap(); - writer.commit().unwrap(); + let mut index = Index::create_from_tempdir(schema)?; + let mut writer = index.writer_for_tests()?; + writer.commit()?; let reader = index .reader_builder() .reload_policy(ReloadPolicy::Manual) - .try_into() - .unwrap(); + .try_into()?; assert_eq!(reader.searcher().num_docs(), 0); writer.add_document(doc!(field=>1u64)); let (sender, receiver) = crossbeam::channel::unbounded(); let _handle = index.directory_mut().watch(WatchCallback::new(move || { let _ = sender.send(()); })); - writer.commit().unwrap(); + writer.commit()?; assert!(receiver.recv().is_ok()); assert_eq!(reader.searcher().num_docs(), 0); - reader.reload().unwrap(); + reader.reload()?; assert_eq!(reader.searcher().num_docs(), 1); + Ok(()) } #[test] diff --git a/src/fastfield/facet_reader.rs b/src/fastfield/facet_reader.rs index 78598f38a..ced6d0654 100644 --- a/src/fastfield/facet_reader.rs +++ b/src/fastfield/facet_reader.rs @@ -1,4 +1,5 @@ use super::MultiValueIntFastFieldReader; +use crate::error::DataCorruption; use crate::schema::Facet; use crate::termdict::TermDictionary; use crate::termdict::TermOrdinal; @@ -62,12 +63,13 @@ impl FacetReader { &mut self, facet_ord: TermOrdinal, output: &mut Facet, - ) -> Result<(), str::Utf8Error> { + ) -> crate::Result<()> { let found_term = self .term_dict - .ord_to_term(facet_ord as u64, &mut self.buffer); + .ord_to_term(facet_ord as u64, &mut self.buffer)?; assert!(found_term, "Term ordinal {} no found.", facet_ord); - let facet_str = str::from_utf8(&self.buffer[..])?; + let facet_str = str::from_utf8(&self.buffer[..]) + .map_err(|utf8_err| DataCorruption::comment_only(utf8_err.to_string()))?; output.set_facet_str(facet_str); Ok(()) } diff --git a/src/positions/serializer.rs b/src/positions/serializer.rs index 49cfdda83..72eb652b4 100644 --- a/src/positions/serializer.rs +++ b/src/positions/serializer.rs @@ -8,7 +8,7 @@ use std::io::{self, Write}; pub struct PositionSerializer { bit_packer: BitPacker4x, write_stream: CountingWriter, - write_skiplist: W, + write_skip_index: W, block: Vec, buffer: Vec, num_ints: u64, @@ -16,11 +16,11 @@ pub struct PositionSerializer { } impl PositionSerializer { - pub fn new(write_stream: W, write_skiplist: W) -> PositionSerializer { + pub fn new(write_stream: W, write_skip_index: W) -> PositionSerializer { PositionSerializer { bit_packer: BitPacker4x::new(), write_stream: CountingWriter::wrap(write_stream), - write_skiplist, + write_skip_index, block: Vec::with_capacity(128), buffer: vec![0u8; 128 * 4], num_ints: 0u64, @@ -52,7 +52,7 @@ impl PositionSerializer { fn flush_block(&mut self) -> io::Result<()> { let num_bits = self.bit_packer.num_bits(&self.block[..]); - self.write_skiplist.write_all(&[num_bits])?; + self.write_skip_index.write_all(&[num_bits])?; let written_len = self .bit_packer .compress(&self.block[..], &mut self.buffer, num_bits); @@ -70,10 +70,10 @@ impl PositionSerializer { self.flush_block()?; } for &long_skip in &self.long_skips { - long_skip.serialize(&mut self.write_skiplist)?; + long_skip.serialize(&mut self.write_skip_index)?; } - (self.long_skips.len() as u32).serialize(&mut self.write_skiplist)?; - self.write_skiplist.flush()?; + (self.long_skips.len() as u32).serialize(&mut self.write_skip_index)?; + self.write_skip_index.flush()?; self.write_stream.flush()?; Ok(()) } diff --git a/src/postings/mod.rs b/src/postings/mod.rs index 226adf46d..ca736ed18 100644 --- a/src/postings/mod.rs +++ b/src/postings/mod.rs @@ -54,7 +54,7 @@ pub mod tests { use crate::DocId; use crate::HasLen; use crate::Score; - use std::iter; + use std::{iter, mem}; #[test] pub fn test_position_write() -> crate::Result<()> { @@ -71,6 +71,7 @@ pub mod tests { field_serializer.write_doc(doc_id, 4, &delta_positions)?; } field_serializer.close_term()?; + mem::drop(field_serializer); posting_serializer.close()?; let read = segment.open_read(SegmentComponent::POSITIONS)?; assert!(read.len() <= 140); @@ -179,7 +180,7 @@ pub mod tests { let inverted_index = segment_reader.inverted_index(text_field)?; assert_eq!(inverted_index.terms().num_terms(), 1); let mut bytes = vec![]; - assert!(inverted_index.terms().ord_to_term(0, &mut bytes)); + assert!(inverted_index.terms().ord_to_term(0, &mut bytes)?); assert_eq!(&bytes, b"hello"); } { @@ -191,7 +192,7 @@ pub mod tests { let inverted_index = segment_reader.inverted_index(text_field)?; assert_eq!(inverted_index.terms().num_terms(), 1); let mut bytes = vec![]; - assert!(inverted_index.terms().ord_to_term(0, &mut bytes)); + assert!(inverted_index.terms().ord_to_term(0, &mut bytes)?); assert_eq!(&bytes[..], ok_token_text.as_bytes()); } Ok(()) diff --git a/src/query/automaton_weight.rs b/src/query/automaton_weight.rs index 6ca424e0a..2ffa4309a 100644 --- a/src/query/automaton_weight.rs +++ b/src/query/automaton_weight.rs @@ -20,6 +20,7 @@ pub struct AutomatonWeight { impl AutomatonWeight where A: Automaton + Send + Sync + 'static, + A::State: Clone, { /// Create a new AutomationWeight pub fn new>>(field: Field, automaton: IntoArcA) -> AutomatonWeight { @@ -42,6 +43,7 @@ where impl Weight for AutomatonWeight where A: Automaton + Send + Sync + 'static, + A::State: Clone, { fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result> { let max_doc = reader.max_doc(); @@ -102,6 +104,7 @@ mod tests { index } + #[derive(Clone, Copy)] enum State { Start, NotMatching, diff --git a/src/schema/facet.rs b/src/schema/facet.rs index 192a536cd..1fec07185 100644 --- a/src/schema/facet.rs +++ b/src/schema/facet.rs @@ -233,6 +233,7 @@ mod tests { assert_eq!(Facet::root(), Facet::from("/")); assert_eq!(format!("{}", Facet::root()), "/"); assert!(Facet::root().is_root()); + assert_eq!(Facet::root().encoded_str(), ""); } #[test] diff --git a/src/termdict/fst_termdict/termdict.rs b/src/termdict/fst_termdict/termdict.rs index 240706dc6..ff0d4ec5f 100644 --- a/src/termdict/fst_termdict/termdict.rs +++ b/src/termdict/fst_termdict/termdict.rs @@ -80,7 +80,6 @@ where .serialize(&mut counting_writer)?; let footer_size = counting_writer.written_bytes(); (footer_size as u64).serialize(&mut counting_writer)?; - counting_writer.flush()?; } Ok(file) } @@ -152,7 +151,7 @@ impl TermDictionary { /// /// Regardless of whether the term is found or not, /// the buffer may be modified. - pub fn ord_to_term(&self, mut ord: TermOrdinal, bytes: &mut Vec) -> bool { + pub fn ord_to_term(&self, mut ord: TermOrdinal, bytes: &mut Vec) -> io::Result { bytes.clear(); let fst = self.fst_index.as_fst(); let mut node = fst.root(); @@ -167,10 +166,10 @@ impl TermDictionary { let new_node_addr = transition.addr; node = fst.node(new_node_addr); } else { - return false; + return Ok(false); } } - true + Ok(true) } /// Returns the number of terms in the dictionary. diff --git a/src/termdict/tests.rs b/src/termdict/tests.rs index dae67944b..94a1cff1c 100644 --- a/src/termdict/tests.rs +++ b/src/termdict/tests.rs @@ -50,7 +50,7 @@ fn test_term_ordinals() -> crate::Result<()> { for (term_ord, term) in COUNTRIES.iter().enumerate() { assert_eq!(term_dict.term_ord(term)?, Some(term_ord as u64)); let mut bytes = vec![]; - assert!(term_dict.ord_to_term(term_ord as u64, &mut bytes)); + assert!(term_dict.ord_to_term(term_ord as u64, &mut bytes)?); assert_eq!(bytes, term.as_bytes()); } Ok(())