From 439d6956a96903842d96f304abbddd7adc40aaaf Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Mon, 7 Sep 2020 15:52:34 +0900 Subject: [PATCH] Returning Result in some of the API (#880) * Returning Result in some of the API * Introducing `.writer_for_test(..)` --- src/collector/facet_collector.rs | 34 +-- src/collector/int_facet_collector.rs | 2 +- src/collector/multi_collector.rs | 2 +- src/collector/top_score_collector.rs | 14 +- src/core/index.rs | 13 +- src/core/inverted_index_reader.rs | 6 +- src/core/segment_reader.rs | 29 +- src/directory/mmap_directory.rs | 3 +- src/fastfield/bytes/mod.rs | 2 +- src/fastfield/mod.rs | 4 +- src/fastfield/multivalued/mod.rs | 8 +- src/fastfield/multivalued/reader.rs | 2 +- src/indexer/index_writer.rs | 12 +- src/indexer/merger.rs | 36 +-- src/indexer/mod.rs | 2 +- src/indexer/segment_updater.rs | 6 +- src/lib.rs | 368 +++++++++++------------- src/postings/block_segment_postings.rs | 4 +- src/postings/mod.rs | 43 ++- src/query/all_query.rs | 2 +- src/query/automaton_weight.rs | 7 +- src/query/boolean_query/mod.rs | 4 +- src/query/boost_query.rs | 2 +- src/query/fuzzy_query.rs | 2 +- src/query/phrase_query/mod.rs | 8 +- src/query/phrase_query/phrase_weight.rs | 14 +- src/query/range_query.rs | 19 +- src/query/regex_query.rs | 2 +- src/query/term_query/mod.rs | 10 +- src/query/term_query/term_weight.rs | 11 +- src/schema/mod.rs | 2 +- src/schema/value.rs | 6 + src/snippet/mod.rs | 6 +- src/space_usage/mod.rs | 8 +- src/termdict/mod.rs | 2 +- 35 files changed, 339 insertions(+), 356 deletions(-) diff --git a/src/collector/facet_collector.rs b/src/collector/facet_collector.rs index b8e7f42a5..98b844fb6 100644 --- a/src/collector/facet_collector.rs +++ b/src/collector/facet_collector.rs @@ -472,7 +472,7 @@ mod tests { let schema = schema_builder.build(); let index = Index::create_in_ram(schema); - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = 
index.writer_for_tests().unwrap(); let num_facets: usize = 3 * 4 * 5; let facets: Vec = (0..num_facets) .map(|mut n| { @@ -531,7 +531,7 @@ mod tests { let facet_field = schema_builder.add_facet_field("facets"); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); index_writer.add_document(doc!( facet_field => Facet::from_text(&"/subjects/A/a"), facet_field => Facet::from_text(&"/subjects/B/a"), @@ -550,12 +550,12 @@ mod tests { } #[test] - fn test_doc_search_by_facet() { + fn test_doc_search_by_facet() -> crate::Result<()> { let mut schema_builder = Schema::builder(); let facet_field = schema_builder.add_facet_field("facet"); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests()?; index_writer.add_document(doc!( facet_field => Facet::from_text(&"/A/A"), )); @@ -568,8 +568,8 @@ mod tests { index_writer.add_document(doc!( facet_field => Facet::from_text(&"/D/C/A"), )); - index_writer.commit().unwrap(); - let reader = index.reader().unwrap(); + index_writer.commit()?; + let reader = index.reader()?; let searcher = reader.searcher(); assert_eq!(searcher.num_docs(), 4); @@ -586,17 +586,17 @@ mod tests { assert_eq!(count_facet("/A/C"), 1); assert_eq!(count_facet("/A/C/A"), 1); assert_eq!(count_facet("/C/A"), 0); + + let query_parser = QueryParser::for_index(&index, vec![]); { - let query_parser = QueryParser::for_index(&index, vec![]); - { - let query = query_parser.parse_query("facet:/A/B").unwrap(); - assert_eq!(1, searcher.search(&query, &Count).unwrap()); - } - { - let query = query_parser.parse_query("facet:/A").unwrap(); - assert_eq!(3, searcher.search(&query, &Count).unwrap()); - } + let query = query_parser.parse_query("facet:/A/B")?; + assert_eq!(1, 
searcher.search(&query, &Count).unwrap()); } + { + let query = query_parser.parse_query("facet:/A")?; + assert_eq!(3, searcher.search(&query, &Count)?); + } + Ok(()) } #[test] @@ -631,7 +631,7 @@ mod tests { .collect(); docs[..].shuffle(&mut thread_rng()); - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); for doc in docs { index_writer.add_document(doc); } @@ -684,7 +684,7 @@ mod bench { // 40425 docs docs[..].shuffle(&mut thread_rng()); - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); for doc in docs { index_writer.add_document(doc); } diff --git a/src/collector/int_facet_collector.rs b/src/collector/int_facet_collector.rs index d9b4c1310..68c283647 100644 --- a/src/collector/int_facet_collector.rs +++ b/src/collector/int_facet_collector.rs @@ -89,7 +89,7 @@ mod tests { let index = Index::create_in_ram(schema.clone()); { - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); { for i in 0u64..10u64 { index_writer.add_document(doc!( diff --git a/src/collector/multi_collector.rs b/src/collector/multi_collector.rs index c3a963868..7db5ecafa 100644 --- a/src/collector/multi_collector.rs +++ b/src/collector/multi_collector.rs @@ -259,7 +259,7 @@ mod tests { let index = Index::create_in_ram(schema); { - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); index_writer.add_document(doc!(text=>"abc")); index_writer.add_document(doc!(text=>"abc abc abc")); index_writer.add_document(doc!(text=>"abc abc")); diff --git a/src/collector/top_score_collector.rs b/src/collector/top_score_collector.rs index 9042906f5..5bc9dc33b 100644 --- a/src/collector/top_score_collector.rs +++ b/src/collector/top_score_collector.rs @@ -38,7 +38,7 @@ use 
std::fmt; /// let schema = schema_builder.build(); /// let index = Index::create_in_ram(schema); /// -/// let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); +/// let mut index_writer = index.writer_for_tests().unwrap(); /// index_writer.add_document(doc!(title => "The Name of the Wind")); /// index_writer.add_document(doc!(title => "The Diary of Muadib")); /// index_writer.add_document(doc!(title => "A Dairy Cow")); @@ -123,7 +123,7 @@ impl TopDocs { /// let schema = schema_builder.build(); /// let index = Index::create_in_ram(schema); /// - /// let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + /// let mut index_writer = index.writer_for_tests().unwrap(); /// index_writer.add_document(doc!(title => "The Name of the Wind")); /// index_writer.add_document(doc!(title => "The Diary of Muadib")); /// index_writer.add_document(doc!(title => "A Dairy Cow")); @@ -163,7 +163,7 @@ impl TopDocs { /// # let schema = schema_builder.build(); /// # /// # let index = Index::create_in_ram(schema); - /// # let mut index_writer = index.writer_with_num_threads(1, 3_000_000)?; + /// # let mut index_writer = index.writer_for_tests()?; /// # index_writer.add_document(doc!(title => "The Name of the Wind", rating => 92u64)); /// # index_writer.add_document(doc!(title => "The Diary of Muadib", rating => 97u64)); /// # index_writer.add_document(doc!(title => "A Dairy Cow", rating => 63u64)); @@ -264,7 +264,7 @@ impl TopDocs { /// fn create_index() -> tantivy::Result { /// let schema = create_schema(); /// let index = Index::create_in_ram(schema); - /// let mut index_writer = index.writer_with_num_threads(1, 3_000_000)?; + /// let mut index_writer = index.writer_for_tests()?; /// let product_name = index.schema().get_field("product_name").unwrap(); /// let popularity: Field = index.schema().get_field("popularity").unwrap(); /// index_writer.add_document(doc!(product_name => "The Diary of Muadib", popularity => 1u64)); @@ -371,7 +371,7 @@ 
impl TopDocs { /// # fn main() -> tantivy::Result<()> { /// # let schema = create_schema(); /// # let index = Index::create_in_ram(schema); - /// # let mut index_writer = index.writer_with_num_threads(1, 3_000_000)?; + /// # let mut index_writer = index.writer_for_tests()?; /// # let product_name = index.schema().get_field("product_name").unwrap(); /// # /// let popularity: Field = index.schema().get_field("popularity").unwrap(); @@ -561,7 +561,7 @@ mod tests { let index = Index::create_in_ram(schema); { // writing the segment - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); index_writer.add_document(doc!(text_field=>"Hello happy tax payer.")); index_writer.add_document(doc!(text_field=>"Droopy says hello happy tax payer")); index_writer.add_document(doc!(text_field=>"I like Droopy")); @@ -821,7 +821,7 @@ mod tests { ) -> (Index, Box) { let index = Index::create_in_ram(schema); - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); doc_adder(&mut index_writer); index_writer.commit().unwrap(); let query_parser = QueryParser::for_index(&index, vec![query_field]); diff --git a/src/core/index.rs b/src/core/index.rs index b08b0df6d..162e74200 100644 --- a/src/core/index.rs +++ b/src/core/index.rs @@ -300,6 +300,15 @@ impl Index { ) } + /// Helper to create an index writer for tests. + /// + /// That index writer simply has a single thread and a heap of 10 MB. + /// Using a single thread gives us a deterministic allocation of DocId. + #[cfg(test)] + pub fn writer_for_tests(&self) -> crate::Result { + self.writer_with_num_threads(1, 10_000_000) + } + /// Creates a multithreaded writer /// /// Tantivy will automatically define the number of threads to use. 
@@ -502,7 +511,7 @@ mod tests { let schema = throw_away_schema(); let field = schema.get_field("num_likes").unwrap(); let mut index = Index::create_from_tempdir(schema).unwrap(); - let mut writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut writer = index.writer_for_tests().unwrap(); writer.commit().unwrap(); let reader = index .reader_builder() @@ -545,7 +554,7 @@ mod tests { let _watch_handle = reader_index.directory_mut().watch(Box::new(move || { let _ = sender.send(()); })); - let mut writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut writer = index.writer_for_tests().unwrap(); assert_eq!(reader.searcher().num_docs(), 0); writer.add_document(doc!(field=>1u64)); writer.commit().unwrap(); diff --git a/src/core/inverted_index_reader.rs b/src/core/inverted_index_reader.rs index f68f0a117..e62f65cf5 100644 --- a/src/core/inverted_index_reader.rs +++ b/src/core/inverted_index_reader.rs @@ -3,7 +3,6 @@ use crate::directory::ReadOnlySource; use crate::positions::PositionReader; use crate::postings::TermInfo; use crate::postings::{BlockSegmentPostings, SegmentPostings}; -use crate::schema::FieldType; use crate::schema::IndexRecordOption; use crate::schema::Term; use crate::termdict::TermDictionary; @@ -54,10 +53,7 @@ impl InvertedIndexReader { /// Creates an empty `InvertedIndexReader` object, which /// contains no terms at all. 
- pub fn empty(field_type: &FieldType) -> InvertedIndexReader { - let record_option = field_type - .get_index_record_option() - .unwrap_or(IndexRecordOption::Basic); + pub fn empty(record_option: IndexRecordOption) -> InvertedIndexReader { InvertedIndexReader { termdict: TermDictionary::empty(), postings_source: ReadOnlySource::empty(), diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs index ebd9ad3de..b941d44d3 100644 --- a/src/core/segment_reader.rs +++ b/src/core/segment_reader.rs @@ -9,9 +9,9 @@ use crate::fastfield::DeleteBitSet; use crate::fastfield::FacetReader; use crate::fastfield::FastFieldReaders; use crate::fieldnorm::{FieldNormReader, FieldNormReaders}; -use crate::schema::Field; use crate::schema::FieldType; use crate::schema::Schema; +use crate::schema::{Field, IndexRecordOption}; use crate::space_usage::SegmentSpaceUsage; use crate::store::StoreReader; use crate::termdict::TermDictionary; @@ -125,17 +125,15 @@ impl SegmentReader { /// /// They are simply stored as a fast field, serialized in /// the `.fieldnorm` file of the segment. - pub fn get_fieldnorms_reader(&self, field: Field) -> FieldNormReader { - if let Some(fieldnorm_reader) = self.fieldnorm_readers.get_field(field) { - fieldnorm_reader - } else { + pub fn get_fieldnorms_reader(&self, field: Field) -> crate::Result { + self.fieldnorm_readers.get_field(field).ok_or_else(|| { let field_name = self.schema.get_field_name(field); let err_msg = format!( "Field norm not found for field {:?}. Was it market as indexed during indexing.", field_name ); - panic!(err_msg); - } + crate::TantivyError::SchemaError(err_msg) + }) } /// Accessor to the segment's `StoreReader`. @@ -212,6 +210,11 @@ impl SegmentReader { /// The field reader is in charge of iterating through the /// term dictionary associated to a specific field, /// and opening the posting list associated to any term. 
+ /// + /// If the field is not marked as indexed, a warning is logged and an empty `InvertedIndexReader` + /// is returned. + /// Similarly, if the field is marked as indexed but no term has been indexed for the given + /// field, an empty `InvertedIndexReader` is returned (but no warning is logged). pub fn inverted_index(&self, field: Field) -> Arc { if let Some(inv_idx_reader) = self .inv_idx_reader_cache @@ -226,21 +229,21 @@ impl SegmentReader { let record_option_opt = field_type.get_index_record_option(); if record_option_opt.is_none() { - panic!("Field {:?} does not seem indexed.", field_entry.name()); + warn!("Field {:?} does not seem indexed.", field_entry.name()); } - let record_option = record_option_opt.unwrap(); - let postings_source_opt = self.postings_composite.open_read(field); - if postings_source_opt.is_none() { + if postings_source_opt.is_none() || record_option_opt.is_none() { // no documents in the segment contained this field. // As a result, no data is associated to the inverted index. // // Returns an empty inverted index. 
- return Arc::new(InvertedIndexReader::empty(field_type)); + let record_option = record_option_opt.unwrap_or(IndexRecordOption::Basic); + return Arc::new(InvertedIndexReader::empty(record_option)); } + let record_option = record_option_opt.unwrap(); let postings_source = postings_source_opt.unwrap(); let termdict_source = self.termdict_composite.open_read(field).expect( @@ -339,7 +342,7 @@ mod test { let name = schema.get_field("name").unwrap(); { - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); index_writer.add_document(doc!(name => "tantivy")); index_writer.add_document(doc!(name => "horse")); index_writer.add_document(doc!(name => "jockey")); diff --git a/src/directory/mmap_directory.rs b/src/directory/mmap_directory.rs index 286d71407..6b295091c 100644 --- a/src/directory/mmap_directory.rs +++ b/src/directory/mmap_directory.rs @@ -34,7 +34,6 @@ use std::sync::Mutex; use std::sync::RwLock; use std::sync::Weak; use std::thread; -use tempfile; use tempfile::TempDir; /// Create a default io error given a string. 
@@ -655,7 +654,7 @@ mod tests { { let index = Index::create(mmap_directory.clone(), schema).unwrap(); - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); let mut log_merge_policy = LogMergePolicy::default(); log_merge_policy.set_min_merge_size(3); index_writer.set_merge_policy(Box::new(log_merge_policy)); diff --git a/src/fastfield/bytes/mod.rs b/src/fastfield/bytes/mod.rs index 9998e7cb1..43985db9d 100644 --- a/src/fastfield/bytes/mod.rs +++ b/src/fastfield/bytes/mod.rs @@ -15,7 +15,7 @@ mod tests { let field = schema_builder.add_bytes_field("bytesfield"); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); index_writer.add_document(doc!(field=>vec![0u8, 1, 2, 3])); index_writer.add_document(doc!(field=>vec![])); index_writer.add_document(doc!(field=>vec![255u8])); diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index 8c9a1c68b..ed64c2026 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -474,7 +474,7 @@ mod tests { let date_field = schema_builder.add_date_field("date", FAST); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); index_writer.set_merge_policy(Box::new(NoMergePolicy)); index_writer.add_document(doc!(date_field =>crate::chrono::prelude::Utc::now())); index_writer.commit().unwrap(); @@ -511,7 +511,7 @@ mod tests { ); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); index_writer.set_merge_policy(Box::new(NoMergePolicy)); index_writer.add_document(doc!( 
date_field => crate::DateTime::from_u64(1i64.to_u64()), diff --git a/src/fastfield/multivalued/mod.rs b/src/fastfield/multivalued/mod.rs index 270f07fc3..db9981a04 100644 --- a/src/fastfield/multivalued/mod.rs +++ b/src/fastfield/multivalued/mod.rs @@ -25,7 +25,7 @@ mod tests { ); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); index_writer.add_document(doc!(field=>1u64, field=>3u64)); index_writer.add_document(doc!()); index_writer.add_document(doc!(field=>4u64)); @@ -64,7 +64,7 @@ mod tests { schema_builder.add_i64_field("time_stamp_i", IntOptions::default().set_stored()); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); let first_time_stamp = chrono::Utc::now(); index_writer.add_document( doc!(date_field=>first_time_stamp, date_field=>first_time_stamp, time_i=>1i64), @@ -186,7 +186,7 @@ mod tests { ); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); index_writer.add_document(doc!(field=> 1i64, field => 3i64)); index_writer.add_document(doc!()); index_writer.add_document(doc!(field=> -4i64)); @@ -221,7 +221,7 @@ mod tests { let field = schema_builder.add_facet_field("facetfield"); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); for i in 0..100_000 { index_writer.add_document(doc!(field=> Facet::from(format!("/lang/{}", i).as_str()))); } diff --git a/src/fastfield/multivalued/reader.rs 
b/src/fastfield/multivalued/reader.rs index ed0b61367..efc9ed5c8 100644 --- a/src/fastfield/multivalued/reader.rs +++ b/src/fastfield/multivalued/reader.rs @@ -74,7 +74,7 @@ mod tests { let schema = schema_builder.build(); let index = Index::create_in_ram(schema); let mut index_writer = index - .writer_with_num_threads(1, 30_000_000) + .writer_for_tests() .expect("Failed to create index writer."); index_writer.add_document(doc!( facet_field => Facet::from("/category/cat2"), diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index c3ba6b45c..b36ac331c 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -800,7 +800,7 @@ mod tests { let mut schema_builder = schema::Schema::builder(); let text_field = schema_builder.add_text_field("text", schema::TEXT); let index = Index::create_in_ram(schema_builder.build()); - let index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let index_writer = index.writer_for_tests().unwrap(); let operations = vec![ UserOperation::Add(doc!(text_field=>"a")), UserOperation::Add(doc!(text_field=>"b")), @@ -815,7 +815,7 @@ mod tests { let text_field = schema_builder.add_text_field("text", schema::TEXT); let index = Index::create_in_ram(schema_builder.build()); - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); index_writer.add_document(doc!(text_field => "hello1")); index_writer.add_document(doc!(text_field => "hello2")); assert!(index_writer.commit().is_ok()); @@ -864,7 +864,7 @@ mod tests { .reload_policy(ReloadPolicy::Manual) .try_into() .unwrap(); - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); let a_term = Term::from_field_text(text_field, "a"); let b_term = Term::from_field_text(text_field, "b"); let operations = vec![ @@ -926,8 +926,8 @@ mod tests { fn test_lockfile_already_exists_error_msg() { let 
schema_builder = schema::Schema::builder(); let index = Index::create_in_ram(schema_builder.build()); - let _index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); - match index.writer_with_num_threads(1, 3_000_000) { + let _index_writer = index.writer_for_tests().unwrap(); + match index.writer_for_tests() { Err(err) => { let err_msg = err.to_string(); assert!(err_msg.contains("already an `IndexWriter`")); @@ -1261,7 +1261,7 @@ mod tests { let idfield = schema_builder.add_text_field("id", STRING); schema_builder.add_text_field("optfield", STRING); let index = Index::create_in_ram(schema_builder.build()); - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); index_writer.add_document(doc!(idfield=>"myid")); let commit = index_writer.commit(); assert!(commit.is_ok()); diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index 3ebe5dc74..b8de841f0 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -25,14 +25,14 @@ use std::cmp; use std::collections::HashMap; use std::sync::Arc; -fn compute_total_num_tokens(readers: &[SegmentReader], field: Field) -> u64 { +fn compute_total_num_tokens(readers: &[SegmentReader], field: Field) -> crate::Result { let mut total_tokens = 0u64; let mut count: [usize; 256] = [0; 256]; for reader in readers { if reader.has_deletes() { // if there are deletes, then we use an approximation // using the fieldnorm - let fieldnorms_reader = reader.get_fieldnorms_reader(field); + let fieldnorms_reader = reader.get_fieldnorms_reader(field)?; for doc in reader.doc_ids_alive() { let fieldnorm_id = fieldnorms_reader.fieldnorm_id(doc); count[fieldnorm_id as usize] += 1; @@ -41,7 +41,7 @@ fn compute_total_num_tokens(readers: &[SegmentReader], field: Field) -> u64 { total_tokens += reader.inverted_index(field).total_num_tokens(); } } - total_tokens + Ok(total_tokens + count .iter() .cloned() @@ -49,7 +49,7 @@ fn 
compute_total_num_tokens(readers: &[SegmentReader], field: Field) -> u64 { .map(|(fieldnorm_ord, count)| { count as u64 * u64::from(FieldNormReader::id_to_fieldnorm(fieldnorm_ord as u8)) }) - .sum::() + .sum::()) } pub struct IndexMerger { @@ -175,7 +175,7 @@ impl IndexMerger { for field in fields { fieldnorms_data.clear(); for reader in &self.readers { - let fieldnorms_reader = reader.get_fieldnorms_reader(field); + let fieldnorms_reader = reader.get_fieldnorms_reader(field)?; for doc_id in reader.doc_ids_alive() { let fieldnorm_id = fieldnorms_reader.fieldnorm_id(doc_id); fieldnorms_data.push(fieldnorm_id); @@ -541,7 +541,7 @@ impl IndexMerger { // The total number of tokens will only be exact when there has been no deletes. // // Otherwise, we approximate by removing deleted documents proportionally. - let total_num_tokens: u64 = compute_total_num_tokens(&self.readers, indexed_field); + let total_num_tokens: u64 = compute_total_num_tokens(&self.readers, indexed_field)?; // Create the total list of doc ids // by stacking the doc ids from the different segment. 
@@ -751,7 +751,7 @@ mod tests { }; { - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); { // writing the segment { @@ -803,7 +803,7 @@ mod tests { let segment_ids = index .searchable_segment_ids() .expect("Searchable segments failed."); - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); block_on(index_writer.merge(&segment_ids)).expect("Merging failed"); index_writer.wait_merging_threads().unwrap(); } @@ -904,7 +904,7 @@ mod tests { let score_field = schema_builder.add_u64_field("score", score_fieldtype); let bytes_score_field = schema_builder.add_bytes_field("score_bytes"); let index = Index::create_in_ram(schema_builder.build()); - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); let reader = index.reader().unwrap(); let search_term = |searcher: &Searcher, term: Term| { let collector = FastFieldTestCollector::for_field(score_field); @@ -1211,7 +1211,7 @@ mod tests { let index = Index::create_in_ram(schema_builder.build()); let reader = index.reader().unwrap(); { - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); let index_doc = |index_writer: &mut IndexWriter, doc_facets: &[&str]| { let mut doc = Document::default(); for facet in doc_facets { @@ -1276,7 +1276,7 @@ mod tests { let segment_ids = index .searchable_segment_ids() .expect("Searchable segments failed."); - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); block_on(index_writer.merge(&segment_ids)).expect("Merging failed"); index_writer.wait_merging_threads().unwrap(); reader.reload().unwrap(); @@ -1295,7 +1295,7 @@ mod tests { // Deleting one term { - let mut index_writer 
= index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); let facet = Facet::from_path(vec!["top", "a", "firstdoc"]); let facet_term = Term::from_facet(facet_field, &facet); index_writer.delete_term(facet_term); @@ -1320,7 +1320,7 @@ mod tests { let mut schema_builder = schema::Schema::builder(); let int_field = schema_builder.add_u64_field("intvals", INDEXED); let index = Index::create_in_ram(schema_builder.build()); - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); index_writer.add_document(doc!(int_field => 1u64)); index_writer.commit().expect("commit failed"); index_writer.add_document(doc!(int_field => 1u64)); @@ -1349,7 +1349,7 @@ mod tests { let index = Index::create_in_ram(schema_builder.build()); let reader = index.reader().unwrap(); { - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); let mut doc = Document::default(); doc.add_u64(int_field, 1); index_writer.add_document(doc.clone()); @@ -1388,7 +1388,7 @@ mod tests { let index = Index::create_in_ram(schema_builder.build()); { - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); let index_doc = |index_writer: &mut IndexWriter, int_vals: &[u64]| { let mut doc = Document::default(); for &val in int_vals { @@ -1462,7 +1462,7 @@ mod tests { let segment_ids = index .searchable_segment_ids() .expect("Searchable segments failed."); - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); assert!(block_on(index_writer.merge(&segment_ids)).is_ok()); assert!(index_writer.wait_merging_threads().is_ok()); } @@ -1516,7 +1516,7 @@ mod tests { let index = Index::create_in_ram(builder.build()); - let mut writer = 
index.writer_with_num_threads(1, 3_000_000)?; + let mut writer = index.writer_for_tests()?; // Make sure we'll attempt to merge every created segment let mut policy = crate::indexer::LogMergePolicy::default(); @@ -1548,7 +1548,7 @@ mod tests { let mut builder = schema::SchemaBuilder::new(); let text = builder.add_text_field("text", TEXT); let index = Index::create_in_ram(builder.build()); - let mut writer = index.writer_with_num_threads(1, 3_000_000)?; + let mut writer = index.writer_for_tests()?; let happy_term = Term::from_field_text(text, "happy"); let term_query = TermQuery::new(happy_term, IndexRecordOption::WithFreqs); for _ in 0..62 { diff --git a/src/indexer/mod.rs b/src/indexer/mod.rs index d09305399..db5a8daa1 100644 --- a/src/indexer/mod.rs +++ b/src/indexer/mod.rs @@ -40,7 +40,7 @@ mod tests_mmap { let mut schema_builder = Schema::builder(); let text_field = schema_builder.add_text_field("text", schema::TEXT); let index = Index::create_from_tempdir(schema_builder.build()).unwrap(); - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); // there must be one deleted document in the segment index_writer.add_document(doc!(text_field=>"b")); index_writer.delete_term(Term::from_field_text(text_field, "b")); diff --git a/src/indexer/segment_updater.rs b/src/indexer/segment_updater.rs index 9205ac30a..58a38829e 100644 --- a/src/indexer/segment_updater.rs +++ b/src/indexer/segment_updater.rs @@ -555,7 +555,7 @@ mod tests { let index = Index::create_in_ram(schema); // writing the segment - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); index_writer.set_merge_policy(Box::new(MergeWheneverPossible)); { @@ -608,7 +608,7 @@ mod tests { let index = Index::create_in_ram(schema); // writing the segment - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut 
index_writer = index.writer_for_tests().unwrap(); { for _ in 0..100 { @@ -679,7 +679,7 @@ mod tests { let index = Index::create_in_ram(schema); // writing the segment - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); { for _ in 0..100 { diff --git a/src/lib.rs b/src/lib.rs index 4ef03e600..9789f46e1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -296,7 +296,6 @@ mod tests { use crate::schema::*; use crate::DocAddress; use crate::Index; - use crate::IndexWriter; use crate::Postings; use crate::ReloadPolicy; use rand::distributions::Bernoulli; @@ -361,14 +360,14 @@ mod tests { #[test] #[cfg(feature = "mmap")] - fn test_indexing() { + fn test_indexing() -> crate::Result<()> { let mut schema_builder = Schema::builder(); let text_field = schema_builder.add_text_field("text", TEXT); let schema = schema_builder.build(); let index = Index::create_from_tempdir(schema).unwrap(); { // writing the segment - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests()?; { let doc = doc!(text_field=>"af b"); index_writer.add_document(doc); @@ -383,29 +382,30 @@ mod tests { } assert!(index_writer.commit().is_ok()); } + Ok(()) } #[test] - fn test_docfreq1() { + fn test_docfreq1() -> crate::Result<()> { let mut schema_builder = Schema::builder(); let text_field = schema_builder.add_text_field("text", TEXT); let index = Index::create_in_ram(schema_builder.build()); - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests()?; { index_writer.add_document(doc!(text_field=>"a b c")); - index_writer.commit().unwrap(); + index_writer.commit()?; } { index_writer.add_document(doc!(text_field=>"a")); index_writer.add_document(doc!(text_field=>"a a")); - index_writer.commit().unwrap(); + index_writer.commit()?; } { index_writer.add_document(doc!(text_field=>"c")); - 
index_writer.commit().unwrap(); + index_writer.commit()?; } { - let reader = index.reader().unwrap(); + let reader = index.reader()?; let searcher = reader.searcher(); let term_a = Term::from_field_text(text_field, "a"); assert_eq!(searcher.doc_freq(&term_a), 3); @@ -416,67 +416,50 @@ mod tests { let term_d = Term::from_field_text(text_field, "d"); assert_eq!(searcher.doc_freq(&term_d), 0); } + Ok(()) } #[test] - fn test_fieldnorm_no_docs_with_field() { + fn test_fieldnorm_no_docs_with_field() -> crate::Result<()> { let mut schema_builder = Schema::builder(); let title_field = schema_builder.add_text_field("title", TEXT); let text_field = schema_builder.add_text_field("text", TEXT); let index = Index::create_in_ram(schema_builder.build()); + let mut index_writer = index.writer_for_tests()?; + index_writer.add_document(doc!(text_field=>"a b c")); + index_writer.commit()?; + let index_reader = index.reader()?; + let searcher = index_reader.searcher(); + let reader = searcher.segment_reader(0); { - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); - { - let doc = doc!(text_field=>"a b c"); - index_writer.add_document(doc); - } - index_writer.commit().unwrap(); + let fieldnorm_reader = reader.get_fieldnorms_reader(text_field)?; + assert_eq!(fieldnorm_reader.fieldnorm(0), 3); } { - let index_reader = index.reader().unwrap(); - let searcher = index_reader.searcher(); - let reader = searcher.segment_reader(0); - { - let fieldnorm_reader = reader.get_fieldnorms_reader(text_field); - assert_eq!(fieldnorm_reader.fieldnorm(0), 3); - } - { - let fieldnorm_reader = reader.get_fieldnorms_reader(title_field); - assert_eq!(fieldnorm_reader.fieldnorm_id(0), 0); - } + let fieldnorm_reader = reader.get_fieldnorms_reader(title_field)?; + assert_eq!(fieldnorm_reader.fieldnorm_id(0), 0); } + Ok(()) } #[test] - fn test_fieldnorm() { + fn test_fieldnorm() -> crate::Result<()> { let mut schema_builder = Schema::builder(); let text_field = 
schema_builder.add_text_field("text", TEXT); let index = Index::create_in_ram(schema_builder.build()); - { - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); - { - let doc = doc!(text_field=>"a b c"); - index_writer.add_document(doc); - } - { - let doc = doc!(); - index_writer.add_document(doc); - } - { - let doc = doc!(text_field=>"a b"); - index_writer.add_document(doc); - } - index_writer.commit().unwrap(); - } - { - let reader = index.reader().unwrap(); - let searcher = reader.searcher(); - let segment_reader: &SegmentReader = searcher.segment_reader(0); - let fieldnorms_reader = segment_reader.get_fieldnorms_reader(text_field); - assert_eq!(fieldnorms_reader.fieldnorm(0), 3); - assert_eq!(fieldnorms_reader.fieldnorm(1), 0); - assert_eq!(fieldnorms_reader.fieldnorm(2), 2); - } + let mut index_writer = index.writer_for_tests()?; + index_writer.add_document(doc!(text_field=>"a b c")); + index_writer.add_document(doc!()); + index_writer.add_document(doc!(text_field=>"a b")); + index_writer.commit()?; + let reader = index.reader()?; + let searcher = reader.searcher(); + let segment_reader: &SegmentReader = searcher.segment_reader(0); + let fieldnorms_reader = segment_reader.get_fieldnorms_reader(text_field)?; + assert_eq!(fieldnorms_reader.fieldnorm(0), 3); + assert_eq!(fieldnorms_reader.fieldnorm(1), 0); + assert_eq!(fieldnorms_reader.fieldnorm(2), 2); + Ok(()) } fn advance_undeleted(docset: &mut dyn DocSet, reader: &SegmentReader) -> bool { @@ -491,7 +474,7 @@ mod tests { } #[test] - fn test_delete_postings1() { + fn test_delete_postings1() -> crate::Result<()> { let mut schema_builder = Schema::builder(); let text_field = schema_builder.add_text_field("text", TEXT); let term_abcd = Term::from_field_text(text_field, "abcd"); @@ -507,7 +490,7 @@ mod tests { .unwrap(); { // writing the segment - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests()?; // 0 
index_writer.add_document(doc!(text_field=>"a b")); // 1 @@ -523,10 +506,10 @@ mod tests { index_writer.add_document(doc!(text_field=>" b c")); // 5 index_writer.add_document(doc!(text_field=>" a")); - index_writer.commit().unwrap(); + index_writer.commit()?; } { - reader.reload().unwrap(); + reader.reload()?; let searcher = reader.searcher(); let segment_reader = searcher.segment_reader(0); let inverted_index = segment_reader.inverted_index(text_field); @@ -554,15 +537,15 @@ mod tests { } { // writing the segment - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests()?; // 0 index_writer.add_document(doc!(text_field=>"a b")); // 1 index_writer.delete_term(Term::from_field_text(text_field, "c")); - index_writer.rollback().unwrap(); + index_writer.rollback()?; } { - reader.reload().unwrap(); + reader.reload()?; let searcher = reader.searcher(); let seg_reader = searcher.segment_reader(0); let inverted_index = seg_reader.inverted_index(term_abcd.field()); @@ -591,15 +574,15 @@ mod tests { } { // writing the segment - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests()?; index_writer.add_document(doc!(text_field=>"a b")); index_writer.delete_term(Term::from_field_text(text_field, "c")); - index_writer.rollback().unwrap(); + index_writer.rollback()?; index_writer.delete_term(Term::from_field_text(text_field, "a")); - index_writer.commit().unwrap(); + index_writer.commit()?; } { - reader.reload().unwrap(); + reader.reload()?; let searcher = reader.searcher(); let segment_reader = searcher.segment_reader(0); let inverted_index = segment_reader.inverted_index(term_abcd.field()); @@ -631,19 +614,20 @@ mod tests { assert!(!advance_undeleted(&mut postings, segment_reader)); } } + Ok(()) } #[test] - fn test_indexed_u64() { + fn test_indexed_u64() -> crate::Result<()> { let mut schema_builder = Schema::builder(); let field = 
schema_builder.add_u64_field("value", INDEXED); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests()?; index_writer.add_document(doc!(field=>1u64)); - index_writer.commit().unwrap(); - let reader = index.reader().unwrap(); + index_writer.commit()?; + let reader = index.reader()?; let searcher = reader.searcher(); let term = Term::from_field_u64(field, 1u64); let mut postings = searcher @@ -653,20 +637,21 @@ mod tests { .unwrap(); assert_eq!(postings.doc(), 0); assert_eq!(postings.advance(), TERMINATED); + Ok(()) } #[test] - fn test_indexed_i64() { + fn test_indexed_i64() -> crate::Result<()> { let mut schema_builder = Schema::builder(); let value_field = schema_builder.add_i64_field("value", INDEXED); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests()?; let negative_val = -1i64; index_writer.add_document(doc!(value_field => negative_val)); - index_writer.commit().unwrap(); - let reader = index.reader().unwrap(); + index_writer.commit()?; + let reader = index.reader()?; let searcher = reader.searcher(); let term = Term::from_field_i64(value_field, negative_val); let mut postings = searcher @@ -676,20 +661,21 @@ mod tests { .unwrap(); assert_eq!(postings.doc(), 0); assert_eq!(postings.advance(), TERMINATED); + Ok(()) } #[test] - fn test_indexed_f64() { + fn test_indexed_f64() -> crate::Result<()> { let mut schema_builder = Schema::builder(); let value_field = schema_builder.add_f64_field("value", INDEXED); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests()?; let val = std::f64::consts::PI; 
index_writer.add_document(doc!(value_field => val)); - index_writer.commit().unwrap(); - let reader = index.reader().unwrap(); + index_writer.commit()?; + let reader = index.reader()?; let searcher = reader.searcher(); let term = Term::from_field_f64(value_field, val); let mut postings = searcher @@ -699,26 +685,29 @@ mod tests { .unwrap(); assert_eq!(postings.doc(), 0); assert_eq!(postings.advance(), TERMINATED); + Ok(()) } #[test] - fn test_indexedfield_not_in_documents() { + fn test_indexedfield_not_in_documents() -> crate::Result<()> { let mut schema_builder = Schema::builder(); let text_field = schema_builder.add_text_field("text", TEXT); let absent_field = schema_builder.add_text_field("text", TEXT); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); - let mut index_writer = index.writer_with_num_threads(2, 6_000_000).unwrap(); + let mut index_writer = index.writer_for_tests()?; index_writer.add_document(doc!(text_field=>"a")); assert!(index_writer.commit().is_ok()); - let reader = index.reader().unwrap(); + let reader = index.reader()?; let searcher = reader.searcher(); let segment_reader = searcher.segment_reader(0); - segment_reader.inverted_index(absent_field); //< should not panic + let inverted_index = segment_reader.inverted_index(absent_field); //< should not panic + assert_eq!(inverted_index.terms().num_terms(), 0); + Ok(()) } #[test] - fn test_delete_postings2() { + fn test_delete_postings2() -> crate::Result<()> { let mut schema_builder = Schema::builder(); let text_field = schema_builder.add_text_field("text", TEXT); let schema = schema_builder.build(); @@ -726,53 +715,40 @@ mod tests { let reader = index .reader_builder() .reload_policy(ReloadPolicy::Manual) - .try_into() - .unwrap(); + .try_into()?; // writing the segment - let mut index_writer = index.writer_with_num_threads(2, 6_000_000).unwrap(); - - let add_document = |index_writer: &mut IndexWriter, val: &'static str| { - let doc = doc!(text_field=>val); - 
index_writer.add_document(doc); - }; - - let remove_document = |index_writer: &mut IndexWriter, val: &'static str| { - let delterm = Term::from_field_text(text_field, val); - index_writer.delete_term(delterm); - }; - - add_document(&mut index_writer, "63"); - add_document(&mut index_writer, "70"); - add_document(&mut index_writer, "34"); - add_document(&mut index_writer, "1"); - add_document(&mut index_writer, "38"); - add_document(&mut index_writer, "33"); - add_document(&mut index_writer, "40"); - add_document(&mut index_writer, "17"); - remove_document(&mut index_writer, "38"); - remove_document(&mut index_writer, "34"); - index_writer.commit().unwrap(); - reader.reload().unwrap(); - let searcher = reader.searcher(); - assert_eq!(searcher.num_docs(), 6); + let mut index_writer = index.writer_for_tests()?; + index_writer.add_document(doc!(text_field=>"63")); + index_writer.add_document(doc!(text_field=>"70")); + index_writer.add_document(doc!(text_field=>"34")); + index_writer.add_document(doc!(text_field=>"1")); + index_writer.add_document(doc!(text_field=>"38")); + index_writer.add_document(doc!(text_field=>"33")); + index_writer.add_document(doc!(text_field=>"40")); + index_writer.add_document(doc!(text_field=>"17")); + index_writer.delete_term(Term::from_field_text(text_field, "38")); + index_writer.delete_term(Term::from_field_text(text_field, "34")); + index_writer.commit()?; + reader.reload()?; + assert_eq!(reader.searcher().num_docs(), 6); + Ok(()) } #[test] - fn test_termfreq() { + fn test_termfreq() -> crate::Result<()> { let mut schema_builder = Schema::builder(); let text_field = schema_builder.add_text_field("text", TEXT); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); { // writing the segment - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); - let doc = doc!(text_field=>"af af af bc bc"); - index_writer.add_document(doc); - index_writer.commit().unwrap(); + let mut index_writer = 
index.writer_for_tests()?; + index_writer.add_document(doc!(text_field=>"af af af bc bc")); + index_writer.commit()?; } { - let index_reader = index.reader().unwrap(); + let index_reader = index.reader()?; let searcher = index_reader.searcher(); let reader = searcher.segment_reader(0); let inverted_index = reader.inverted_index(text_field); @@ -788,63 +764,63 @@ mod tests { assert_eq!(postings.term_freq(), 3); assert_eq!(postings.advance(), TERMINATED); } + Ok(()) } #[test] - fn test_searcher_1() { + fn test_searcher_1() -> crate::Result<()> { let mut schema_builder = Schema::builder(); let text_field = schema_builder.add_text_field("text", TEXT); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); - let reader = index.reader().unwrap(); - { - // writing the segment - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); - index_writer.add_document(doc!(text_field=>"af af af b")); - index_writer.add_document(doc!(text_field=>"a b c")); - index_writer.add_document(doc!(text_field=>"a b c d")); - index_writer.commit().unwrap(); - } - { - reader.reload().unwrap(); - let searcher = reader.searcher(); - let get_doc_ids = |terms: Vec| { - let query = BooleanQuery::new_multiterms_query(terms); - let topdocs = searcher.search(&query, &TEST_COLLECTOR_WITH_SCORE).unwrap(); - topdocs.docs().to_vec() - }; - assert_eq!( - get_doc_ids(vec![Term::from_field_text(text_field, "a")]), - vec![DocAddress(0, 1), DocAddress(0, 2)] - ); - assert_eq!( - get_doc_ids(vec![Term::from_field_text(text_field, "af")]), - vec![DocAddress(0, 0)] - ); - assert_eq!( - get_doc_ids(vec![Term::from_field_text(text_field, "b")]), - vec![DocAddress(0, 0), DocAddress(0, 1), DocAddress(0, 2)] - ); - assert_eq!( - get_doc_ids(vec![Term::from_field_text(text_field, "c")]), - vec![DocAddress(0, 1), DocAddress(0, 2)] - ); - assert_eq!( - get_doc_ids(vec![Term::from_field_text(text_field, "d")]), - vec![DocAddress(0, 2)] - ); - assert_eq!( - get_doc_ids(vec![ - 
Term::from_field_text(text_field, "b"), - Term::from_field_text(text_field, "a"), - ]), - vec![DocAddress(0, 0), DocAddress(0, 1), DocAddress(0, 2)] - ); - } + let reader = index.reader()?; + // writing the segment + let mut index_writer = index.writer_for_tests()?; + index_writer.add_document(doc!(text_field=>"af af af b")); + index_writer.add_document(doc!(text_field=>"a b c")); + index_writer.add_document(doc!(text_field=>"a b c d")); + index_writer.commit()?; + + reader.reload()?; + let searcher = reader.searcher(); + let get_doc_ids = |terms: Vec| { + let query = BooleanQuery::new_multiterms_query(terms); + searcher + .search(&query, &TEST_COLLECTOR_WITH_SCORE) + .map(|topdocs| topdocs.docs().to_vec()) + }; + assert_eq!( + get_doc_ids(vec![Term::from_field_text(text_field, "a")])?, + vec![DocAddress(0, 1), DocAddress(0, 2)] + ); + assert_eq!( + get_doc_ids(vec![Term::from_field_text(text_field, "af")])?, + vec![DocAddress(0, 0)] + ); + assert_eq!( + get_doc_ids(vec![Term::from_field_text(text_field, "b")])?, + vec![DocAddress(0, 0), DocAddress(0, 1), DocAddress(0, 2)] + ); + assert_eq!( + get_doc_ids(vec![Term::from_field_text(text_field, "c")])?, + vec![DocAddress(0, 1), DocAddress(0, 2)] + ); + assert_eq!( + get_doc_ids(vec![Term::from_field_text(text_field, "d")])?, + vec![DocAddress(0, 2)] + ); + assert_eq!( + get_doc_ids(vec![ + Term::from_field_text(text_field, "b"), + Term::from_field_text(text_field, "a"), + ])?, + vec![DocAddress(0, 0), DocAddress(0, 1), DocAddress(0, 2)] + ); + Ok(()) } #[test] - fn test_searcher_2() { + fn test_searcher_2() -> crate::Result<()> { let mut schema_builder = Schema::builder(); let text_field = schema_builder.add_text_field("text", TEXT); let schema = schema_builder.build(); @@ -852,19 +828,17 @@ mod tests { let reader = index .reader_builder() .reload_policy(ReloadPolicy::Manual) - .try_into() - .unwrap(); + .try_into()?; assert_eq!(reader.searcher().num_docs(), 0u64); - { - // writing the segment - let mut index_writer 
= index.writer_with_num_threads(1, 3_000_000).unwrap(); - index_writer.add_document(doc!(text_field=>"af b")); - index_writer.add_document(doc!(text_field=>"a b c")); - index_writer.add_document(doc!(text_field=>"a b c d")); - index_writer.commit().unwrap(); - } - reader.reload().unwrap(); + // writing the segment + let mut index_writer = index.writer_for_tests()?; + index_writer.add_document(doc!(text_field=>"af b")); + index_writer.add_document(doc!(text_field=>"a b c")); + index_writer.add_document(doc!(text_field=>"a b c d")); + index_writer.commit()?; + reader.reload()?; assert_eq!(reader.searcher().num_docs(), 3u64); + Ok(()) } #[test] @@ -886,7 +860,7 @@ mod tests { } #[test] - fn test_wrong_fast_field_type() { + fn test_wrong_fast_field_type() -> crate::Result<()> { let mut schema_builder = Schema::builder(); let fast_field_unsigned = schema_builder.add_u64_field("unsigned", FAST); let fast_field_signed = schema_builder.add_i64_field("signed", FAST); @@ -896,14 +870,14 @@ mod tests { let schema = schema_builder.build(); let index = Index::create_in_ram(schema); - let mut index_writer = index.writer_with_num_threads(1, 50_000_000).unwrap(); + let mut index_writer = index.writer_for_tests()?; { let document = doc!(fast_field_unsigned => 4u64, fast_field_signed=>4i64, fast_field_float=>4f64); index_writer.add_document(document); - index_writer.commit().unwrap(); + index_writer.commit()?; } - let reader = index.reader().unwrap(); + let reader = index.reader()?; let searcher = reader.searcher(); let segment_reader: &SegmentReader = searcher.segment_reader(0); { @@ -942,11 +916,12 @@ mod tests { let fast_field_reader = fast_field_reader_opt.unwrap(); assert_eq!(fast_field_reader.get(0), 4f64) } + Ok(()) } // motivated by #729 #[test] - fn test_update_via_delete_insert() { + fn test_update_via_delete_insert() -> crate::Result<()> { use crate::collector::Count; use crate::indexer::NoMergePolicy; use crate::query::AllQuery; @@ -960,17 +935,17 @@ mod tests { let 
schema = schema_builder.build(); let index = Index::create_in_ram(schema.clone()); - let index_reader = index.reader().unwrap(); + let index_reader = index.reader()?; - let mut index_writer = index.writer(3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests()?; index_writer.set_merge_policy(Box::new(NoMergePolicy)); for doc_id in 0u64..DOC_COUNT { index_writer.add_document(doc!(id => doc_id)); } - index_writer.commit().unwrap(); + index_writer.commit()?; - index_reader.reload().unwrap(); + index_reader.reload()?; let searcher = index_reader.searcher(); assert_eq!( @@ -981,12 +956,11 @@ mod tests { // update the 10 elements by deleting and re-adding for doc_id in 0u64..DOC_COUNT { index_writer.delete_term(Term::from_field_u64(id, doc_id)); - index_writer.commit().unwrap(); - index_reader.reload().unwrap(); - let doc = doc!(id => doc_id); - index_writer.add_document(doc); - index_writer.commit().unwrap(); - index_reader.reload().unwrap(); + index_writer.commit()?; + index_reader.reload()?; + index_writer.add_document(doc!(id => doc_id)); + index_writer.commit()?; + index_reader.reload()?; let searcher = index_reader.searcher(); // The number of document should be stable. 
assert_eq!( @@ -995,7 +969,7 @@ mod tests { ); } - index_reader.reload().unwrap(); + index_reader.reload()?; let searcher = index_reader.searcher(); let segment_ids: Vec = searcher .segment_readers() @@ -1004,20 +978,18 @@ mod tests { .collect(); block_on(index_writer.merge(&segment_ids)).unwrap(); - index_reader.reload().unwrap(); + index_reader.reload()?; let searcher = index_reader.searcher(); - - assert_eq!( - searcher.search(&AllQuery, &Count).unwrap(), - DOC_COUNT as usize - ); + assert_eq!(searcher.search(&AllQuery, &Count)?, DOC_COUNT as usize); + Ok(()) } #[test] - fn test_validate_checksum() { + fn test_validate_checksum() -> crate::Result<()> { let index_path = tempfile::tempdir().expect("dir"); let schema = Schema::builder().build(); - let index = Index::create_in_dir(&index_path, schema).expect("index"); - assert!(index.validate_checksum().unwrap().is_empty()); + let index = Index::create_in_dir(&index_path, schema)?; + assert!(index.validate_checksum()?.is_empty()); + Ok(()) } } diff --git a/src/postings/block_segment_postings.rs b/src/postings/block_segment_postings.rs index 67ad5a1d0..87d1714f5 100644 --- a/src/postings/block_segment_postings.rs +++ b/src/postings/block_segment_postings.rs @@ -455,7 +455,7 @@ mod tests { let int_field = schema_builder.add_u64_field("id", INDEXED); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); let mut last_doc = 0u32; for &doc in docs { for _ in last_doc..doc { @@ -496,7 +496,7 @@ mod tests { let int_field = schema_builder.add_u64_field("id", INDEXED); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); // create two postings list, one containg even number, // the other containing odd 
numbers. for i in 0..6 { diff --git a/src/postings/mod.rs b/src/postings/mod.rs index 1bc855df7..a3ce3611d 100644 --- a/src/postings/mod.rs +++ b/src/postings/mod.rs @@ -91,7 +91,7 @@ pub mod tests { let title = schema_builder.add_text_field("title", TEXT); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); - let mut index_writer = index.writer_with_num_threads(1, 30_000_000)?; + let mut index_writer = index.writer_for_tests()?; index_writer.add_document(doc!(title => r#"abc abc abc"#)); index_writer.add_document(doc!(title => r#"abc be be be be abc"#)); for _ in 0..1_000 { @@ -176,7 +176,7 @@ pub mod tests { .tokenizers() .register("simple_no_truncation", SimpleTokenizer); let reader = index.reader().unwrap(); - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); index_writer.set_merge_policy(Box::new(NoMergePolicy)); { index_writer.add_document(doc!(text_field=>exceeding_token_text)); @@ -205,7 +205,7 @@ pub mod tests { } #[test] - pub fn test_position_and_fieldnorm1() { + pub fn test_position_and_fieldnorm1() -> crate::Result<()> { let mut positions = Vec::new(); let mut schema_builder = Schema::builder(); let text_field = schema_builder.add_text_field("text", TEXT); @@ -217,42 +217,38 @@ pub mod tests { let mut segment_writer = SegmentWriter::for_segment(3_000_000, segment.clone(), &schema).unwrap(); { - let mut doc = Document::default(); // checking that position works if the field has two values - doc.add_text(text_field, "a b a c a d a a."); - doc.add_text(text_field, "d d d d a"); let op = AddOperation { opstamp: 0u64, - document: doc, + document: doc!( + text_field => "a b a c a d a a.", + text_field => "d d d d a" + ), }; - segment_writer.add_document(op, &schema).unwrap(); + segment_writer.add_document(op, &schema)?; } { - let mut doc = Document::default(); - doc.add_text(text_field, "b a"); let op = AddOperation { opstamp: 1u64, - document: 
doc, + document: doc!(text_field => "b a"), }; segment_writer.add_document(op, &schema).unwrap(); } for i in 2..1000 { - let mut doc = Document::default(); - let mut text = iter::repeat("e ").take(i).collect::(); + let mut text: String = iter::repeat("e ").take(i).collect(); text.push_str(" a"); - doc.add_text(text_field, &text); let op = AddOperation { opstamp: 2u64, - document: doc, + document: doc!(text_field => text), }; segment_writer.add_document(op, &schema).unwrap(); } - segment_writer.finalize().unwrap(); + segment_writer.finalize()?; } { - let segment_reader = SegmentReader::open(&segment).unwrap(); + let segment_reader = SegmentReader::open(&segment)?; { - let fieldnorm_reader = segment_reader.get_fieldnorms_reader(text_field); + let fieldnorm_reader = segment_reader.get_fieldnorms_reader(text_field)?; assert_eq!(fieldnorm_reader.fieldnorm(0), 8 + 5); assert_eq!(fieldnorm_reader.fieldnorm(1), 2); for i in 2..1000 { @@ -312,6 +308,7 @@ pub mod tests { assert_eq!(postings_e.doc(), TERMINATED); } } + Ok(()) } #[test] @@ -322,7 +319,7 @@ pub mod tests { let schema = schema_builder.build(); let index = Index::create_in_ram(schema); { - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); index_writer.add_document(doc!(text_field => "g b b d c g c")); index_writer.add_document(doc!(text_field => "g a b b a d c g c")); assert!(index_writer.commit().is_ok()); @@ -354,7 +351,7 @@ pub mod tests { let index = Index::create_in_ram(schema); { - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); for i in 0u64..num_docs as u64 { let doc = doc!(value_field => 2u64, value_field => i % 2u64); index_writer.add_document(doc); @@ -425,7 +422,7 @@ pub mod tests { // delete some of the documents { - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = 
index.writer_for_tests().unwrap(); index_writer.delete_term(term_0); assert!(index_writer.commit().is_ok()); } @@ -479,7 +476,7 @@ pub mod tests { // delete everything else { - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); index_writer.delete_term(term_1); assert!(index_writer.commit().is_ok()); } @@ -522,7 +519,7 @@ pub mod tests { let index = Index::create_in_ram(schema); let posting_list_size = 1_000_000; { - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); for _ in 0..posting_list_size { let mut doc = Document::default(); if rng.gen_bool(1f64 / 15f64) { diff --git a/src/query/all_query.rs b/src/query/all_query.rs index 7b607eae8..f1ace41f7 100644 --- a/src/query/all_query.rs +++ b/src/query/all_query.rs @@ -83,7 +83,7 @@ mod tests { let field = schema_builder.add_text_field("text", TEXT); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); - let mut index_writer = index.writer_with_num_threads(1, 10_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); index_writer.add_document(doc!(field=>"aaa")); index_writer.add_document(doc!(field=>"bbb")); index_writer.commit().unwrap(); diff --git a/src/query/automaton_weight.rs b/src/query/automaton_weight.rs index 80d80ec40..f25375384 100644 --- a/src/query/automaton_weight.rs +++ b/src/query/automaton_weight.rs @@ -5,7 +5,6 @@ use crate::query::{BitSetDocSet, Explanation}; use crate::query::{Scorer, Weight}; use crate::schema::{Field, IndexRecordOption}; use crate::termdict::{TermDictionary, TermStreamer}; -use crate::Result; use crate::TantivyError; use crate::{DocId, Score}; use std::sync::Arc; @@ -40,7 +39,7 @@ impl Weight for AutomatonWeight where A: Automaton + Send + Sync + 'static, { - fn scorer(&self, reader: &SegmentReader, boost: Score) -> Result> { + fn scorer(&self, reader: 
&SegmentReader, boost: Score) -> crate::Result> { let max_doc = reader.max_doc(); let mut doc_bitset = BitSet::with_max_value(max_doc); let inverted_index = reader.inverted_index(self.field); @@ -66,7 +65,7 @@ where Ok(Box::new(const_scorer)) } - fn explain(&self, reader: &SegmentReader, doc: DocId) -> Result { + fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result { let mut scorer = self.scorer(reader, 1.0)?; if scorer.seek(doc) == doc { Ok(Explanation::new("AutomatonScorer", 1.0)) @@ -91,7 +90,7 @@ mod tests { let mut schema = Schema::builder(); let title = schema.add_text_field("title", STRING); let index = Index::create_in_ram(schema.build()); - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); index_writer.add_document(doc!(title=>"abc")); index_writer.add_document(doc!(title=>"bcd")); index_writer.add_document(doc!(title=>"abcd")); diff --git a/src/query/boolean_query/mod.rs b/src/query/boolean_query/mod.rs index 60d5551e4..0f4d5f92b 100644 --- a/src/query/boolean_query/mod.rs +++ b/src/query/boolean_query/mod.rs @@ -32,7 +32,7 @@ mod tests { let index = Index::create_in_ram(schema); { // writing the segment - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); { index_writer.add_document(doc!(text_field => "a b c")); index_writer.add_document(doc!(text_field => "a c")); @@ -224,7 +224,7 @@ mod tests { let schema = schema_builder.build(); let index = Index::create_in_ram(schema); { - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); index_writer.add_document(doc!(text_field => "a b c")); index_writer.add_document(doc!(text_field => "a c")); index_writer.add_document(doc!(text_field => "b c")); diff --git a/src/query/boost_query.rs b/src/query/boost_query.rs index 7cdb919da..825399c08 
100644 --- a/src/query/boost_query.rs +++ b/src/query/boost_query.rs @@ -144,7 +144,7 @@ mod tests { fn test_boost_query_explain() { let schema = Schema::builder().build(); let index = Index::create_in_ram(schema); - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); index_writer.add_document(Document::new()); assert!(index_writer.commit().is_ok()); let reader = index.reader().unwrap(); diff --git a/src/query/fuzzy_query.rs b/src/query/fuzzy_query.rs index 831acb021..41cc17bc3 100644 --- a/src/query/fuzzy_query.rs +++ b/src/query/fuzzy_query.rs @@ -177,7 +177,7 @@ mod test { let schema = schema_builder.build(); let index = Index::create_in_ram(schema); { - let mut index_writer = index.writer_with_num_threads(1, 10_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); index_writer.add_document(doc!( country_field => "japan", )); diff --git a/src/query/phrase_query/mod.rs b/src/query/phrase_query/mod.rs index f75423a48..fcff64dc1 100644 --- a/src/query/phrase_query/mod.rs +++ b/src/query/phrase_query/mod.rs @@ -24,7 +24,7 @@ pub mod tests { let schema = schema_builder.build(); let index = Index::create_in_ram(schema); { - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); for &text in texts { let doc = doc!(text_field=>text); index_writer.add_document(doc); @@ -135,7 +135,7 @@ pub mod tests { let schema = schema_builder.build(); let index = Index::create_in_ram(schema); { - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); index_writer.add_document(doc!(text_field=>"a b c")); assert!(index_writer.commit().is_ok()); } @@ -186,7 +186,7 @@ pub mod tests { let schema = schema_builder.build(); let index = Index::create_in_ram(schema); { - let mut index_writer = index.writer_with_num_threads(1, 
3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); index_writer.add_document(doc!(text_field=>"b")); index_writer.add_document(doc!(text_field=>"a b")); index_writer.add_document(doc!(text_field=>"b a")); @@ -217,7 +217,7 @@ pub mod tests { let schema = schema_builder.build(); let index = Index::create_in_ram(schema); { - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); index_writer.add_document(doc!(text_field=>"a b c d e f g h")); assert!(index_writer.commit().is_ok()); } diff --git a/src/query/phrase_query/phrase_weight.rs b/src/query/phrase_query/phrase_weight.rs index fde8a367a..063ad763b 100644 --- a/src/query/phrase_query/phrase_weight.rs +++ b/src/query/phrase_query/phrase_weight.rs @@ -9,8 +9,8 @@ use crate::query::Weight; use crate::query::{EmptyScorer, Explanation}; use crate::schema::IndexRecordOption; use crate::schema::Term; +use crate::Score; use crate::{DocId, DocSet}; -use crate::{Result, Score}; pub struct PhraseWeight { phrase_terms: Vec<(usize, Term)>, @@ -32,7 +32,7 @@ impl PhraseWeight { } } - fn fieldnorm_reader(&self, reader: &SegmentReader) -> FieldNormReader { + fn fieldnorm_reader(&self, reader: &SegmentReader) -> crate::Result { let field = self.phrase_terms[0].1.field(); reader.get_fieldnorms_reader(field) } @@ -41,9 +41,9 @@ impl PhraseWeight { &self, reader: &SegmentReader, boost: Score, - ) -> Result>> { + ) -> crate::Result>> { let similarity_weight = self.similarity_weight.boost_by(boost); - let fieldnorm_reader = self.fieldnorm_reader(reader); + let fieldnorm_reader = self.fieldnorm_reader(reader)?; if reader.has_deletes() { let mut term_postings_list = Vec::new(); for &(offset, ref term) in &self.phrase_terms { @@ -85,7 +85,7 @@ impl PhraseWeight { } impl Weight for PhraseWeight { - fn scorer(&self, reader: &SegmentReader, boost: Score) -> Result> { + fn scorer(&self, reader: &SegmentReader, boost: Score) -> 
crate::Result> { if let Some(scorer) = self.phrase_scorer(reader, boost)? { Ok(Box::new(scorer)) } else { @@ -93,7 +93,7 @@ impl Weight for PhraseWeight { } } - fn explain(&self, reader: &SegmentReader, doc: DocId) -> Result { + fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result { let scorer_opt = self.phrase_scorer(reader, 1.0)?; if scorer_opt.is_none() { return Err(does_not_match(doc)); @@ -102,7 +102,7 @@ impl Weight for PhraseWeight { if scorer.seek(doc) != doc { return Err(does_not_match(doc)); } - let fieldnorm_reader = self.fieldnorm_reader(reader); + let fieldnorm_reader = self.fieldnorm_reader(reader)?; let fieldnorm_id = fieldnorm_reader.fieldnorm_id(doc); let phrase_count = scorer.phrase_count(); let mut explanation = Explanation::new("Phrase Scorer", scorer.score()); diff --git a/src/query/range_query.rs b/src/query/range_query.rs index bae522382..7d8aa7715 100644 --- a/src/query/range_query.rs +++ b/src/query/range_query.rs @@ -9,7 +9,6 @@ use crate::query::{Query, Scorer, Weight}; use crate::schema::Type; use crate::schema::{Field, IndexRecordOption, Term}; use crate::termdict::{TermDictionary, TermStreamer}; -use crate::Result; use crate::{DocId, Score}; use std::collections::Bound; use std::ops::Range; @@ -42,13 +41,13 @@ fn map_bound TTo>( /// use tantivy::query::RangeQuery; /// use tantivy::schema::{Schema, INDEXED}; /// use tantivy::{doc, Index}; -/// # fn test() -> tantivy::Result<()> { +/// # fn test() -> crate::Result<()> { /// let mut schema_builder = Schema::builder(); /// let year_field = schema_builder.add_u64_field("year", INDEXED); /// let schema = schema_builder.build(); /// /// let index = Index::create_in_ram(schema); -/// let mut index_writer = index.writer_with_num_threads(1, 6_000_000)?; +/// let mut index_writer = index.writer_for_tests()?; /// for year in 1950u64..2017u64 { /// let num_docs_within_year = 10 + (year - 1950) * (year - 1950); /// for _ in 0..num_docs_within_year { @@ -246,7 +245,11 @@ impl 
RangeQuery { } impl Query for RangeQuery { - fn weight(&self, searcher: &Searcher, _scoring_enabled: bool) -> Result> { + fn weight( + &self, + searcher: &Searcher, + _scoring_enabled: bool, + ) -> crate::Result> { let schema = searcher.schema(); let value_type = schema.get_field_entry(self.field).field_type().value_type(); if value_type != self.value_type { @@ -289,7 +292,7 @@ impl RangeWeight { } impl Weight for RangeWeight { - fn scorer(&self, reader: &SegmentReader, boost: Score) -> Result> { + fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result> { let max_doc = reader.max_doc(); let mut doc_bitset = BitSet::with_max_value(max_doc); @@ -315,7 +318,7 @@ impl Weight for RangeWeight { Ok(Box::new(ConstScorer::new(doc_bitset, boost))) } - fn explain(&self, reader: &SegmentReader, doc: DocId) -> Result { + fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result { let mut scorer = self.scorer(reader, 1.0)?; if scorer.seek(doc) != doc { return Err(does_not_match(doc)); @@ -342,7 +345,7 @@ mod tests { let index = Index::create_in_ram(schema); { - let mut index_writer = index.writer_with_num_threads(1, 6_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); for year in 1950u64..2017u64 { let num_docs_within_year = 10 + (year - 1950) * (year - 1950); for _ in 0..num_docs_within_year { @@ -485,7 +488,7 @@ mod tests { schema_builder.add_i64_field("year", INDEXED); let schema = schema_builder.build(); let index = Index::create_in_ram(schema.clone()); - let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?; + let mut index_writer = index.writer_for_tests()?; let title = schema.get_field("title").unwrap(); let year = schema.get_field("year").unwrap(); index_writer.add_document(doc!( diff --git a/src/query/regex_query.rs b/src/query/regex_query.rs index 5c09f11e9..5db7a6f4c 100644 --- a/src/query/regex_query.rs +++ b/src/query/regex_query.rs @@ -103,7 +103,7 @@ mod test { let schema = 
schema_builder.build(); let index = Index::create_in_ram(schema); { - let mut index_writer = index.writer_with_num_threads(1, 10_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); index_writer.add_document(doc!( country_field => "japan", )); diff --git a/src/query/term_query/mod.rs b/src/query/term_query/mod.rs index a94f4f108..6b105a644 100644 --- a/src/query/term_query/mod.rs +++ b/src/query/term_query/mod.rs @@ -25,7 +25,7 @@ mod tests { let index = Index::create_in_ram(schema); { // writing the segment - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); let doc = doc!(text_field => "a"); index_writer.add_document(doc); assert!(index_writer.commit().is_ok()); @@ -50,7 +50,7 @@ mod tests { let index = Index::create_in_ram(schema); { // writing the segment - let mut index_writer = index.writer_with_num_threads(1, 3_000_000)?; + let mut index_writer = index.writer_for_tests()?; for _ in 0..COMPRESSION_BLOCK_SIZE { let doc = doc!(text_field => "a"); index_writer.add_document(doc); @@ -86,7 +86,7 @@ mod tests { let schema = schema_builder.build(); let index = Index::create_in_ram(schema); { - let mut index_writer = index.writer_with_num_threads(1, 10_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); index_writer.add_document(doc!( left_field => "left1 left2 left2 left2f2 left2f2 left3 abcde abcde abcde abcde abcde abcde abcde abcde abcde abcewde abcde abcde", right_field => "right1 right2", @@ -136,7 +136,7 @@ mod tests { let text_field = schema_builder.add_text_field("text", TEXT); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); - let mut index_writer = index.writer_with_num_threads(1, 5_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); index_writer.add_document(doc!(text_field=>"a b")); index_writer.add_document(doc!(text_field=>"a c")); 
index_writer.delete_term(Term::from_field_text(text_field, "b")); @@ -153,7 +153,7 @@ mod tests { let text_field = schema_builder.add_text_field("text", TEXT); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); index_writer.add_document(doc!(text_field=>"a")); index_writer.add_document(doc!(text_field=>"a")); index_writer.commit()?; diff --git a/src/query/term_query/term_weight.rs b/src/query/term_query/term_weight.rs index d79792824..adaf38a62 100644 --- a/src/query/term_query/term_weight.rs +++ b/src/query/term_query/term_weight.rs @@ -8,7 +8,6 @@ use crate::query::weight::for_each_scorer; use crate::query::Weight; use crate::query::{Explanation, Scorer}; use crate::schema::IndexRecordOption; -use crate::Result; use crate::Term; use crate::{DocId, Score}; @@ -19,12 +18,12 @@ pub struct TermWeight { } impl Weight for TermWeight { - fn scorer(&self, reader: &SegmentReader, boost: Score) -> Result> { + fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result> { let term_scorer = self.specialized_scorer(reader, boost)?; Ok(Box::new(term_scorer)) } - fn explain(&self, reader: &SegmentReader, doc: DocId) -> Result { + fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result { let mut scorer = self.specialized_scorer(reader, 1.0)?; if scorer.seek(doc) != doc { return Err(does_not_match(doc)); @@ -32,7 +31,7 @@ impl Weight for TermWeight { Ok(scorer.explain()) } - fn count(&self, reader: &SegmentReader) -> Result { + fn count(&self, reader: &SegmentReader) -> crate::Result { if let Some(delete_bitset) = reader.delete_bitset() { Ok(self.scorer(reader, 1.0)?.count(delete_bitset)) } else { @@ -96,10 +95,10 @@ impl TermWeight { &self, reader: &SegmentReader, boost: Score, - ) -> Result { + ) -> crate::Result { let field = self.term.field(); let inverted_index = 
reader.inverted_index(field); - let fieldnorm_reader = reader.get_fieldnorms_reader(field); + let fieldnorm_reader = reader.get_fieldnorms_reader(field)?; let similarity_weight = self.similarity_weight.boost_by(boost); let postings_opt: Option = inverted_index.read_postings(&self.term, self.index_record_option); diff --git a/src/schema/mod.rs b/src/schema/mod.rs index 4846f9c9d..36abed17f 100644 --- a/src/schema/mod.rs +++ b/src/schema/mod.rs @@ -150,7 +150,7 @@ use once_cell::sync::Lazy; use regex::Regex; /// Regular expression representing the restriction on a valid field names. -pub const FIELD_NAME_PATTERN: &'static str = r#"^[_a-zA-Z][_\-a-zA-Z0-9]*$"#; +pub const FIELD_NAME_PATTERN: &str = r#"^[_a-zA-Z][_\-a-zA-Z0-9]*$"#; /// Validator for a potential `field_name`. /// Returns true iff the name can be use for a field name. diff --git a/src/schema/value.rs b/src/schema/value.rs index c267e001c..53f75aefd 100644 --- a/src/schema/value.rs +++ b/src/schema/value.rs @@ -221,6 +221,12 @@ impl<'a> From<&'a str> for Value { } } +impl<'a> From<&'a [u8]> for Value { + fn from(bytes: &'a [u8]) -> Value { + Value::Bytes(bytes.to_vec()) + } +} + impl<'a> From for Value { fn from(facet: Facet) -> Value { Value::Facet(facet) diff --git a/src/snippet/mod.rs b/src/snippet/mod.rs index 26dce3375..80d5b0eb0 100644 --- a/src/snippet/mod.rs +++ b/src/snippet/mod.rs @@ -221,7 +221,7 @@ fn select_best_fragment_combination(fragments: &[FragmentCandidate], text: &str) /// # let text_field = schema_builder.add_text_field("text", TEXT); /// # let schema = schema_builder.build(); /// # let index = Index::create_in_ram(schema); -/// # let mut index_writer = index.writer_with_num_threads(1, 30_000_000)?; +/// # let mut index_writer = index.writer_for_tests()?; /// # let doc = doc!(text_field => r#"Comme je descendais des Fleuves impassibles, /// # Je ne me sentis plus guidé par les haleurs : /// # Des Peaux-Rouges criards les avaient pris pour cibles, @@ -506,7 +506,7 @@ Survey in 2016, 
2017, and 2018."#; let index = Index::create_in_ram(schema); { // writing the segment - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); index_writer.add_document(doc!(text_field => "a")); index_writer.add_document(doc!(text_field => "a")); index_writer.add_document(doc!(text_field => "a b")); @@ -562,7 +562,7 @@ Survey in 2016, 2017, and 2018."#; let index = Index::create_in_ram(schema); { // writing the segment - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); { let doc = doc ! (text_field => TEST_TEXT); index_writer.add_document(doc); diff --git a/src/space_usage/mod.rs b/src/space_usage/mod.rs index e647bccd4..b09e5ed0a 100644 --- a/src/space_usage/mod.rs +++ b/src/space_usage/mod.rs @@ -336,7 +336,7 @@ mod test { let index = Index::create_in_ram(schema.clone()); { - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); index_writer.add_document(doc!(name => 1u64)); index_writer.add_document(doc!(name => 2u64)); index_writer.add_document(doc!(name => 10u64)); @@ -374,7 +374,7 @@ mod test { let index = Index::create_in_ram(schema.clone()); { - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); index_writer.add_document(doc!(name => "hi")); index_writer.add_document(doc!(name => "this is a test")); index_writer.add_document( @@ -414,7 +414,7 @@ mod test { let index = Index::create_in_ram(schema.clone()); { - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); index_writer.add_document(doc!(name => "hi")); index_writer.add_document(doc!(name => "this is a test")); index_writer.add_document( @@ -453,7 +453,7 @@ mod test { let index = 
Index::create_in_ram(schema.clone()); { - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); index_writer.add_document(doc!(name => 1u64)); index_writer.add_document(doc!(name => 2u64)); index_writer.add_document(doc!(name => 3u64)); diff --git a/src/termdict/mod.rs b/src/termdict/mod.rs index 25da4b04e..fd1c4fa19 100644 --- a/src/termdict/mod.rs +++ b/src/termdict/mod.rs @@ -138,7 +138,7 @@ mod tests { let text_field = schema_builder.add_text_field("text", TEXT); let index = Index::create_in_ram(schema_builder.build()); { - let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let mut index_writer = index.writer_for_tests().unwrap(); { { let mut doc = Document::default();