From 63b593bd0a3ba507c87c2f37a0a28f0b6205ccaf Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Thu, 24 Jan 2019 09:10:38 +0900 Subject: [PATCH] Lower RAM usage in tests. --- Cargo.toml | 2 +- src/collector/int_facet_collector.rs | 2 +- src/collector/top_score_collector.rs | 2 +- src/indexer/index_writer.rs | 10 ++++---- src/indexer/merger.rs | 14 ++++++------ src/indexer/segment_manager.rs | 11 +++++++-- src/indexer/segment_updater.rs | 10 ++++---- src/lib.rs | 34 ++++++++++++---------------- src/postings/mod.rs | 13 ++++++----- src/postings/segment_postings.rs | 4 ++-- src/query/boolean_query/mod.rs | 2 +- src/query/phrase_query/mod.rs | 8 +++---- src/query/term_query/mod.rs | 2 +- src/snippet/mod.rs | 4 ++-- src/termdict/mod.rs | 2 +- src/tokenizer/mod.rs | 10 ++------ src/tokenizer/stemmer.rs | 4 ++-- src/tokenizer/tokenizer_manager.rs | 2 +- 18 files changed, 68 insertions(+), 68 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c9a7034d7..11c2af204 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tantivy" -version = "0.8.2-dev" +version = "0.9.0-dev" authors = ["Paul Masurel "] license = "MIT" categories = ["database-implementations", "data-structures"] diff --git a/src/collector/int_facet_collector.rs b/src/collector/int_facet_collector.rs index ac53b9908..01f00cc37 100644 --- a/src/collector/int_facet_collector.rs +++ b/src/collector/int_facet_collector.rs @@ -88,7 +88,7 @@ mod tests { let index = Index::create_in_ram(schema.clone()); { - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); { for i in 0u64..10u64 { index_writer.add_document(doc!( diff --git a/src/collector/top_score_collector.rs b/src/collector/top_score_collector.rs index 2ba5ffc87..869022686 100644 --- a/src/collector/top_score_collector.rs +++ b/src/collector/top_score_collector.rs @@ -142,7 +142,7 @@ mod tests { let index = Index::create_in_ram(schema); { // writing the segment - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); index_writer.add_document(doc!(text_field=>"Hello happy tax payer.")); index_writer.add_document(doc!(text_field=>"Droopy says hello happy tax payer")); index_writer.add_document(doc!(text_field=>"I like Droopy")); diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index 2f0625217..273dcc21c 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -657,8 +657,8 @@ mod tests { fn test_lockfile_stops_duplicates() { let schema_builder = schema::Schema::builder(); let index = Index::create_in_ram(schema_builder.build()); - let _index_writer = index.writer(40_000_000).unwrap(); - match index.writer(40_000_000) { + let _index_writer = index.writer(3_000_000).unwrap(); + match index.writer(3_000_000) { Err(TantivyError::LockFailure(_)) => {} _ => panic!("Expected FileAlreadyExists error"), } @@ -683,7 +683,7 @@ mod tests { fn test_set_merge_policy() { let schema_builder = schema::Schema::builder(); let index = Index::create_in_ram(schema_builder.build()); - let index_writer = index.writer(40_000_000).unwrap(); + let index_writer = index.writer(3_000_000).unwrap(); assert_eq!( format!("{:?}", index_writer.get_merge_policy()), "LogMergePolicy { min_merge_size: 8, min_layer_size: 10000, \ @@ -702,11 +702,11 @@ mod tests { let schema_builder = schema::Schema::builder(); let index = 
Index::create_in_ram(schema_builder.build()); { - let _index_writer = index.writer(40_000_000).unwrap(); + let _index_writer = index.writer(3_000_000).unwrap(); // the lock should be released when the // index_writer leaves the scope. } - let _index_writer_two = index.writer(40_000_000).unwrap(); + let _index_writer_two = index.writer(3_000_000).unwrap(); } #[test] diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index 67538c051..311240102 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -836,7 +836,7 @@ mod tests { let score_field = schema_builder.add_u64_field("score", score_fieldtype); let bytes_score_field = schema_builder.add_bytes_field("score_bytes"); let index = Index::create_in_ram(schema_builder.build()); - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); let search_term = |searcher: &Searcher, term: Term| { let collector = FastFieldTestCollector::for_field(score_field); @@ -1145,7 +1145,7 @@ mod tests { let facet_field = schema_builder.add_facet_field("facet"); let index = Index::create_in_ram(schema_builder.build()); { - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); let index_doc = |index_writer: &mut IndexWriter, doc_facets: &[&str]| { let mut doc = Document::default(); for facet in doc_facets { @@ -1210,7 +1210,7 @@ mod tests { let segment_ids = index .searchable_segment_ids() .expect("Searchable segments failed."); - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); index_writer .merge(&segment_ids) .expect("Failed to initiate merge") @@ -1233,7 +1233,7 @@ mod tests { // Deleting one term { - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); let facet = Facet::from_path(vec!["top", "a", "firstdoc"]); let facet_term = Term::from_facet(facet_field, &facet); index_writer.delete_term(facet_term); @@ -1291,7 +1291,7 @@ mod tests { let index = Index::create_in_ram(schema_builder.build()); { - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); let mut doc = Document::default(); doc.add_u64(int_field, 1); index_writer.add_document(doc.clone()); @@ -1334,7 +1334,7 @@ mod tests { let index = Index::create_in_ram(schema_builder.build()); { - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); let index_doc = |index_writer: &mut IndexWriter, int_vals: &[u64]| { let mut doc = Document::default(); for &val in int_vals { @@ -1423,7 +1423,7 @@ mod tests { let segment_ids = index .searchable_segment_ids() .expect("Searchable segments failed."); - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); index_writer .merge(&segment_ids) .expect("Failed to initiate merge") diff --git a/src/indexer/segment_manager.rs b/src/indexer/segment_manager.rs index 1e805eba1..c0089b262 100644 --- a/src/indexer/segment_manager.rs +++ b/src/indexer/segment_manager.rs @@ -109,9 +109,16 @@ impl SegmentManager { /// Deletes all empty segments fn 
remove_empty_segments(&self) { let mut registers_lock = self.write(); - registers_lock.committed.segment_entries().iter() + registers_lock + .committed + .segment_entries() + .iter() .filter(|segment| segment.meta().num_docs() == 0) - .for_each(|segment| registers_lock.committed.remove_segment(&segment.segment_id())); + .for_each(|segment| { + registers_lock + .committed + .remove_segment(&segment.segment_id()) + }); } pub fn commit(&self, segment_entries: Vec) { diff --git a/src/indexer/segment_updater.rs b/src/indexer/segment_updater.rs index b87aa717b..defd5d5be 100644 --- a/src/indexer/segment_updater.rs +++ b/src/indexer/segment_updater.rs @@ -555,7 +555,7 @@ mod tests { let index = Index::create_in_ram(schema); // writing the segment - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); index_writer.set_merge_policy(Box::new(MergeWheneverPossible)); { @@ -609,7 +609,7 @@ mod tests { let index = Index::create_in_ram(schema); // writing the segment - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); { for _ in 0..100 { @@ -634,7 +634,8 @@ mod tests { } { - let seg_ids = index.searchable_segment_ids() + let seg_ids = index + .searchable_segment_ids() .expect("Searchable segments failed."); // docs exist, should have at least 1 segment assert!(seg_ids.len() > 0); @@ -658,7 +659,8 @@ mod tests { index.load_searchers().unwrap(); assert_eq!(index.searcher().num_docs(), 0); - let seg_ids = index.searchable_segment_ids() + let seg_ids = index + .searchable_segment_ids() .expect("Searchable segments failed."); assert!(seg_ids.is_empty()); diff --git a/src/lib.rs b/src/lib.rs index 49fb977e4..3262fd768 100755 --- a/src/lib.rs +++ b/src/lib.rs @@ -231,11 +231,7 @@ pub use common::{i64_to_u64, u64_to_i64}; /// Expose the current version of tantivy, as well /// whether it was compiled with the simd compression. 
pub fn version() -> &'static str { - if cfg!(feature = "simdcompression") { - concat!(env!("CARGO_PKG_VERSION"), "-simd") - } else { - concat!(env!("CARGO_PKG_VERSION"), "-nosimd") - } + env!("CARGO_PKG_VERSION") } /// Defines tantivy's merging strategy @@ -348,7 +344,7 @@ mod tests { let index = Index::create_from_tempdir(schema).unwrap(); { // writing the segment - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); { let doc = doc!(text_field=>"af b"); index_writer.add_document(doc); @@ -370,7 +366,7 @@ mod tests { let mut schema_builder = Schema::builder(); let text_field = schema_builder.add_text_field("text", TEXT); let index = Index::create_in_ram(schema_builder.build()); - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); { index_writer.add_document(doc!(text_field=>"a b c")); index_writer.commit().unwrap(); @@ -412,7 +408,7 @@ mod tests { let text_field = schema_builder.add_text_field("text", TEXT); let index = Index::create_in_ram(schema_builder.build()); { - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); { let doc = doc!(text_field=>"a b c"); index_writer.add_document(doc); @@ -440,7 +436,7 @@ mod tests { let text_field = schema_builder.add_text_field("text", TEXT); let index = Index::create_in_ram(schema_builder.build()); { - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); { let doc = doc!(text_field=>"a b c"); index_writer.add_document(doc); @@ -487,7 +483,7 @@ mod tests { let index = Index::create_in_ram(schema); { // writing the segment - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); // 0 index_writer.add_document(doc!(text_field=>"a b")); // 1 @@ -534,7 +530,7 @@ mod tests { } { // writing the segment - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); // 0 index_writer.add_document(doc!(text_field=>"a b")); // 1 @@ -571,7 +567,7 @@ mod tests { } { // writing the segment - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); index_writer.add_document(doc!(text_field=>"a b")); index_writer.delete_term(Term::from_field_text(text_field, "c")); index_writer.rollback().unwrap(); @@ -620,7 +616,7 @@ mod tests { let schema = schema_builder.build(); let index = Index::create_in_ram(schema); - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); index_writer.add_document(doc!(field=>1u64)); index_writer.commit().unwrap(); index.load_searchers().unwrap(); @@ -643,7 +639,7 @@ mod tests { let schema = schema_builder.build(); let index = Index::create_in_ram(schema); - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); let negative_val = -1i64; index_writer.add_document(doc!(value_field => negative_val)); index_writer.commit().unwrap(); @@ 
-667,7 +663,7 @@ mod tests { let absent_field = schema_builder.add_text_field("text", TEXT); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); - let mut index_writer = index.writer_with_num_threads(2, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(2, 6_000_000).unwrap(); index_writer.add_document(doc!(text_field=>"a")); assert!(index_writer.commit().is_ok()); assert!(index.load_searchers().is_ok()); @@ -684,7 +680,7 @@ mod tests { let index = Index::create_in_ram(schema); // writing the segment - let mut index_writer = index.writer_with_num_threads(2, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(2, 6_000_000).unwrap(); let add_document = |index_writer: &mut IndexWriter, val: &'static str| { let doc = doc!(text_field=>val); @@ -720,7 +716,7 @@ mod tests { let index = Index::create_in_ram(schema); { // writing the segment - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); { let doc = doc!(text_field=>"af af af bc bc"); index_writer.add_document(doc); @@ -756,7 +752,7 @@ mod tests { { // writing the segment - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); index_writer.add_document(doc!(text_field=>"af af af b")); index_writer.add_document(doc!(text_field=>"a b c")); index_writer.add_document(doc!(text_field=>"a b c d")); @@ -809,7 +805,7 @@ mod tests { { // writing the segment - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); { let doc = doc!(text_field=>"af b"); index_writer.add_document(doc); diff --git a/src/postings/mod.rs b/src/postings/mod.rs index efaa006ea..c94e887a9 100644 --- a/src/postings/mod.rs +++ b/src/postings/mod.rs @@ -280,7 +280,7 @@ pub mod tests { let schema = schema_builder.build(); let index = Index::create_in_ram(schema); { - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); { let mut doc = Document::default(); doc.add_text(text_field, "g b b d c g c"); @@ -322,7 +322,7 @@ pub mod tests { let index = Index::create_in_ram(schema); { - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); for i in 0..num_docs { let mut doc = Document::default(); doc.add_u64(value_field, 2); @@ -399,7 +399,7 @@ pub mod tests { // delete some of the documents { - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); index_writer.delete_term(term_0); assert!(index_writer.commit().is_ok()); } @@ -449,7 +449,7 @@ pub mod tests { // delete everything else { - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); index_writer.delete_term(term_1); assert!(index_writer.commit().is_ok()); @@ -460,7 +460,8 @@ pub mod tests { // finally, check that it's empty { - let searchable_segment_ids = index.searchable_segment_ids() + let searchable_segment_ids = index + .searchable_segment_ids() .expect("could not get index segment ids"); assert!(searchable_segment_ids.is_empty()); 
assert_eq!(searcher.num_docs(), 0); @@ -494,7 +495,7 @@ pub mod tests { let index = Index::create_in_ram(schema); let posting_list_size = 1_000_000; { - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); for _ in 0..posting_list_size { let mut doc = Document::default(); if rng.gen_bool(1f64 / 15f64) { diff --git a/src/postings/segment_postings.rs b/src/postings/segment_postings.rs index 88f13ab7f..40640c8f9 100644 --- a/src/postings/segment_postings.rs +++ b/src/postings/segment_postings.rs @@ -752,7 +752,7 @@ mod tests { let int_field = schema_builder.add_u64_field("id", INT_INDEXED); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); let mut last_doc = 0u32; for &doc in docs { for _ in last_doc..doc { @@ -823,7 +823,7 @@ mod tests { let int_field = schema_builder.add_u64_field("id", INT_INDEXED); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); // create two postings list, one containg even number, // the other containing odd numbers. for i in 0..6 { diff --git a/src/query/boolean_query/mod.rs b/src/query/boolean_query/mod.rs index 544e615fc..622575661 100644 --- a/src/query/boolean_query/mod.rs +++ b/src/query/boolean_query/mod.rs @@ -29,7 +29,7 @@ mod tests { let index = Index::create_in_ram(schema); { // writing the segment - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); { let doc = doc!(text_field => "a b c"); index_writer.add_document(doc); diff --git a/src/query/phrase_query/mod.rs b/src/query/phrase_query/mod.rs index 690ccdd39..90ae26451 100644 --- a/src/query/phrase_query/mod.rs +++ b/src/query/phrase_query/mod.rs @@ -24,7 +24,7 @@ mod tests { let schema = schema_builder.build(); let index = Index::create_in_ram(schema); { - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); for &text in texts { let doc = doc!(text_field=>text); index_writer.add_document(doc); @@ -86,7 +86,7 @@ mod tests { let schema = schema_builder.build(); let index = Index::create_in_ram(schema); { - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); index_writer.add_document(doc!(text_field=>"a b c")); assert!(index_writer.commit().is_ok()); } @@ -141,7 +141,7 @@ mod tests { let schema = schema_builder.build(); let index = Index::create_in_ram(schema); { - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); index_writer.add_document(doc!(text_field=>"b")); index_writer.add_document(doc!(text_field=>"a b")); index_writer.add_document(doc!(text_field=>"b a")); @@ -173,7 +173,7 @@ mod tests { let schema = schema_builder.build(); let index = Index::create_in_ram(schema); { - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 
3_000_000).unwrap(); index_writer.add_document(doc!(text_field=>"a b c d e f g h")); assert!(index_writer.commit().is_ok()); } diff --git a/src/query/term_query/mod.rs b/src/query/term_query/mod.rs index 11f6ea934..edc4af411 100644 --- a/src/query/term_query/mod.rs +++ b/src/query/term_query/mod.rs @@ -25,7 +25,7 @@ mod tests { let index = Index::create_in_ram(schema); { // writing the segment - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); { let doc = doc!(text_field => "a"); index_writer.add_document(doc); diff --git a/src/snippet/mod.rs b/src/snippet/mod.rs index 77a289baf..8a3895acb 100644 --- a/src/snippet/mod.rs +++ b/src/snippet/mod.rs @@ -523,7 +523,7 @@ Survey in 2016, 2017, and 2018."#; let index = Index::create_in_ram(schema); { // writing the segment - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); index_writer.add_document(doc!(text_field => "a")); index_writer.add_document(doc!(text_field => "a")); index_writer.add_document(doc!(text_field => "a b")); @@ -580,7 +580,7 @@ Survey in 2016, 2017, and 2018."#; let index = Index::create_in_ram(schema); { // writing the segment - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); { let doc = doc ! (text_field => TEST_TEXT); index_writer.add_document(doc); diff --git a/src/termdict/mod.rs b/src/termdict/mod.rs index f0db51a39..f3157118c 100644 --- a/src/termdict/mod.rs +++ b/src/termdict/mod.rs @@ -133,7 +133,7 @@ mod tests { let text_field = schema_builder.add_text_field("text", TEXT); let index = Index::create_in_ram(schema_builder.build()); { - let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); { { let mut doc = Document::default(); diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs index bef61daaf..8ef2f8be0 100644 --- a/src/tokenizer/mod.rs +++ b/src/tokenizer/mod.rs @@ -148,7 +148,7 @@ pub use self::ngram_tokenizer::NgramTokenizer; pub use self::raw_tokenizer::RawTokenizer; pub use self::remove_long::RemoveLongFilter; pub use self::simple_tokenizer::SimpleTokenizer; -pub use self::stemmer::{Stemmer, Language}; +pub use self::stemmer::{Language, Stemmer}; pub use self::stop_word_filter::StopWordFilter; pub(crate) use self::token_stream_chain::TokenStreamChain; pub(crate) use self::tokenizer::box_tokenizer; @@ -160,14 +160,8 @@ pub use self::tokenizer_manager::TokenizerManager; #[cfg(test)] pub mod tests { use super::{ - Token, + Language, LowerCaser, RemoveLongFilter, SimpleTokenizer, Stemmer, Token, Tokenizer, TokenizerManager, - SimpleTokenizer, - Tokenizer, - RemoveLongFilter, - LowerCaser, - Stemmer, - Language }; /// This is a function that can be used in tests and doc tests diff --git a/src/tokenizer/stemmer.rs b/src/tokenizer/stemmer.rs index 19980a59d..dbb15c554 100644 --- a/src/tokenizer/stemmer.rs +++ b/src/tokenizer/stemmer.rs @@ -23,7 +23,7 @@ pub enum Language { Spanish, Swedish, Tamil, - Turkish + Turkish, } impl Language { @@ -46,7 +46,7 @@ impl Language { Spanish => Algorithm::Spanish, Swedish => Algorithm::Swedish, Tamil => Algorithm::Tamil, - Turkish => Algorithm::Turkish + Turkish => Algorithm::Turkish, } } } diff --git a/src/tokenizer/tokenizer_manager.rs 
b/src/tokenizer/tokenizer_manager.rs index 37115e976..7f97c589b 100644 --- a/src/tokenizer/tokenizer_manager.rs +++ b/src/tokenizer/tokenizer_manager.rs @@ -1,6 +1,7 @@ use std::collections::HashMap; use std::sync::{Arc, RwLock}; use tokenizer::box_tokenizer; +use tokenizer::stemmer::Language; use tokenizer::BoxedTokenizer; use tokenizer::LowerCaser; use tokenizer::RawTokenizer; @@ -8,7 +9,6 @@ use tokenizer::RemoveLongFilter; use tokenizer::SimpleTokenizer; use tokenizer::Stemmer; use tokenizer::Tokenizer; -use tokenizer::stemmer::Language; /// The tokenizer manager serves as a store for /// all of the pre-configured tokenizer pipelines.
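
The substance of this patch is mechanical: every test that used to open an `IndexWriter` with a 40 MB heap (`writer_with_num_threads(1, 40_000_000)` or `writer(40_000_000)`) now requests 3 MB per thread (6 MB for the two-thread tests), which is ample for the tiny in-RAM indexes these tests build. The remaining hunks are housekeeping: the version bump to 0.9.0-dev, `version()` no longer appending a `-simd`/`-nosimd` suffix, rustfmt-style reflowing of method chains and import lists, trailing commas in the `Language` enum, and an import reorder in `tokenizer_manager.rs`. The sketch below shows the resulting test pattern against the 0.9-dev API used in these tests (`doc!`, `writer_with_num_threads`, `load_searchers`); the imports, `main` wrapper, and error handling are illustrative assumptions, not part of the patch.

```rust
// Sketch only: mirrors the test pattern used throughout this patch
// (tantivy 0.9-dev era API). On a 2015-edition crate the `doc!` macro
// needs `#[macro_use] extern crate tantivy;` instead of the `use` below.
use tantivy::schema::{Schema, TEXT};
use tantivy::Index;

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let text_field = schema_builder.add_text_field("text", TEXT);
    let index = Index::create_in_ram(schema_builder.build());

    // One indexing thread with a 3 MB heap budget (these tests previously
    // asked for 40 MB): plenty for a handful of documents, and it keeps
    // per-test RAM usage low.
    let mut index_writer = index.writer_with_num_threads(1, 3_000_000)?;
    index_writer.add_document(doc!(text_field => "hello happy tax payer"));
    index_writer.commit()?;

    index.load_searchers()?;
    assert_eq!(index.searcher().num_docs(), 1);
    Ok(())
}
```

3_000_000 is simply the figure this patch settles on; whether a given tantivy version enforces a larger per-thread minimum heap is worth checking before copying the number elsewhere.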