diff --git a/src/collector/count_collector.rs b/src/collector/count_collector.rs index 9688f3221..3389668e6 100644 --- a/src/collector/count_collector.rs +++ b/src/collector/count_collector.rs @@ -13,44 +13,29 @@ use crate::SegmentReader; /// use tantivy::collector::Count; /// use tantivy::query::QueryParser; /// use tantivy::schema::{Schema, TEXT}; -/// use tantivy::{doc, Index, Result}; +/// use tantivy::{doc, Index}; /// -/// # fn main() { example().unwrap(); } -/// fn example() -> Result<()> { -/// let mut schema_builder = Schema::builder(); -/// let title = schema_builder.add_text_field("title", TEXT); -/// let schema = schema_builder.build(); -/// let index = Index::create_in_ram(schema); -/// { -/// let mut index_writer = index.writer(3_000_000)?; -/// index_writer.add_document(doc!( -/// title => "The Name of the Wind", -/// )); -/// index_writer.add_document(doc!( -/// title => "The Diary of Muadib", -/// )); -/// index_writer.add_document(doc!( -/// title => "A Dairy Cow", -/// )); -/// index_writer.add_document(doc!( -/// title => "The Diary of a Young Girl", -/// )); -/// index_writer.commit().unwrap(); -/// } +/// let mut schema_builder = Schema::builder(); +/// let title = schema_builder.add_text_field("title", TEXT); +/// let schema = schema_builder.build(); +/// let index = Index::create_in_ram(schema); /// -/// let reader = index.reader()?; -/// let searcher = reader.searcher(); +/// let mut index_writer = index.writer(3_000_000).unwrap(); +/// index_writer.add_document(doc!(title => "The Name of the Wind")); +/// index_writer.add_document(doc!(title => "The Diary of Muadib")); +/// index_writer.add_document(doc!(title => "A Dairy Cow")); +/// index_writer.add_document(doc!(title => "The Diary of a Young Girl")); +/// assert!(index_writer.commit().is_ok()); /// -/// { -/// let query_parser = QueryParser::for_index(&index, vec![title]); -/// let query = query_parser.parse_query("diary")?; -/// let count = searcher.search(&query, &Count).unwrap(); 
+/// let reader = index.reader().unwrap(); +/// let searcher = reader.searcher(); /// -/// assert_eq!(count, 2); -/// } +/// // Here comes the important part +/// let query_parser = QueryParser::for_index(&index, vec![title]); +/// let query = query_parser.parse_query("diary").unwrap(); +/// let count = searcher.search(&query, &Count).unwrap(); /// -/// Ok(()) -/// } +/// assert_eq!(count, 2); /// ``` pub struct Count; diff --git a/src/collector/facet_collector.rs b/src/collector/facet_collector.rs index f2f84d92e..bb2d09028 100644 --- a/src/collector/facet_collector.rs +++ b/src/collector/facet_collector.rs @@ -86,7 +86,6 @@ fn facet_depth(facet_bytes: &[u8]) -> usize { /// use tantivy::schema::{Facet, Schema, TEXT}; /// use tantivy::{doc, Index, Result}; /// -/// # fn main() { example().unwrap(); } /// fn example() -> Result<()> { /// let mut schema_builder = Schema::builder(); /// @@ -172,6 +171,7 @@ fn facet_depth(facet_bytes: &[u8]) -> usize { /// /// Ok(()) /// } +/// # assert!(example().is_ok()); /// ``` pub struct FacetCollector { field: Field, diff --git a/src/collector/multi_collector.rs b/src/collector/multi_collector.rs index a7b23861a..8956fa2e7 100644 --- a/src/collector/multi_collector.rs +++ b/src/collector/multi_collector.rs @@ -108,49 +108,35 @@ impl FruitHandle { /// use tantivy::collector::{Count, TopDocs, MultiCollector}; /// use tantivy::query::QueryParser; /// use tantivy::schema::{Schema, TEXT}; -/// use tantivy::{doc, Index, Result}; +/// use tantivy::{doc, Index}; /// -/// # fn main() { example().unwrap(); } -/// fn example() -> Result<()> { -/// let mut schema_builder = Schema::builder(); -/// let title = schema_builder.add_text_field("title", TEXT); -/// let schema = schema_builder.build(); -/// let index = Index::create_in_ram(schema); -/// { -/// let mut index_writer = index.writer(3_000_000)?; -/// index_writer.add_document(doc!( -/// title => "The Name of the Wind", -/// )); -/// index_writer.add_document(doc!( -/// title => "The 
Diary of Muadib", -/// )); -/// index_writer.add_document(doc!( -/// title => "A Dairy Cow", -/// )); -/// index_writer.add_document(doc!( -/// title => "The Diary of a Young Girl", -/// )); -/// index_writer.commit().unwrap(); -/// } +/// let mut schema_builder = Schema::builder(); +/// let title = schema_builder.add_text_field("title", TEXT); +/// let schema = schema_builder.build(); +/// let index = Index::create_in_ram(schema); /// -/// let reader = index.reader()?; -/// let searcher = reader.searcher(); +/// let mut index_writer = index.writer(3_000_000).unwrap(); +/// index_writer.add_document(doc!(title => "The Name of the Wind")); +/// index_writer.add_document(doc!(title => "The Diary of Muadib")); +/// index_writer.add_document(doc!(title => "A Dairy Cow")); +/// index_writer.add_document(doc!(title => "The Diary of a Young Girl")); +/// assert!(index_writer.commit().is_ok()); /// -/// let mut collectors = MultiCollector::new(); -/// let top_docs_handle = collectors.add_collector(TopDocs::with_limit(2)); -/// let count_handle = collectors.add_collector(Count); -/// let query_parser = QueryParser::for_index(&index, vec![title]); -/// let query = query_parser.parse_query("diary")?; -/// let mut multi_fruit = searcher.search(&query, &collectors)?; +/// let reader = index.reader().unwrap(); +/// let searcher = reader.searcher(); /// -/// let count = count_handle.extract(&mut multi_fruit); -/// let top_docs = top_docs_handle.extract(&mut multi_fruit); +/// let mut collectors = MultiCollector::new(); +/// let top_docs_handle = collectors.add_collector(TopDocs::with_limit(2)); +/// let count_handle = collectors.add_collector(Count); +/// let query_parser = QueryParser::for_index(&index, vec![title]); +/// let query = query_parser.parse_query("diary").unwrap(); +/// let mut multi_fruit = searcher.search(&query, &collectors).unwrap(); /// -/// # assert_eq!(count, 2); -/// # assert_eq!(top_docs.len(), 2); +/// let count = count_handle.extract(&mut multi_fruit); 
+/// let top_docs = top_docs_handle.extract(&mut multi_fruit); /// -/// Ok(()) -/// } +/// assert_eq!(count, 2); +/// assert_eq!(top_docs.len(), 2); /// ``` #[allow(clippy::type_complexity)] #[derive(Default)] diff --git a/src/collector/top_score_collector.rs b/src/collector/top_score_collector.rs index 70b4a98aa..b3e580570 100644 --- a/src/collector/top_score_collector.rs +++ b/src/collector/top_score_collector.rs @@ -29,43 +29,29 @@ use std::fmt; /// use tantivy::collector::TopDocs; /// use tantivy::query::QueryParser; /// use tantivy::schema::{Schema, TEXT}; -/// use tantivy::{doc, DocAddress, Index, Result}; +/// use tantivy::{doc, DocAddress, Index}; /// -/// # fn main() { example().unwrap(); } -/// fn example() -> Result<()> { -/// let mut schema_builder = Schema::builder(); -/// let title = schema_builder.add_text_field("title", TEXT); -/// let schema = schema_builder.build(); -/// let index = Index::create_in_ram(schema); -/// { -/// let mut index_writer = index.writer_with_num_threads(1, 3_000_000)?; -/// index_writer.add_document(doc!( -/// title => "The Name of the Wind", -/// )); -/// index_writer.add_document(doc!( -/// title => "The Diary of Muadib", -/// )); -/// index_writer.add_document(doc!( -/// title => "A Dairy Cow", -/// )); -/// index_writer.add_document(doc!( -/// title => "The Diary of a Young Girl", -/// )); -/// index_writer.commit().unwrap(); -/// } +/// let mut schema_builder = Schema::builder(); +/// let title = schema_builder.add_text_field("title", TEXT); +/// let schema = schema_builder.build(); +/// let index = Index::create_in_ram(schema); /// -/// let reader = index.reader()?; -/// let searcher = reader.searcher(); +/// let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); +/// index_writer.add_document(doc!(title => "The Name of the Wind")); +/// index_writer.add_document(doc!(title => "The Diary of Muadib")); +/// index_writer.add_document(doc!(title => "A Dairy Cow")); +/// 
index_writer.add_document(doc!(title => "The Diary of a Young Girl")); +/// assert!(index_writer.commit().is_ok()); /// -/// let query_parser = QueryParser::for_index(&index, vec![title]); -/// let query = query_parser.parse_query("diary")?; -/// let top_docs = searcher.search(&query, &TopDocs::with_limit(2))?; +/// let reader = index.reader().unwrap(); +/// let searcher = reader.searcher(); /// -/// assert_eq!(&top_docs[0], &(0.7261542, DocAddress(0, 1))); -/// assert_eq!(&top_docs[1], &(0.6099695, DocAddress(0, 3))); +/// let query_parser = QueryParser::for_index(&index, vec![title]); +/// let query = query_parser.parse_query("diary").unwrap(); +/// let top_docs = searcher.search(&query, &TopDocs::with_limit(2)).unwrap(); /// -/// Ok(()) -/// } +/// assert_eq!(&top_docs[0], &(0.7261542, DocAddress(0, 1))); +/// assert_eq!(&top_docs[1], &(0.6099695, DocAddress(0, 3))); /// ``` pub struct TopDocs(TopCollector); @@ -102,15 +88,12 @@ impl TopDocs { /// # /// # let index = Index::create_in_ram(schema); /// # let mut index_writer = index.writer_with_num_threads(1, 3_000_000)?; - /// # index_writer.add_document(doc!( - /// # title => "The Name of the Wind", - /// # rating => 92u64, - /// # )); + /// # index_writer.add_document(doc!(title => "The Name of the Wind", rating => 92u64)); /// # index_writer.add_document(doc!(title => "The Diary of Muadib", rating => 97u64)); /// # index_writer.add_document(doc!(title => "A Dairy Cow", rating => 63u64)); /// # index_writer.add_document(doc!(title => "The Diary of a Young Girl", rating => 80u64)); - /// # index_writer.commit()?; - /// # let reader = index.reader()?; + /// # assert!(index_writer.commit().is_ok()); + /// # let reader = index.reader().unwrap(); /// # let query = QueryParser::for_index(&index, vec![title]).parse_query("diary")?; /// # let top_docs = docs_sorted_by_rating(&reader.searcher(), &query, rating)?; /// # assert_eq!(top_docs, @@ -202,27 +185,33 @@ impl TopDocs { /// use tantivy::collector::TopDocs; /// use 
tantivy::schema::Field; /// - /// # fn create_schema() -> Schema { - /// # let mut schema_builder = Schema::builder(); - /// # schema_builder.add_text_field("product_name", TEXT); - /// # schema_builder.add_u64_field("popularity", FAST); - /// # schema_builder.build() - /// # } - /// # - /// # fn main() -> tantivy::Result<()> { - /// # let schema = create_schema(); - /// # let index = Index::create_in_ram(schema); - /// # let mut index_writer = index.writer_with_num_threads(1, 3_000_000)?; - /// # let product_name = index.schema().get_field("product_name").unwrap(); - /// # + /// fn create_schema() -> Schema { + /// let mut schema_builder = Schema::builder(); + /// schema_builder.add_text_field("product_name", TEXT); + /// schema_builder.add_u64_field("popularity", FAST); + /// schema_builder.build() + /// } + /// + /// fn create_index() -> tantivy::Result<Index> { + /// let schema = create_schema(); + /// let index = Index::create_in_ram(schema); + /// let mut index_writer = index.writer_with_num_threads(1, 3_000_000)?; + /// let product_name = index.schema().get_field("product_name").unwrap(); + /// let popularity: Field = index.schema().get_field("popularity").unwrap(); + /// index_writer.add_document(doc!(product_name => "The Diary of Muadib", popularity => 1u64)); + /// index_writer.add_document(doc!(product_name => "A Dairy Cow", popularity => 10u64)); + /// index_writer.add_document(doc!(product_name => "The Diary of a Young Girl", popularity => 15u64)); + /// index_writer.commit()?; + /// Ok(index) + /// } + /// + /// let index = create_index().unwrap(); + /// let product_name = index.schema().get_field("product_name").unwrap(); /// let popularity: Field = index.schema().get_field("popularity").unwrap(); - /// # index_writer.add_document(doc!(product_name => "The Diary of Muadib", popularity => 1u64)); - /// # index_writer.add_document(doc!(product_name => "A Dairy Cow", popularity => 10u64)); - /// # index_writer.add_document(doc!(product_name => "The Diary of a
Young Girl", popularity => 15u64)); - /// # index_writer.commit()?; - /// // ... - /// # let user_query = "diary"; - /// # let query = QueryParser::for_index(&index, vec![product_name]).parse_query(user_query)?; + /// + /// let user_query_str = "diary"; + /// let query_parser = QueryParser::for_index(&index, vec![product_name]); + /// let query = query_parser.parse_query(user_query_str).unwrap(); /// /// // This is where we build our collector with our custom score. /// let top_docs_by_custom_score = TopDocs @@ -249,15 +238,12 @@ impl TopDocs { /// popularity_boost_score * original_score /// } /// }); - /// # let reader = index.reader()?; - /// # let searcher = reader.searcher(); + /// let reader = index.reader().unwrap(); + /// let searcher = reader.searcher(); /// // ... and here are our documents. Note this is a simple vec. /// // The `Score` in the pair is our tweaked score. /// let resulting_docs: Vec<(Score, DocAddress)> = - /// searcher.search(&*query, &top_docs_by_custom_score)?; - /// - /// # Ok(()) - /// # } + /// searcher.search(&query, &top_docs_by_custom_score).unwrap(); /// ``` /// /// # See also diff --git a/src/directory/ram_directory.rs b/src/directory/ram_directory.rs index 89fdc123f..5ee271dc8 100644 --- a/src/directory/ram_directory.rs +++ b/src/directory/ram_directory.rs @@ -191,7 +191,7 @@ impl Directory for RAMDirectory { // Reserve the path to prevent calls to .write() to succeed. 
self.fs.write().unwrap().write(path_buf.clone(), &[]); - let mut vec_writer = VecWriter::new(path_buf.clone(), self.clone()); + let mut vec_writer = VecWriter::new(path_buf, self.clone()); vec_writer.write_all(data)?; vec_writer.flush()?; if path == Path::new(&*META_FILEPATH) { diff --git a/src/indexer/segment_updater.rs b/src/indexer/segment_updater.rs index dcad09312..3d59ea218 100644 --- a/src/indexer/segment_updater.rs +++ b/src/indexer/segment_updater.rs @@ -138,7 +138,7 @@ fn merge( let segment_meta = index.new_segment_meta(merged_segment.id(), num_docs); - Ok(SegmentEntry::new(segment_meta.clone(), delete_cursor, None)) + Ok(SegmentEntry::new(segment_meta, delete_cursor, None)) } pub(crate) struct InnerSegmentUpdater { diff --git a/src/query/exclude.rs b/src/query/exclude.rs index f38f9ea4b..6bb6996b4 100644 --- a/src/query/exclude.rs +++ b/src/query/exclude.rs @@ -54,21 +54,21 @@ where match self.excluding_state { State::ExcludeOne(excluded_doc) => { if doc == excluded_doc { - false - } else if excluded_doc > doc { - true - } else { - match self.excluding_docset.skip_next(doc) { - SkipResult::OverStep => { - self.excluding_state = State::ExcludeOne(self.excluding_docset.doc()); - true - } - SkipResult::End => { - self.excluding_state = State::Finished; - true - } - SkipResult::Reached => false, + return false; + } + if excluded_doc > doc { + return true; + } + match self.excluding_docset.skip_next(doc) { + SkipResult::OverStep => { + self.excluding_state = State::ExcludeOne(self.excluding_docset.doc()); + true } + SkipResult::End => { + self.excluding_state = State::Finished; + true + } + SkipResult::Reached => false, } } State::Finished => true, diff --git a/src/query/fuzzy_query.rs b/src/query/fuzzy_query.rs index 25396ed15..d50cc8a25 100644 --- a/src/query/fuzzy_query.rs +++ b/src/query/fuzzy_query.rs @@ -33,7 +33,6 @@ static LEV_BUILDER: Lazy> = Laz /// use tantivy::schema::{Schema, TEXT}; /// use tantivy::{doc, Index, Result, Term}; /// -/// # fn 
main() { example().unwrap(); } /// fn example() -> Result<()> { /// let mut schema_builder = Schema::builder(); /// let title = schema_builder.add_text_field("title", TEXT); @@ -59,7 +58,6 @@ static LEV_BUILDER: Lazy> = Laz /// let searcher = reader.searcher(); /// /// { -/// /// let term = Term::from_field_text(title, "Diary"); /// let query = FuzzyTermQuery::new(term, 1, true); /// let (top_docs, count) = searcher.search(&query, &(TopDocs::with_limit(2), Count)).unwrap(); @@ -69,6 +67,7 @@ static LEV_BUILDER: Lazy> = Laz /// /// Ok(()) /// } +/// # assert!(example().is_ok()); /// ``` #[derive(Debug, Clone)] pub struct FuzzyTermQuery { diff --git a/src/query/phrase_query/phrase_scorer.rs b/src/query/phrase_query/phrase_scorer.rs index e1d4fdccf..3a0902f91 100644 --- a/src/query/phrase_query/phrase_scorer.rs +++ b/src/query/phrase_query/phrase_scorer.rs @@ -4,6 +4,7 @@ use crate::postings::Postings; use crate::query::bm25::BM25Weight; use crate::query::{Intersection, Scorer}; use crate::DocId; +use std::cmp::Ordering; struct PostingsWithOffset { offset: u32, @@ -59,12 +60,16 @@ fn intersection_exists(left: &[u32], right: &[u32]) -> bool { while left_i < left.len() && right_i < right.len() { let left_val = left[left_i]; let right_val = right[right_i]; - if left_val < right_val { - left_i += 1; - } else if right_val < left_val { - right_i += 1; - } else { - return true; + match left_val.cmp(&right_val) { + Ordering::Less => { + left_i += 1; + } + Ordering::Equal => { + return true; + } + Ordering::Greater => { + right_i += 1; + } } } false @@ -77,14 +82,18 @@ fn intersection_count(left: &[u32], right: &[u32]) -> usize { while left_i < left.len() && right_i < right.len() { let left_val = left[left_i]; let right_val = right[right_i]; - if left_val < right_val { - left_i += 1; - } else if right_val < left_val { - right_i += 1; - } else { - count += 1; - left_i += 1; - right_i += 1; + match left_val.cmp(&right_val) { + Ordering::Less => { + left_i += 1; + } + 
Ordering::Equal => { + count += 1; + left_i += 1; + right_i += 1; + } + Ordering::Greater => { + right_i += 1; + } } } count @@ -103,15 +112,19 @@ fn intersection(left: &mut [u32], right: &[u32]) -> usize { while left_i < left_len && right_i < right_len { let left_val = left[left_i]; let right_val = right[right_i]; - if left_val < right_val { - left_i += 1; - } else if right_val < left_val { - right_i += 1; - } else { - left[count] = left_val; - count += 1; - left_i += 1; - right_i += 1; + match left_val.cmp(&right_val) { + Ordering::Less => { + left_i += 1; + } + Ordering::Equal => { + left[count] = left_val; + count += 1; + left_i += 1; + right_i += 1; + } + Ordering::Greater => { + right_i += 1; + } } } count diff --git a/src/query/range_query.rs b/src/query/range_query.rs index 4f80048e6..c0d5afff9 100644 --- a/src/query/range_query.rs +++ b/src/query/range_query.rs @@ -38,41 +38,33 @@ fn map_bound TTo>( /// # Example /// /// ```rust -/// # use tantivy::collector::Count; -/// # use tantivy::query::RangeQuery; -/// # use tantivy::schema::{Schema, INDEXED}; -/// # use tantivy::{doc, Index, Result}; -/// # -/// # fn run() -> Result<()> { -/// # let mut schema_builder = Schema::builder(); -/// # let year_field = schema_builder.add_u64_field("year", INDEXED); -/// # let schema = schema_builder.build(); -/// # -/// # let index = Index::create_in_ram(schema); -/// # { -/// # let mut index_writer = index.writer_with_num_threads(1, 6_000_000).unwrap(); -/// # for year in 1950u64..2017u64 { -/// # let num_docs_within_year = 10 + (year - 1950) * (year - 1950); -/// # for _ in 0..num_docs_within_year { -/// # index_writer.add_document(doc!(year_field => year)); -/// # } -/// # } -/// # index_writer.commit().unwrap(); -/// # } -/// # let reader = index.reader()?; +/// use tantivy::collector::Count; +/// use tantivy::query::RangeQuery; +/// use tantivy::schema::{Schema, INDEXED}; +/// use tantivy::{doc, Index}; +/// # fn test() -> tantivy::Result<()> { +/// let mut 
schema_builder = Schema::builder(); +/// let year_field = schema_builder.add_u64_field("year", INDEXED); +/// let schema = schema_builder.build(); +/// +/// let index = Index::create_in_ram(schema); +/// let mut index_writer = index.writer_with_num_threads(1, 6_000_000)?; +/// for year in 1950u64..2017u64 { +/// let num_docs_within_year = 10 + (year - 1950) * (year - 1950); +/// for _ in 0..num_docs_within_year { +/// index_writer.add_document(doc!(year_field => year)); +/// } +/// } +/// index_writer.commit()?; +/// +/// let reader = index.reader()?; /// let searcher = reader.searcher(); -/// /// let docs_in_the_sixties = RangeQuery::new_u64(year_field, 1960..1970); -/// /// let num_60s_books = searcher.search(&docs_in_the_sixties, &Count)?; -/// -/// # assert_eq!(num_60s_books, 2285); -/// # Ok(()) -/// # } -/// # -/// # fn main() { -/// # run().unwrap() +/// assert_eq!(num_60s_books, 2285); +/// Ok(()) /// # } +/// # assert!(test().is_ok()); /// ``` #[derive(Clone, Debug)] pub struct RangeQuery { diff --git a/src/query/regex_query.rs b/src/query/regex_query.rs index 14c281d4f..2280ba67e 100644 --- a/src/query/regex_query.rs +++ b/src/query/regex_query.rs @@ -15,40 +15,40 @@ use tantivy_fst::Regex; /// use tantivy::collector::Count; /// use tantivy::query::RegexQuery; /// use tantivy::schema::{Schema, TEXT}; -/// use tantivy::{doc, Index, Result, Term}; +/// use tantivy::{doc, Index, Term}; /// -/// # fn main() { example().unwrap(); } -/// fn example() -> Result<()> { -/// let mut schema_builder = Schema::builder(); -/// let title = schema_builder.add_text_field("title", TEXT); -/// let schema = schema_builder.build(); -/// let index = Index::create_in_ram(schema); -/// { -/// let mut index_writer = index.writer(3_000_000)?; -/// index_writer.add_document(doc!( -/// title => "The Name of the Wind", -/// )); -/// index_writer.add_document(doc!( -/// title => "The Diary of Muadib", -/// )); -/// index_writer.add_document(doc!( -/// title => "A Dairy Cow", -/// )); 
-/// index_writer.add_document(doc!( -/// title => "The Diary of a Young Girl", -/// )); -/// index_writer.commit().unwrap(); -/// } -/// -/// let reader = index.reader()?; -/// let searcher = reader.searcher(); -/// -/// let term = Term::from_field_text(title, "Diary"); -/// let query = RegexQuery::from_pattern("d[ai]{2}ry", title)?; -/// let count = searcher.search(&query, &Count)?; -/// assert_eq!(count, 3); -/// Ok(()) +/// # fn test() -> tantivy::Result<()> { +/// let mut schema_builder = Schema::builder(); +/// let title = schema_builder.add_text_field("title", TEXT); +/// let schema = schema_builder.build(); +/// let index = Index::create_in_ram(schema); +/// { +/// let mut index_writer = index.writer(3_000_000)?; +/// index_writer.add_document(doc!( +/// title => "The Name of the Wind", +/// )); +/// index_writer.add_document(doc!( +/// title => "The Diary of Muadib", +/// )); +/// index_writer.add_document(doc!( +/// title => "A Dairy Cow", +/// )); +/// index_writer.add_document(doc!( +/// title => "The Diary of a Young Girl", +/// )); +/// index_writer.commit().unwrap(); /// } +/// +/// let reader = index.reader()?; +/// let searcher = reader.searcher(); +/// +/// let term = Term::from_field_text(title, "Diary"); +/// let query = RegexQuery::from_pattern("d[ai]{2}ry", title)?; +/// let count = searcher.search(&query, &Count)?; +/// assert_eq!(count, 3); +/// Ok(()) +/// # } +/// # assert!(test().is_ok()); /// ``` #[derive(Debug, Clone)] pub struct RegexQuery { diff --git a/src/query/term_query/term_query.rs b/src/query/term_query/term_query.rs index 64152935a..402a1d738 100644 --- a/src/query/term_query/term_query.rs +++ b/src/query/term_query/term_query.rs @@ -23,42 +23,39 @@ use std::fmt; /// use tantivy::collector::{Count, TopDocs}; /// use tantivy::query::TermQuery; /// use tantivy::schema::{Schema, TEXT, IndexRecordOption}; -/// use tantivy::{doc, Index, Result, Term}; -/// -/// # fn main() { example().unwrap(); } -/// fn example() -> Result<()> { 
-/// let mut schema_builder = Schema::builder(); -/// let title = schema_builder.add_text_field("title", TEXT); -/// let schema = schema_builder.build(); -/// let index = Index::create_in_ram(schema); -/// { -/// let mut index_writer = index.writer(3_000_000)?; -/// index_writer.add_document(doc!( -/// title => "The Name of the Wind", -/// )); -/// index_writer.add_document(doc!( -/// title => "The Diary of Muadib", -/// )); -/// index_writer.add_document(doc!( -/// title => "A Dairy Cow", -/// )); -/// index_writer.add_document(doc!( -/// title => "The Diary of a Young Girl", -/// )); -/// index_writer.commit()?; -/// } -/// let reader = index.reader()?; -/// let searcher = reader.searcher(); -/// -/// let query = TermQuery::new( -/// Term::from_field_text(title, "diary"), -/// IndexRecordOption::Basic, -/// ); -/// let (top_docs, count) = searcher.search(&query, &(TopDocs::with_limit(2), Count)).unwrap(); -/// assert_eq!(count, 2); -/// -/// Ok(()) +/// use tantivy::{doc, Index, Term}; +/// # fn test() -> tantivy::Result<()> { +/// let mut schema_builder = Schema::builder(); +/// let title = schema_builder.add_text_field("title", TEXT); +/// let schema = schema_builder.build(); +/// let index = Index::create_in_ram(schema); +/// { +/// let mut index_writer = index.writer(3_000_000)?; +/// index_writer.add_document(doc!( +/// title => "The Name of the Wind", +/// )); +/// index_writer.add_document(doc!( +/// title => "The Diary of Muadib", +/// )); +/// index_writer.add_document(doc!( +/// title => "A Dairy Cow", +/// )); +/// index_writer.add_document(doc!( +/// title => "The Diary of a Young Girl", +/// )); +/// index_writer.commit()?; /// } +/// let reader = index.reader()?; +/// let searcher = reader.searcher(); +/// let query = TermQuery::new( +/// Term::from_field_text(title, "diary"), +/// IndexRecordOption::Basic, +/// ); +/// let (top_docs, count) = searcher.search(&query, &(TopDocs::with_limit(2), Count))?; +/// assert_eq!(count, 2); +/// Ok(()) +/// # } 
+/// # assert!(test().is_ok()); /// ``` #[derive(Clone)] pub struct TermQuery { diff --git a/src/schema/flags.rs b/src/schema/flags.rs index 81a93b99d..28e252f85 100644 --- a/src/schema/flags.rs +++ b/src/schema/flags.rs @@ -53,7 +53,7 @@ where fn bitor(self, head: SchemaFlagList) -> Self::Output { SchemaFlagList { head: head.head, - tail: self.clone(), + tail: self, } } } diff --git a/src/tokenizer/alphanum_only.rs b/src/tokenizer/alphanum_only.rs index 3c37c4c9e..5d15e45e5 100644 --- a/src/tokenizer/alphanum_only.rs +++ b/src/tokenizer/alphanum_only.rs @@ -2,8 +2,6 @@ //! ```rust //! use tantivy::tokenizer::*; //! -//! # fn main() { -//! //! let tokenizer = RawTokenizer //! .filter(AlphaNumOnlyFilter); //! @@ -20,7 +18,6 @@ //! assert!(stream.next().is_some()); //! // the "emoji" is dropped because its not an alphanum //! assert!(stream.next().is_none()); -//! # } //! ``` use super::{Token, TokenFilter, TokenStream}; diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs index 72cf980a4..d0aaaab9a 100644 --- a/src/tokenizer/mod.rs +++ b/src/tokenizer/mod.rs @@ -7,7 +7,6 @@ //! ```rust //! use tantivy::schema::*; //! -//! # fn main() { //! let mut schema_builder = Schema::builder(); //! //! let text_options = TextOptions::default() @@ -31,7 +30,6 @@ //! schema_builder.add_text_field("uuid", id_options); //! //! let schema = schema_builder.build(); -//! # } //! ``` //! //! By default, `tantivy` offers the following tokenizers: @@ -66,12 +64,10 @@ //! ```rust //! use tantivy::tokenizer::*; //! -//! # fn main() { //! let en_stem = SimpleTokenizer //! .filter(RemoveLongFilter::limit(40)) //! .filter(LowerCaser) //! .filter(Stemmer::new(Language::English)); -//! # } //! ``` //! //! Once your tokenizer is defined, you need to @@ -81,13 +77,12 @@ //! # use tantivy::schema::Schema; //! # use tantivy::tokenizer::*; //! # use tantivy::Index; -//! # fn main() { -//! # let custom_en_tokenizer = SimpleTokenizer; +//! # +//! let custom_en_tokenizer = SimpleTokenizer; //! 
# let schema = Schema::builder().build(); //! let index = Index::create_in_ram(schema); //! index.tokenizers() //! .register("custom_en", custom_en_tokenizer); -//! # } //! ``` //! //! If you built your schema programmatically, a complete example @@ -102,7 +97,6 @@ //! use tantivy::tokenizer::*; //! use tantivy::Index; //! -//! # fn main() { //! let mut schema_builder = Schema::builder(); //! let text_field_indexing = TextFieldIndexing::default() //! .set_tokenizer("custom_en") @@ -121,8 +115,6 @@ //! index //! .tokenizers() //! .register("custom_en", custom_en_tokenizer); -//! // ... -//! # } //! ``` //! mod alphanum_only; diff --git a/src/tokenizer/ngram_tokenizer.rs b/src/tokenizer/ngram_tokenizer.rs index e3731d73f..50aeca9a6 100644 --- a/src/tokenizer/ngram_tokenizer.rs +++ b/src/tokenizer/ngram_tokenizer.rs @@ -31,7 +31,7 @@ use super::{Token, TokenStream, Tokenizer}; /// /// ```rust /// use tantivy::tokenizer::*; -/// # fn main() { +/// /// let tokenizer = NgramTokenizer::new(2, 3, false); /// let mut stream = tokenizer.token_stream("hello"); /// { @@ -77,7 +77,6 @@ use super::{Token, TokenStream, Tokenizer}; /// assert_eq!(token.offset_to, 5); /// } /// assert!(stream.next().is_none()); -/// # } /// ``` #[derive(Clone)] pub struct NgramTokenizer { diff --git a/src/tokenizer/remove_long.rs b/src/tokenizer/remove_long.rs index a81be9f84..31b824f26 100644 --- a/src/tokenizer/remove_long.rs +++ b/src/tokenizer/remove_long.rs @@ -2,8 +2,6 @@ //! ```rust //! use tantivy::tokenizer::*; //! -//! # fn main() { -//! //! let tokenizer = SimpleTokenizer //! .filter(RemoveLongFilter::limit(5)); //! @@ -12,7 +10,6 @@ //! // out of the token stream. //! assert_eq!(stream.next().unwrap().text, "nice"); //! assert!(stream.next().is_none()); -//! # } //! ``` //! 
use super::{Token, TokenFilter, TokenStream}; diff --git a/src/tokenizer/stop_word_filter.rs b/src/tokenizer/stop_word_filter.rs index 623388e2d..e554a330e 100644 --- a/src/tokenizer/stop_word_filter.rs +++ b/src/tokenizer/stop_word_filter.rs @@ -2,7 +2,6 @@ //! ```rust //! use tantivy::tokenizer::*; //! -//! # fn main() { //! let tokenizer = SimpleTokenizer //! .filter(StopWordFilter::remove(vec!["the".to_string(), "is".to_string()])); //! @@ -10,7 +9,6 @@ //! assert_eq!(stream.next().unwrap().text, "fox"); //! assert_eq!(stream.next().unwrap().text, "crafty"); //! assert!(stream.next().is_none()); -//! # } //! ``` use super::{Token, TokenFilter, TokenStream}; use fnv::FnvHasher; @@ -46,7 +44,7 @@ impl StopWordFilter { "there", "these", "they", "this", "to", "was", "will", "with", ]; - StopWordFilter::remove(words.iter().map(|s| s.to_string()).collect()) + StopWordFilter::remove(words.iter().map(|&s| s.to_string()).collect()) } } diff --git a/src/tokenizer/tokenizer.rs b/src/tokenizer/tokenizer.rs index 4b36c317c..78a0eaf4b 100644 --- a/src/tokenizer/tokenizer.rs +++ b/src/tokenizer/tokenizer.rs @@ -58,12 +58,10 @@ pub trait Tokenizer<'a>: Sized + Clone { /// ```rust /// use tantivy::tokenizer::*; /// - /// # fn main() { /// let en_stem = SimpleTokenizer /// .filter(RemoveLongFilter::limit(40)) /// .filter(LowerCaser) /// .filter(Stemmer::default()); - /// # } /// ``` /// fn filter(self, new_filter: NewFilter) -> ChainTokenizer @@ -188,7 +186,6 @@ impl<'b> TokenStream for Box { /// ``` /// use tantivy::tokenizer::*; /// -/// # fn main() { /// let tokenizer = SimpleTokenizer /// .filter(RemoveLongFilter::limit(40)) /// .filter(LowerCaser); @@ -207,7 +204,6 @@ impl<'b> TokenStream for Box { /// assert_eq!(token.offset_to, 12); /// assert_eq!(token.position, 1); /// } -/// # } /// ``` /// pub trait TokenStream { @@ -227,17 +223,15 @@ pub trait TokenStream { /// and `.token()`. 
/// /// ``` - /// # use tantivy::tokenizer::*; - /// # - /// # fn main() { - /// # let tokenizer = SimpleTokenizer - /// # .filter(RemoveLongFilter::limit(40)) - /// # .filter(LowerCaser); + /// use tantivy::tokenizer::*; + /// + /// let tokenizer = SimpleTokenizer + /// .filter(RemoveLongFilter::limit(40)) + /// .filter(LowerCaser); /// let mut token_stream = tokenizer.token_stream("Hello, happy tax payer"); /// while let Some(token) = token_stream.next() { /// println!("Token {:?}", token.text); /// } - /// # } /// ``` fn next(&mut self) -> Option<&Token> { if self.advance() {