From b3bf9a571622c6bf16b868753c7f2ec336fb0823 Mon Sep 17 00:00:00 2001
From: Bruce Mitchener
Date: Wed, 5 Oct 2022 14:14:26 +0700
Subject: [PATCH] Documentation improvements.

---
 src/aggregation/bucket/histogram/histogram.rs |  2 +-
 src/core/inverted_index_reader.rs             | 23 +++++++++----------
 src/core/segment_reader.rs                    |  6 ++---
 src/directory/file_slice.rs                   |  4 ++--
 src/indexer/segment_updater.rs                | 12 +++++-----
 src/postings/recorder.rs                      |  6 ++---
 src/postings/stacker/term_hashmap.rs          |  2 +-
 src/schema/bytes_options.rs                   |  2 +-
 src/schema/mod.rs                             | 19 ++++++++-------
 src/schema/term.rs                            | 12 +++++-----
 src/schema/text_options.rs                    |  4 +++-
 src/store/reader.rs                           | 20 ++++++++--------
 12 files changed, 56 insertions(+), 56 deletions(-)

diff --git a/src/aggregation/bucket/histogram/histogram.rs b/src/aggregation/bucket/histogram/histogram.rs
index f485b07b3..92053fc21 100644
--- a/src/aggregation/bucket/histogram/histogram.rs
+++ b/src/aggregation/bucket/histogram/histogram.rs
@@ -452,7 +452,7 @@ fn intermediate_buckets_to_final_buckets_fill_gaps(
     histogram_req: &HistogramAggregation,
     sub_aggregation: &AggregationsInternal,
 ) -> crate::Result> {
-    // Generate the the full list of buckets without gaps.
+    // Generate the full list of buckets without gaps.
     //
     // The bounds are the min max from the current buckets, optionally extended by
     // extended_bounds from the request
diff --git a/src/core/inverted_index_reader.rs b/src/core/inverted_index_reader.rs
index 47002d71a..852db94ed 100644
--- a/src/core/inverted_index_reader.rs
+++ b/src/core/inverted_index_reader.rs
@@ -15,12 +15,11 @@ use crate::termdict::TermDictionary;
 ///
 /// It is safe to delete the segment associated with
 /// an `InvertedIndexReader`. As long as it is open,
-/// the `FileSlice` it is relying on should
+/// the [`FileSlice`] it is relying on should
 /// stay available.
 ///
-///
 /// `InvertedIndexReader` are created by calling
-/// the `SegmentReader`'s [`.inverted_index(...)`] method
+/// [`SegmentReader::inverted_index()`](crate::SegmentReader::inverted_index).
 pub struct InvertedIndexReader {
     termdict: TermDictionary,
     postings_file_slice: FileSlice,
@@ -75,7 +74,7 @@ impl InvertedIndexReader {
     ///
     /// This is useful for enumerating through a list of terms,
     /// and consuming the associated posting lists while avoiding
-    /// reallocating a `BlockSegmentPostings`.
+    /// reallocating a [`BlockSegmentPostings`].
     ///
     /// # Warning
     ///
@@ -96,7 +95,7 @@ impl InvertedIndexReader {
     /// Returns a block postings given a `Term`.
     /// This method is for an advanced usage only.
     ///
-    /// Most user should prefer using `read_postings` instead.
+    /// Most users should prefer using [`Self::read_postings()`] instead.
     pub fn read_block_postings(
         &self,
         term: &Term,
@@ -110,7 +109,7 @@ impl InvertedIndexReader {
     /// Returns a block postings given a `term_info`.
     /// This method is for an advanced usage only.
     ///
-    /// Most user should prefer using `read_postings` instead.
+    /// Most users should prefer using [`Self::read_postings()`] instead.
     pub fn read_block_postings_from_terminfo(
         &self,
         term_info: &TermInfo,
@@ -130,7 +129,7 @@ impl InvertedIndexReader {
     /// Returns a posting object given a `term_info`.
     /// This method is for an advanced usage only.
     ///
-    /// Most user should prefer using `read_postings` instead.
+    /// Most users should prefer using [`Self::read_postings()`] instead.
     pub fn read_postings_from_terminfo(
         &self,
         term_info: &TermInfo,
@@ -164,12 +163,12 @@ impl InvertedIndexReader {
     /// or `None` if the term has never been encountered and indexed.
     ///
     /// If the field was not indexed with the indexing options that cover
-    /// the requested options, the returned `SegmentPostings` the method does not fail
+    /// the requested options, the method does not fail
     /// and returns a `SegmentPostings` with as much information as possible.
     ///
-    /// For instance, requesting `IndexRecordOption::Freq` for a
-    /// `TextIndexingOptions` that does not index position will return a `SegmentPostings`
-    /// with `DocId`s and frequencies.
+    /// For instance, requesting [`IndexRecordOption::WithFreqs`] for a
+    /// [`TextOptions`](crate::schema::TextOptions) that does not index position
+    /// will return a [`SegmentPostings`] with `DocId`s and frequencies.
     pub fn read_postings(
         &self,
         term: &Term,
@@ -211,7 +210,7 @@ impl InvertedIndexReader {
     /// Returns a block postings given a `Term`.
     /// This method is for an advanced usage only.
     ///
-    /// Most user should prefer using `read_postings` instead.
+    /// Most users should prefer using [`Self::read_postings()`] instead.
     pub async fn warm_postings(
         &self,
         term: &Term,
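The `read_postings()` path documented above can be exercised roughly as follows (a minimal sketch against tantivy's public API; the field name, sample text, and writer memory budget are arbitrary choices for illustration):

```rust
use tantivy::postings::Postings;
use tantivy::schema::{IndexRecordOption, Schema, TEXT};
use tantivy::{doc, DocSet, Index, Term, TERMINATED};

fn postings_sketch() -> tantivy::Result<()> {
    // Illustrative one-field schema and an in-RAM index.
    let mut schema_builder = Schema::builder();
    let text = schema_builder.add_text_field("text", TEXT);
    let index = Index::create_in_ram(schema_builder.build());

    let mut writer = index.writer(50_000_000)?;
    writer.add_document(doc!(text => "hello hello world"))?;
    writer.commit()?;

    let searcher = index.reader()?.searcher();
    // One `SegmentReader` per segment; `inverted_index()` is the entry point
    // whose documentation is touched above.
    let segment_reader = searcher.segment_readers().first().unwrap();
    let inverted_index = segment_reader.inverted_index(text)?;

    // Ask for doc ids plus term frequencies for the term "hello".
    let term = Term::from_field_text(text, "hello");
    if let Some(mut postings) =
        inverted_index.read_postings(&term, IndexRecordOption::WithFreqs)?
    {
        while postings.doc() != TERMINATED {
            println!("doc {} has term frequency {}", postings.doc(), postings.term_freq());
            postings.advance();
        }
    }
    Ok(())
}
```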
diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs
index 88b5e4dfa..1a9512078 100644
--- a/src/core/segment_reader.rs
+++ b/src/core/segment_reader.rs
@@ -216,10 +216,10 @@ impl SegmentReader {
     /// term dictionary associated with a specific field,
     /// and opening the posting list associated with any term.
     ///
-    /// If the field is not marked as index, a warn is logged and an empty `InvertedIndexReader`
+    /// If the field is not marked as indexed, a warning is logged and an empty `InvertedIndexReader`
     /// is returned.
-    /// Similarly if the field is marked as indexed but no term has been indexed for the given
-    /// index. an empty `InvertedIndexReader` is returned (but no warning is logged).
+    /// Similarly, if the field is marked as indexed but no term has been indexed for the given
+    /// index, an empty `InvertedIndexReader` is returned (but no warning is logged).
     pub fn inverted_index(&self, field: Field) -> crate::Result> {
         if let Some(inv_idx_reader) = self
             .inv_idx_reader_cache
diff --git a/src/directory/file_slice.rs b/src/directory/file_slice.rs
index 3f7859cb7..1d0cd6915 100644
--- a/src/directory/file_slice.rs
+++ b/src/directory/file_slice.rs
@@ -13,8 +13,8 @@ use crate::directory::OwnedBytes;
 /// By contract, whatever happens to the directory file, as long as a FileHandle
 /// is alive, the data associated with it cannot be altered or destroyed.
 ///
-/// The underlying behavior is therefore specific to the `Directory` that created it.
-/// Despite its name, a `FileSlice` may or may not directly map to an actual file
+/// The underlying behavior is therefore specific to the [`Directory`](crate::Directory) that
+/// created it. Despite its name, a [`FileSlice`] may or may not directly map to an actual file
 /// on the filesystem.
 #[async_trait]
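A minimal sketch of the `FileSlice` contract described above, assuming the in-memory `RamDirectory`; the file name and contents are placeholders:

```rust
use std::path::Path;

use tantivy::directory::{Directory, RamDirectory};

fn file_slice_sketch() -> tantivy::Result<()> {
    let directory = RamDirectory::create();
    let path = Path::new("hello.txt"); // arbitrary name for this sketch
    directory.atomic_write(path, b"hello, tantivy")?;

    // `open_read` hands back a `FileSlice`; as long as it is alive, the bytes
    // behind it must stay readable, whatever happens to the "file" itself.
    let file_slice = directory.open_read(path)?;
    let bytes = file_slice.read_bytes()?;
    assert_eq!(bytes.as_slice(), b"hello, tantivy");
    Ok(())
}
```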
diff --git a/src/indexer/segment_updater.rs b/src/indexer/segment_updater.rs
index 198121bf6..c0269496c 100644
--- a/src/indexer/segment_updater.rs
+++ b/src/indexer/segment_updater.rs
@@ -133,15 +133,15 @@ fn merge(

 /// Advanced: Merges a list of segments from different indices in a new index.
 ///
-/// Returns `TantivyError` if the the indices list is empty or their
+/// Returns `TantivyError` if the indices list is empty or their
 /// schemas don't match.
 ///
 /// `output_directory`: is assumed to be empty.
 ///
 /// # Warning
 /// This function does NOT check or take the `IndexWriter` is running. It is not
-/// meant to work if you have an IndexWriter running for the origin indices, or
-/// the destination Index.
+/// meant to work if you have an `IndexWriter` running for the origin indices, or
+/// the destination `Index`.
 #[doc(hidden)]
 pub fn merge_indices>>(
     indices: &[Index],
@@ -179,15 +179,15 @@ pub fn merge_indices>>(
 /// Advanced: Merges a list of segments from different indices in a new index.
 /// Additional you can provide a delete bitset for each segment to ignore doc_ids.
 ///
-/// Returns `TantivyError` if the the indices list is empty or their
+/// Returns `TantivyError` if the indices list is empty or their
 /// schemas don't match.
 ///
 /// `output_directory`: is assumed to be empty.
 ///
 /// # Warning
 /// This function does NOT check or take the `IndexWriter` is running. It is not
-/// meant to work if you have an IndexWriter running for the origin indices, or
-/// the destination Index.
+/// meant to work if you have an `IndexWriter` running for the origin indices, or
+/// the destination `Index`.
 #[doc(hidden)]
 pub fn merge_filtered_segments>>(
     segments: &[Segment],
diff --git a/src/postings/recorder.rs b/src/postings/recorder.rs
index 8b07851c6..79a26400d 100644
--- a/src/postings/recorder.rs
+++ b/src/postings/recorder.rs
@@ -47,11 +47,11 @@ impl<'a> Iterator for VInt32Reader<'a> {
     }
 }

-/// Recorder is in charge of recording relevant information about
+/// `Recorder` is in charge of recording relevant information about
 /// the presence of a term in a document.
 ///
-/// Depending on the `TextIndexingOptions` associated with the
-/// field, the recorder may records
+/// Depending on the [`TextOptions`](crate::schema::TextOptions) associated
+/// with the field, the recorder may record:
 /// * the document frequency
 /// * the document id
 /// * the term frequency
diff --git a/src/postings/stacker/term_hashmap.rs b/src/postings/stacker/term_hashmap.rs
index bf12cf695..f34297093 100644
--- a/src/postings/stacker/term_hashmap.rs
+++ b/src/postings/stacker/term_hashmap.rs
@@ -199,7 +199,7 @@ impl TermHashMap {
     /// `update` create a new entry for a given key if it does not exists
     /// or updates the existing entry.
     ///
-    /// The actual logic for this update is define in the the `updater`
+    /// The actual logic for this update is defined in the `updater`
     /// argument.
     ///
     /// If the key is not present, `updater` will receive `None` and
diff --git a/src/schema/bytes_options.rs b/src/schema/bytes_options.rs
index 37382d337..9a7afff2e 100644
--- a/src/schema/bytes_options.rs
+++ b/src/schema/bytes_options.rs
@@ -3,7 +3,7 @@ use std::ops::BitOr;
 use serde::{Deserialize, Serialize};

 use super::flags::{FastFlag, IndexedFlag, SchemaFlagList, StoredFlag};
-/// Define how an a bytes field should be handled by tantivy.
+/// Define how a bytes field should be handled by tantivy.
 #[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
 #[serde(from = "BytesOptionsDeser")]
 pub struct BytesOptions {
diff --git a/src/schema/mod.rs b/src/schema/mod.rs
index a57a88890..4d966a8b9 100644
--- a/src/schema/mod.rs
+++ b/src/schema/mod.rs
@@ -28,10 +28,10 @@
 //! use tantivy::schema::*;
 //! let mut schema_builder = Schema::builder();
 //! let title_options = TextOptions::default()
-//! .set_stored()
-//! .set_indexing_options(TextFieldIndexing::default()
-//! .set_tokenizer("default")
-//! .set_index_option(IndexRecordOption::WithFreqsAndPositions));
+//!     .set_stored()
+//!     .set_indexing_options(TextFieldIndexing::default()
+//!         .set_tokenizer("default")
+//!         .set_index_option(IndexRecordOption::WithFreqsAndPositions));
 //! schema_builder.add_text_field("title", title_options);
 //! let schema = schema_builder.build();
 //! ```
@@ -45,8 +45,7 @@
 //! In the first phase, the ability to search for documents by the given field is determined by the
 //! [`IndexRecordOption`] of our [`TextOptions`].
 //!
-//! The effect of each possible setting is described more in detail
-//! [`TextIndexingOptions`](enum.TextIndexingOptions.html).
+//! The effect of each possible setting is described in more detail in [`TextOptions`].
 //!
 //! On the other hand setting the field as stored or not determines whether the field should be
 //! returned when [`Searcher::doc()`](crate::Searcher::doc) is called.
@@ -60,8 +59,8 @@
 //! use tantivy::schema::*;
 //! let mut schema_builder = Schema::builder();
 //! let num_stars_options = NumericOptions::default()
-//! .set_stored()
-//! .set_indexed();
+//!     .set_stored()
+//!     .set_indexed();
 //! schema_builder.add_u64_field("num_stars", num_stars_options);
 //! let schema = schema_builder.build();
 //! ```
@@ -79,8 +78,8 @@
 //! For convenience, it is possible to define your field indexing options by combining different
 //! flags using the `|` operator.
 //!
-//! For instance, a schema containing the two fields defined in the example above could be rewritten
-//! :
+//! For instance, a schema containing the two fields defined in the example above could be
+//! rewritten:
 //!
 //! ```
 //! use tantivy::schema::*;
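The `|`-combined flags that the module documentation above builds toward can be sketched as follows (field names are the ones from the examples; `FAST` is an extra flag, not part of the spelled-out `NumericOptions` example):

```rust
use tantivy::schema::{Schema, FAST, INDEXED, STORED, TEXT};

fn flags_sketch() -> Schema {
    let mut schema_builder = Schema::builder();
    // `TEXT | STORED` expands to the same TextOptions spelled out above:
    // default tokenizer, frequencies and positions, plus a stored value.
    schema_builder.add_text_field("title", TEXT | STORED);
    // Numeric fields combine INDEXED / STORED / FAST the same way.
    schema_builder.add_u64_field("num_stars", INDEXED | STORED | FAST);
    schema_builder.build()
}
```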
diff --git a/src/schema/term.rs b/src/schema/term.rs
index 37d5dc02d..99f3e5ed5 100644
--- a/src/schema/term.rs
+++ b/src/schema/term.rs
@@ -54,27 +54,27 @@ impl Term {
         term
     }

-    /// Builds a term given a field, and a u64-value
+    /// Builds a term given a field, and a `u64`-value
     pub fn from_field_u64(field: Field, val: u64) -> Term {
         Term::from_fast_value(field, &val)
     }

-    /// Builds a term given a field, and a i64-value
+    /// Builds a term given a field, and an `i64`-value
     pub fn from_field_i64(field: Field, val: i64) -> Term {
         Term::from_fast_value(field, &val)
     }

-    /// Builds a term given a field, and a f64-value
+    /// Builds a term given a field, and an `f64`-value
     pub fn from_field_f64(field: Field, val: f64) -> Term {
         Term::from_fast_value(field, &val)
     }

-    /// Builds a term given a field, and a f64-value
+    /// Builds a term given a field, and a `bool`-value
     pub fn from_field_bool(field: Field, val: bool) -> Term {
         Term::from_fast_value(field, &val)
     }

-    /// Builds a term given a field, and a DateTime value
+    /// Builds a term given a field, and a `DateTime` value
     pub fn from_field_date(field: Field, val: DateTime) -> Term {
         Term::from_fast_value(field, &val.truncate(DatePrecision::Seconds))
     }
@@ -130,7 +130,7 @@ impl Term {
         self.set_fast_value(val);
     }

-    /// Sets a `i64` value in the term.
+    /// Sets a `DateTime` value in the term.
     pub fn set_date(&mut self, date: DateTime) {
         self.set_fast_value(date);
     }
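A small sketch of the typed `Term` constructors documented above; the `num_stars` field exists purely for illustration:

```rust
use tantivy::schema::{Schema, INDEXED};
use tantivy::Term;

fn term_sketch() {
    // Hypothetical u64 field, only to have a `Field` handle to build terms for.
    let mut schema_builder = Schema::builder();
    let num_stars = schema_builder.add_u64_field("num_stars", INDEXED);
    let _schema = schema_builder.build();

    // The typed constructor encodes the value into the term's byte representation.
    let term = Term::from_field_u64(num_stars, 4);
    assert_eq!(term.field(), num_stars);
}
```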
diff --git a/src/schema/text_options.rs b/src/schema/text_options.rs
index 44d74fd6c..21e673c6e 100644
--- a/src/schema/text_options.rs
+++ b/src/schema/text_options.rs
@@ -47,7 +47,9 @@ impl TextOptions {
     /// unchanged. The "default" tokenizer will store the terms as lower case and this will be
     /// reflected in the dictionary.
     ///
-    /// The original text can be retrieved via `ord_to_term` from the dictionary.
+    /// The original text can be retrieved via
+    /// [`TermDictionary::ord_to_term()`](crate::termdict::TermDictionary::ord_to_term)
+    /// from the dictionary.
     #[must_use]
     pub fn set_fast(mut self) -> TextOptions {
         self.fast = true;
diff --git a/src/store/reader.rs b/src/store/reader.rs
index 92e9a1506..ff0b4bee9 100644
--- a/src/store/reader.rs
+++ b/src/store/reader.rs
@@ -140,10 +140,10 @@ impl StoreReader {
         self.cache.stats()
     }

-    /// Get checkpoint for DocId. The checkpoint can be used to load a block containing the
+    /// Get checkpoint for `DocId`. The checkpoint can be used to load a block containing the
     /// document.
     ///
-    /// Advanced API. In most cases use [get](Self::get).
+    /// Advanced API. In most cases use [`get`](Self::get).
     fn block_checkpoint(&self, doc_id: DocId) -> crate::Result {
         self.skip_index.seek(doc_id).ok_or_else(|| {
             crate::TantivyError::InvalidArgument(format!("Failed to lookup Doc #{}.", doc_id))
@@ -160,7 +160,7 @@ impl StoreReader {

     /// Loads and decompresses a block.
     ///
-    /// Advanced API. In most cases use [get](Self::get).
+    /// Advanced API. In most cases use [`get`](Self::get).
     fn read_block(&self, checkpoint: &Checkpoint) -> io::Result {
         let cache_key = checkpoint.byte_range.start;
         if let Some(block) = self.cache.get_from_cache(cache_key) {
@@ -205,7 +205,7 @@ impl StoreReader {

     /// Advanced API.
     ///
-    /// In most cases use [get_document_bytes](Self::get_document_bytes).
+    /// In most cases use [`get_document_bytes`](Self::get_document_bytes).
     fn get_document_bytes_from_block(
         block: OwnedBytes,
         doc_id: DocId,
@@ -219,7 +219,7 @@ impl StoreReader {

     /// Iterator over all Documents in their order as they are stored in the doc store.
     /// Use this, if you want to extract all Documents from the doc store.
-    /// The alive_bitset has to be forwarded from the `SegmentReader` or the results maybe wrong.
+    /// The `alive_bitset` has to be forwarded from the `SegmentReader` or the results may be wrong.
     pub fn iter<'a: 'b, 'b>(
         &'b self,
         alive_bitset: Option<&'a AliveBitSet>,
@@ -230,9 +230,9 @@ impl StoreReader {
         })
     }

-    /// Iterator over all RawDocuments in their order as they are stored in the doc store.
+    /// Iterator over all raw Documents in their order as they are stored in the doc store.
     /// Use this, if you want to extract all Documents from the doc store.
-    /// The alive_bitset has to be forwarded from the `SegmentReader` or the results maybe wrong.
+    /// The `alive_bitset` has to be forwarded from the `SegmentReader` or the results may be wrong.
     pub(crate) fn iter_raw<'a: 'b, 'b>(
         &'b self,
         alive_bitset: Option<&'a AliveBitSet>,
@@ -320,7 +320,7 @@ fn block_read_index(block: &[u8], doc_pos: u32) -> crate::Result> {
 impl StoreReader {
     /// Advanced API.
     ///
-    /// In most cases use [get_async](Self::get_async)
+    /// In most cases use [`get_async`](Self::get_async)
     ///
     /// Loads and decompresses a block asynchronously.
     async fn read_block_async(&self, checkpoint: &Checkpoint) -> crate::AsyncIoResult {
@@ -344,14 +344,14 @@ impl StoreReader {
         Ok(decompressed_block)
     }

-    /// Fetches a document asynchronously.
+    /// Reads raw bytes of a given document asynchronously.
     pub async fn get_document_bytes_async(&self, doc_id: DocId) -> crate::Result {
         let checkpoint = self.block_checkpoint(doc_id)?;
         let block = self.read_block_async(&checkpoint).await?;
         Self::get_document_bytes_from_block(block, doc_id, &checkpoint)
     }

-    /// Reads raw bytes of a given document. Async version of [get](Self::get).
+    /// Fetches a document asynchronously. Async version of [`get`](Self::get).
     pub async fn get_async(&self, doc_id: DocId) -> crate::Result {
         let mut doc_bytes = self.get_document_bytes_async(doc_id).await?;
         Ok(Document::deserialize(&mut doc_bytes)?)
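The doc-store accessors touched above are normally reached through `Searcher::doc()`, which goes through `StoreReader::get()`; `get_async()` plays the same role in async contexts. A rough sketch, with an illustrative field and document:

```rust
use tantivy::collector::TopDocs;
use tantivy::query::QueryParser;
use tantivy::schema::{Schema, STORED, TEXT};
use tantivy::{doc, Index};

fn doc_store_sketch() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let title = schema_builder.add_text_field("title", TEXT | STORED);
    let index = Index::create_in_ram(schema_builder.build());

    let mut writer = index.writer(50_000_000)?;
    writer.add_document(doc!(title => "The Old Man and the Sea"))?;
    writer.commit()?;

    let searcher = index.reader()?.searcher();
    let query = QueryParser::for_index(&index, vec![title]).parse_query("sea")?;

    // Fetch the stored document back; this is the synchronous `get()` path.
    for (_score, doc_address) in searcher.search(&query, &TopDocs::with_limit(1))? {
        let stored = searcher.doc(doc_address)?;
        println!("{}", index.schema().to_json(&stored));
    }
    Ok(())
}
```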