Mirror of https://github.com/quickwit-oss/tantivy.git (synced 2026-01-04 16:22:55 +00:00)

Merge pull request #1593 from waywardmonkeys/doc-improvements

Documentation improvements.
@@ -452,7 +452,7 @@ fn intermediate_buckets_to_final_buckets_fill_gaps(
 histogram_req: &HistogramAggregation,
 sub_aggregation: &AggregationsInternal,
 ) -> crate::Result<Vec<BucketEntry>> {
-// Generate the the full list of buckets without gaps.
+// Generate the full list of buckets without gaps.
 //
 // The bounds are the min max from the current buckets, optionally extended by
 // extended_bounds from the request
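To make the gap-filling idea above concrete, here is a minimal sketch (not tantivy's actual implementation, and it ignores `extended_bounds`): a bucket is materialized for every `interval` step between the smallest and largest observed key, with empty buckets inserted where no documents landed.

```rust
// Illustrative sketch only; tantivy's real gap filling works on its own
// intermediate bucket types and also honors `extended_bounds` from the request.
use std::collections::BTreeMap;

fn fill_gaps(buckets: &BTreeMap<i64, u64>, interval: i64) -> Vec<(i64, u64)> {
    let (Some(&min), Some(&max)) = (buckets.keys().next(), buckets.keys().last()) else {
        return Vec::new();
    };
    (min..=max)
        .step_by(interval as usize)
        .map(|key| (key, buckets.get(&key).copied().unwrap_or(0)))
        .collect()
}
```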
@@ -15,12 +15,11 @@ use crate::termdict::TermDictionary;
 ///
 /// It is safe to delete the segment associated with
 /// an `InvertedIndexReader`. As long as it is open,
-/// the `FileSlice` it is relying on should
+/// the [`FileSlice`] it is relying on should
 /// stay available.
 ///
-///
 /// `InvertedIndexReader` are created by calling
-/// the `SegmentReader`'s [`.inverted_index(...)`] method
+/// [`SegmentReader::inverted_index()`](crate::SegmentReader::inverted_index).
 pub struct InvertedIndexReader {
 termdict: TermDictionary,
 postings_file_slice: FileSlice,
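A minimal usage sketch of the API documented above, assuming an index with an indexed text field: it walks each segment's `InvertedIndexReader` obtained through `SegmentReader::inverted_index()` and sums term frequencies from the posting list.

```rust
use tantivy::schema::{Field, IndexRecordOption};
use tantivy::{DocSet, Postings, Searcher, Term, TERMINATED};

// Sketch: count how often `word` occurs across all segments of a searcher.
fn count_occurrences(searcher: &Searcher, field: Field, word: &str) -> tantivy::Result<u64> {
    let mut total = 0u64;
    for segment_reader in searcher.segment_readers() {
        // Term dictionary + posting lists for this field in this segment.
        let inverted_index = segment_reader.inverted_index(field)?;
        let term = Term::from_field_text(field, word);
        if let Some(mut postings) =
            inverted_index.read_postings(&term, IndexRecordOption::WithFreqs)?
        {
            while postings.doc() != TERMINATED {
                total += u64::from(postings.term_freq());
                postings.advance();
            }
        }
    }
    Ok(total)
}
```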
@@ -75,7 +74,7 @@ impl InvertedIndexReader {
 ///
 /// This is useful for enumerating through a list of terms,
 /// and consuming the associated posting lists while avoiding
-/// reallocating a `BlockSegmentPostings`.
+/// reallocating a [`BlockSegmentPostings`].
 ///
 /// # Warning
 ///
@@ -96,7 +95,7 @@ impl InvertedIndexReader {
 /// Returns a block postings given a `Term`.
 /// This method is for an advanced usage only.
 ///
-/// Most user should prefer using `read_postings` instead.
+/// Most users should prefer using [`Self::read_postings()`] instead.
 pub fn read_block_postings(
 &self,
 term: &Term,
@@ -110,7 +109,7 @@ impl InvertedIndexReader {
 /// Returns a block postings given a `term_info`.
 /// This method is for an advanced usage only.
 ///
-/// Most user should prefer using `read_postings` instead.
+/// Most users should prefer using [`Self::read_postings()`] instead.
 pub fn read_block_postings_from_terminfo(
 &self,
 term_info: &TermInfo,
@@ -130,7 +129,7 @@ impl InvertedIndexReader {
 /// Returns a posting object given a `term_info`.
 /// This method is for an advanced usage only.
 ///
-/// Most user should prefer using `read_postings` instead.
+/// Most users should prefer using [`Self::read_postings()`] instead.
 pub fn read_postings_from_terminfo(
 &self,
 term_info: &TermInfo,
@@ -164,12 +163,12 @@ impl InvertedIndexReader {
 /// or `None` if the term has never been encountered and indexed.
 ///
 /// If the field was not indexed with the indexing options that cover
-/// the requested options, the returned `SegmentPostings` the method does not fail
+/// the requested options, the returned [`SegmentPostings`] the method does not fail
 /// and returns a `SegmentPostings` with as much information as possible.
 ///
-/// For instance, requesting `IndexRecordOption::Freq` for a
-/// `TextIndexingOptions` that does not index position will return a `SegmentPostings`
-/// with `DocId`s and frequencies.
+/// For instance, requesting [`IndexRecordOption::WithFreqs`] for a
+/// [`TextOptions`](crate::schema::TextOptions) that does not index position
+/// will return a [`SegmentPostings`] with `DocId`s and frequencies.
 pub fn read_postings(
 &self,
 term: &Term,
@@ -211,7 +210,7 @@ impl InvertedIndexReader {
 /// Returns a block postings given a `Term`.
 /// This method is for an advanced usage only.
 ///
-/// Most user should prefer using `read_postings` instead.
+/// Most users should prefer using [`Self::read_postings()`] instead.
 pub async fn warm_postings(
 &self,
 term: &Term,
@@ -216,10 +216,10 @@ impl SegmentReader {
 /// term dictionary associated with a specific field,
 /// and opening the posting list associated with any term.
 ///
-/// If the field is not marked as index, a warn is logged and an empty `InvertedIndexReader`
+/// If the field is not marked as index, a warning is logged and an empty `InvertedIndexReader`
 /// is returned.
-/// Similarly if the field is marked as indexed but no term has been indexed for the given
-/// index. an empty `InvertedIndexReader` is returned (but no warning is logged).
+/// Similarly, if the field is marked as indexed but no term has been indexed for the given
+/// index, an empty `InvertedIndexReader` is returned (but no warning is logged).
 pub fn inverted_index(&self, field: Field) -> crate::Result<Arc<InvertedIndexReader>> {
 if let Some(inv_idx_reader) = self
 .inv_idx_reader_cache
@@ -13,8 +13,8 @@ use crate::directory::OwnedBytes;
 /// By contract, whatever happens to the directory file, as long as a FileHandle
 /// is alive, the data associated with it cannot be altered or destroyed.
 ///
-/// The underlying behavior is therefore specific to the `Directory` that created it.
-/// Despite its name, a `FileSlice` may or may not directly map to an actual file
+/// The underlying behavior is therefore specific to the [`Directory`](crate::Directory) that
+/// created it. Despite its name, a [`FileSlice`] may or may not directly map to an actual file
 /// on the filesystem.

 #[async_trait]
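A small sketch of the `FileSlice` contract described above, using a `RamDirectory` so nothing actually touches the filesystem (path and data are illustrative):

```rust
use std::path::Path;
use tantivy::directory::{Directory, RamDirectory};

fn main() -> tantivy::Result<()> {
    let directory = RamDirectory::create();
    directory.atomic_write(Path::new("data"), b"hello tantivy")?;
    // Despite the name, this FileSlice is backed by memory, not a file.
    let file_slice = directory.open_read(Path::new("data"))?;
    // Slicing narrows the byte range without copying.
    let bytes = file_slice.slice(0..5).read_bytes()?;
    assert_eq!(bytes.as_slice(), b"hello");
    Ok(())
}
```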
@@ -133,15 +133,15 @@ fn merge(

 /// Advanced: Merges a list of segments from different indices in a new index.
 ///
-/// Returns `TantivyError` if the the indices list is empty or their
+/// Returns `TantivyError` if the indices list is empty or their
 /// schemas don't match.
 ///
 /// `output_directory`: is assumed to be empty.
 ///
 /// # Warning
 /// This function does NOT check or take the `IndexWriter` is running. It is not
-/// meant to work if you have an IndexWriter running for the origin indices, or
-/// the destination Index.
+/// meant to work if you have an `IndexWriter` running for the origin indices, or
+/// the destination `Index`.
 #[doc(hidden)]
 pub fn merge_indices<T: Into<Box<dyn Directory>>>(
 indices: &[Index],
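A hedged usage sketch of `merge_indices`: it assumes the function is reachable from the crate root and returns the merged `Index` (it is `#[doc(hidden)]`, so verify the exact path and signature for your version). The output directory must be empty, and no `IndexWriter` may be running on the sources or the destination.

```rust
use tantivy::directory::RamDirectory;
use tantivy::{merge_indices, Index};

// Sketch: merge two indices sharing the same schema into a fresh in-memory directory.
fn merge_two(index_a: &Index, index_b: &Index) -> tantivy::Result<Index> {
    let output = RamDirectory::create();
    merge_indices(&[index_a.clone(), index_b.clone()], output)
}
```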
@@ -179,15 +179,15 @@ pub fn merge_indices<T: Into<Box<dyn Directory>>>(
 /// Advanced: Merges a list of segments from different indices in a new index.
 /// Additional you can provide a delete bitset for each segment to ignore doc_ids.
 ///
-/// Returns `TantivyError` if the the indices list is empty or their
+/// Returns `TantivyError` if the indices list is empty or their
 /// schemas don't match.
 ///
 /// `output_directory`: is assumed to be empty.
 ///
 /// # Warning
 /// This function does NOT check or take the `IndexWriter` is running. It is not
-/// meant to work if you have an IndexWriter running for the origin indices, or
-/// the destination Index.
+/// meant to work if you have an `IndexWriter` running for the origin indices, or
+/// the destination `Index`.
 #[doc(hidden)]
 pub fn merge_filtered_segments<T: Into<Box<dyn Directory>>>(
 segments: &[Segment],
@@ -47,11 +47,11 @@ impl<'a> Iterator for VInt32Reader<'a> {
 }
 }

-/// Recorder is in charge of recording relevant information about
+/// `Recorder` is in charge of recording relevant information about
 /// the presence of a term in a document.
 ///
-/// Depending on the `TextIndexingOptions` associated with the
-/// field, the recorder may records
+/// Depending on the [`TextOptions`](crate::schema::TextOptions) associated
+/// with the field, the recorder may record:
 /// * the document frequency
 /// * the document id
 /// * the term frequency
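The schema-level `IndexRecordOption` is what decides which of the items above gets recorded for a text field; a short sketch using the options shown elsewhere in this commit (field names are illustrative):

```rust
use tantivy::schema::{IndexRecordOption, Schema, TextFieldIndexing, TextOptions};

fn main() {
    let mut schema_builder = Schema::builder();
    // Only document ids are recorded for this field.
    let basic = TextOptions::default().set_indexing_options(
        TextFieldIndexing::default().set_index_option(IndexRecordOption::Basic),
    );
    // Document ids, term frequencies and positions are recorded for this one.
    let full = TextOptions::default().set_indexing_options(
        TextFieldIndexing::default().set_index_option(IndexRecordOption::WithFreqsAndPositions),
    );
    schema_builder.add_text_field("tag", basic);
    schema_builder.add_text_field("body", full);
    let _schema = schema_builder.build();
}
```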
@@ -199,7 +199,7 @@ impl TermHashMap {
 /// `update` create a new entry for a given key if it does not exists
 /// or updates the existing entry.
 ///
-/// The actual logic for this update is define in the the `updater`
+/// The actual logic for this update is define in the `updater`
 /// argument.
 ///
 /// If the key is not present, `updater` will receive `None` and
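A sketch of the update-or-insert pattern this documents, using a plain `HashMap` rather than tantivy's `TermHashMap`: the `updater` closure receives `None` for a new key and `Some(previous)` otherwise, and returns the value to store.

```rust
// Illustrative only; tantivy's TermHashMap stores values in its own arena.
use std::collections::HashMap;

fn update<F>(map: &mut HashMap<Vec<u8>, u64>, key: &[u8], updater: F)
where
    F: FnOnce(Option<u64>) -> u64,
{
    let new_value = updater(map.get(key).copied());
    map.insert(key.to_vec(), new_value);
}

fn main() {
    let mut term_counts: HashMap<Vec<u8>, u64> = HashMap::new();
    // First call: the updater sees None and initializes the count.
    update(&mut term_counts, b"hello", |prev| prev.unwrap_or(0) + 1);
    // Second call: the updater sees Some(1) and increments it.
    update(&mut term_counts, b"hello", |prev| prev.unwrap_or(0) + 1);
    assert_eq!(term_counts[b"hello".as_slice()], 2);
}
```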
@@ -3,7 +3,7 @@ use std::ops::BitOr;
 use serde::{Deserialize, Serialize};

 use super::flags::{FastFlag, IndexedFlag, SchemaFlagList, StoredFlag};
-/// Define how an a bytes field should be handled by tantivy.
+/// Define how a bytes field should be handled by tantivy.
 #[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
 #[serde(from = "BytesOptionsDeser")]
 pub struct BytesOptions {
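A minimal sketch of declaring a bytes field with `BytesOptions` (the field name is illustrative):

```rust
use tantivy::schema::{BytesOptions, Schema};

fn main() {
    let mut schema_builder = Schema::builder();
    let bytes_options = BytesOptions::default().set_stored().set_indexed();
    schema_builder.add_bytes_field("thumbnail", bytes_options);
    let _schema = schema_builder.build();
}
```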
@@ -28,10 +28,10 @@
 //! use tantivy::schema::*;
 //! let mut schema_builder = Schema::builder();
 //! let title_options = TextOptions::default()
-//! .set_stored()
-//! .set_indexing_options(TextFieldIndexing::default()
-//! .set_tokenizer("default")
-//! .set_index_option(IndexRecordOption::WithFreqsAndPositions));
+//! .set_stored()
+//! .set_indexing_options(TextFieldIndexing::default()
+//! .set_tokenizer("default")
+//! .set_index_option(IndexRecordOption::WithFreqsAndPositions));
 //! schema_builder.add_text_field("title", title_options);
 //! let schema = schema_builder.build();
 //! ```
@@ -45,8 +45,7 @@
 //! In the first phase, the ability to search for documents by the given field is determined by the
 //! [`IndexRecordOption`] of our [`TextOptions`].
 //!
-//! The effect of each possible setting is described more in detail
-//! [`TextIndexingOptions`](enum.TextIndexingOptions.html).
+//! The effect of each possible setting is described more in detail in [`TextOptions`].
 //!
 //! On the other hand setting the field as stored or not determines whether the field should be
 //! returned when [`Searcher::doc()`](crate::Searcher::doc) is called.
@@ -60,8 +59,8 @@
 //! use tantivy::schema::*;
 //! let mut schema_builder = Schema::builder();
 //! let num_stars_options = NumericOptions::default()
-//! .set_stored()
-//! .set_indexed();
+//! .set_stored()
+//! .set_indexed();
 //! schema_builder.add_u64_field("num_stars", num_stars_options);
 //! let schema = schema_builder.build();
 //! ```
@@ -79,8 +78,8 @@
 //! For convenience, it is possible to define your field indexing options by combining different
 //! flags using the `|` operator.
 //!
-//! For instance, a schema containing the two fields defined in the example above could be rewritten
-//! :
+//! For instance, a schema containing the two fields defined in the example above could be
+//! rewritten:
 //!
 //! ```
 //! use tantivy::schema::*;
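The doc example is cut off by the diff context here; purely as an illustration, the flag-combination style it refers to looks roughly like this:

```rust
use tantivy::schema::{Schema, INDEXED, STORED, TEXT};

fn main() {
    let mut schema_builder = Schema::builder();
    // TEXT | STORED and INDEXED | STORED combine flags via the `|` operator.
    schema_builder.add_text_field("title", TEXT | STORED);
    schema_builder.add_u64_field("num_stars", INDEXED | STORED);
    let _schema = schema_builder.build();
}
```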
@@ -54,27 +54,27 @@ impl Term {
 term
 }

-/// Builds a term given a field, and a u64-value
+/// Builds a term given a field, and a `u64`-value
 pub fn from_field_u64(field: Field, val: u64) -> Term {
 Term::from_fast_value(field, &val)
 }

-/// Builds a term given a field, and a i64-value
+/// Builds a term given a field, and a `i64`-value
 pub fn from_field_i64(field: Field, val: i64) -> Term {
 Term::from_fast_value(field, &val)
 }

-/// Builds a term given a field, and a f64-value
+/// Builds a term given a field, and a `f64`-value
 pub fn from_field_f64(field: Field, val: f64) -> Term {
 Term::from_fast_value(field, &val)
 }

-/// Builds a term given a field, and a f64-value
+/// Builds a term given a field, and a `bool`-value
 pub fn from_field_bool(field: Field, val: bool) -> Term {
 Term::from_fast_value(field, &val)
 }

-/// Builds a term given a field, and a DateTime value
+/// Builds a term given a field, and a `DateTime` value
 pub fn from_field_date(field: Field, val: DateTime) -> Term {
 Term::from_fast_value(field, &val.truncate(DatePrecision::Seconds))
 }
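A small usage sketch of these constructors (field handles are illustrative and would come from your schema):

```rust
use tantivy::schema::{Schema, INDEXED};
use tantivy::Term;

fn main() {
    let mut schema_builder = Schema::builder();
    let num_stars = schema_builder.add_u64_field("num_stars", INDEXED);
    let score = schema_builder.add_f64_field("score", INDEXED);
    let _schema = schema_builder.build();

    // Typed terms, e.g. for a TermQuery or a delete-by-term operation.
    let _t1 = Term::from_field_u64(num_stars, 4);
    let _t2 = Term::from_field_f64(score, 9.5);
}
```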
@@ -130,7 +130,7 @@ impl Term {
 self.set_fast_value(val);
 }

-/// Sets a `i64` value in the term.
+/// Sets a `DateTime` value in the term.
 pub fn set_date(&mut self, date: DateTime) {
 self.set_fast_value(date);
 }
@@ -47,7 +47,9 @@ impl TextOptions {
 /// unchanged. The "default" tokenizer will store the terms as lower case and this will be
 /// reflected in the dictionary.
 ///
-/// The original text can be retrieved via `ord_to_term` from the dictionary.
+/// The original text can be retrieved via
+/// [`TermDictionary::ord_to_term()`](crate::termdict::TermDictionary::ord_to_term)
+/// from the dictionary.
 #[must_use]
 pub fn set_fast(mut self) -> TextOptions {
 self.fast = true;
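A minimal sketch of marking a text field as fast, using the `set_fast()` signature shown in this hunk (newer tantivy versions may take a tokenizer argument here):

```rust
use tantivy::schema::{Schema, TextOptions};

fn main() {
    let mut schema_builder = Schema::builder();
    // Fast + stored text field; the fast-field dictionary holds the tokenized terms.
    let opts = TextOptions::default().set_fast().set_stored();
    schema_builder.add_text_field("category", opts);
    let _schema = schema_builder.build();
}
```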
@@ -140,10 +140,10 @@ impl StoreReader {
 self.cache.stats()
 }

-/// Get checkpoint for DocId. The checkpoint can be used to load a block containing the
+/// Get checkpoint for `DocId`. The checkpoint can be used to load a block containing the
 /// document.
 ///
-/// Advanced API. In most cases use [get](Self::get).
+/// Advanced API. In most cases use [`get`](Self::get).
 fn block_checkpoint(&self, doc_id: DocId) -> crate::Result<Checkpoint> {
 self.skip_index.seek(doc_id).ok_or_else(|| {
 crate::TantivyError::InvalidArgument(format!("Failed to lookup Doc #{}.", doc_id))
@@ -160,7 +160,7 @@ impl StoreReader {

 /// Loads and decompresses a block.
 ///
-/// Advanced API. In most cases use [get](Self::get).
+/// Advanced API. In most cases use [`get`](Self::get).
 fn read_block(&self, checkpoint: &Checkpoint) -> io::Result<Block> {
 let cache_key = checkpoint.byte_range.start;
 if let Some(block) = self.cache.get_from_cache(cache_key) {
@@ -205,7 +205,7 @@ impl StoreReader {

 /// Advanced API.
 ///
-/// In most cases use [get_document_bytes](Self::get_document_bytes).
+/// In most cases use [`get_document_bytes`](Self::get_document_bytes).
 fn get_document_bytes_from_block(
 block: OwnedBytes,
 doc_id: DocId,
@@ -219,7 +219,7 @@ impl StoreReader {

 /// Iterator over all Documents in their order as they are stored in the doc store.
 /// Use this, if you want to extract all Documents from the doc store.
-/// The alive_bitset has to be forwarded from the `SegmentReader` or the results maybe wrong.
+/// The `alive_bitset` has to be forwarded from the `SegmentReader` or the results may be wrong.
 pub fn iter<'a: 'b, 'b>(
 &'b self,
 alive_bitset: Option<&'a AliveBitSet>,
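A sketch of iterating the doc store as documented above, forwarding the alive bitset from the `SegmentReader`; note that `get_store_reader()` takes a cache-size argument in some tantivy versions, so treat the call as illustrative.

```rust
use tantivy::{Document, SegmentReader};

// Sketch: print every live stored document of one segment.
fn dump_segment(segment_reader: &SegmentReader) -> tantivy::Result<()> {
    let store_reader = segment_reader.get_store_reader()?;
    for doc in store_reader.iter(segment_reader.alive_bitset()) {
        let doc: Document = doc?;
        println!("{:?}", doc);
    }
    Ok(())
}
```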
@@ -230,9 +230,9 @@ impl StoreReader {
 })
 }

-/// Iterator over all RawDocuments in their order as they are stored in the doc store.
+/// Iterator over all raw Documents in their order as they are stored in the doc store.
 /// Use this, if you want to extract all Documents from the doc store.
-/// The alive_bitset has to be forwarded from the `SegmentReader` or the results maybe wrong.
+/// The `alive_bitset` has to be forwarded from the `SegmentReader` or the results may be wrong.
 pub(crate) fn iter_raw<'a: 'b, 'b>(
 &'b self,
 alive_bitset: Option<&'a AliveBitSet>,
@@ -320,7 +320,7 @@ fn block_read_index(block: &[u8], doc_pos: u32) -> crate::Result<Range<usize>> {
 impl StoreReader {
 /// Advanced API.
 ///
-/// In most cases use [get_async](Self::get_async)
+/// In most cases use [`get_async`](Self::get_async)
 ///
 /// Loads and decompresses a block asynchronously.
 async fn read_block_async(&self, checkpoint: &Checkpoint) -> crate::AsyncIoResult<Block> {
@@ -344,14 +344,14 @@ impl StoreReader {
 Ok(decompressed_block)
 }

-/// Fetches a document asynchronously.
+/// Reads raw bytes of a given document asynchronously.
 pub async fn get_document_bytes_async(&self, doc_id: DocId) -> crate::Result<OwnedBytes> {
 let checkpoint = self.block_checkpoint(doc_id)?;
 let block = self.read_block_async(&checkpoint).await?;
 Self::get_document_bytes_from_block(block, doc_id, &checkpoint)
 }

-/// Reads raw bytes of a given document. Async version of [get](Self::get).
+/// Fetches a document asynchronously. Async version of [`get`](Self::get).
 pub async fn get_async(&self, doc_id: DocId) -> crate::Result<Document> {
 let mut doc_bytes = self.get_document_bytes_async(doc_id).await?;
 Ok(Document::deserialize(&mut doc_bytes)?)
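A sketch of the async path documented above; the async doc-store methods may be gated behind an optional feature flag and require an async runtime, so treat this as illustrative only.

```rust
use tantivy::store::StoreReader;
use tantivy::{DocId, Document};

// Sketch: async counterpart of `StoreReader::get`.
async fn fetch_doc(store_reader: &StoreReader, doc_id: DocId) -> tantivy::Result<Document> {
    store_reader.get_async(doc_id).await
}
```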