Mirror of https://github.com/quickwit-oss/tantivy.git (synced 2026-01-04 16:22:55 +00:00)

Merge pull request #1593 from waywardmonkeys/doc-improvements

Documentation improvements.
@@ -452,7 +452,7 @@ fn intermediate_buckets_to_final_buckets_fill_gaps(
 histogram_req: &HistogramAggregation,
 sub_aggregation: &AggregationsInternal,
 ) -> crate::Result<Vec<BucketEntry>> {
-// Generate the the full list of buckets without gaps.
+// Generate the full list of buckets without gaps.
 //
 // The bounds are the min max from the current buckets, optionally extended by
 // extended_bounds from the request
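To make the gap-filling idea above concrete, here is a minimal sketch (not tantivy's actual implementation, and it ignores `extended_bounds`): a bucket is materialized for every `interval` step between the smallest and largest observed key, with empty buckets inserted where no documents landed.

```rust
// Illustrative sketch only; tantivy's real gap filling works on its own
// intermediate bucket types and also honors `extended_bounds` from the request.
use std::collections::BTreeMap;

fn fill_gaps(buckets: &BTreeMap<i64, u64>, interval: i64) -> Vec<(i64, u64)> {
    let (Some(&min), Some(&max)) = (buckets.keys().next(), buckets.keys().last()) else {
        return Vec::new();
    };
    (min..=max)
        .step_by(interval as usize)
        .map(|key| (key, buckets.get(&key).copied().unwrap_or(0)))
        .collect()
}
```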
@@ -15,12 +15,11 @@ use crate::termdict::TermDictionary;
 ///
 /// It is safe to delete the segment associated with
 /// an `InvertedIndexReader`. As long as it is open,
-/// the `FileSlice` it is relying on should
+/// the [`FileSlice`] it is relying on should
 /// stay available.
 ///
-///
 /// `InvertedIndexReader` are created by calling
-/// the `SegmentReader`'s [`.inverted_index(...)`] method
+/// [`SegmentReader::inverted_index()`](crate::SegmentReader::inverted_index).
 pub struct InvertedIndexReader {
 termdict: TermDictionary,
 postings_file_slice: FileSlice,
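A minimal usage sketch of the API documented above, assuming an index with an indexed text field: it walks each segment's `InvertedIndexReader` obtained through `SegmentReader::inverted_index()` and sums term frequencies from the posting list.

```rust
use tantivy::schema::{Field, IndexRecordOption};
use tantivy::{DocSet, Postings, Searcher, Term, TERMINATED};

// Sketch: count how often `word` occurs across all segments of a searcher.
fn count_occurrences(searcher: &Searcher, field: Field, word: &str) -> tantivy::Result<u64> {
    let mut total = 0u64;
    for segment_reader in searcher.segment_readers() {
        // Term dictionary + posting lists for this field in this segment.
        let inverted_index = segment_reader.inverted_index(field)?;
        let term = Term::from_field_text(field, word);
        if let Some(mut postings) =
            inverted_index.read_postings(&term, IndexRecordOption::WithFreqs)?
        {
            while postings.doc() != TERMINATED {
                total += u64::from(postings.term_freq());
                postings.advance();
            }
        }
    }
    Ok(total)
}
```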
@@ -75,7 +74,7 @@ impl InvertedIndexReader {
 ///
 /// This is useful for enumerating through a list of terms,
 /// and consuming the associated posting lists while avoiding
-/// reallocating a `BlockSegmentPostings`.
+/// reallocating a [`BlockSegmentPostings`].
 ///
 /// # Warning
 ///
@@ -96,7 +95,7 @@ impl InvertedIndexReader {
 /// Returns a block postings given a `Term`.
 /// This method is for an advanced usage only.
 ///
-/// Most user should prefer using `read_postings` instead.
+/// Most users should prefer using [`Self::read_postings()`] instead.
 pub fn read_block_postings(
 &self,
 term: &Term,
@@ -110,7 +109,7 @@ impl InvertedIndexReader {
 /// Returns a block postings given a `term_info`.
 /// This method is for an advanced usage only.
 ///
-/// Most user should prefer using `read_postings` instead.
+/// Most users should prefer using [`Self::read_postings()`] instead.
 pub fn read_block_postings_from_terminfo(
 &self,
 term_info: &TermInfo,
@@ -130,7 +129,7 @@ impl InvertedIndexReader {
 /// Returns a posting object given a `term_info`.
 /// This method is for an advanced usage only.
 ///
-/// Most user should prefer using `read_postings` instead.
+/// Most users should prefer using [`Self::read_postings()`] instead.
 pub fn read_postings_from_terminfo(
 &self,
 term_info: &TermInfo,
@@ -164,12 +163,12 @@ impl InvertedIndexReader {
 /// or `None` if the term has never been encountered and indexed.
 ///
 /// If the field was not indexed with the indexing options that cover
-/// the requested options, the returned `SegmentPostings` the method does not fail
+/// the requested options, the returned [`SegmentPostings`] the method does not fail
 /// and returns a `SegmentPostings` with as much information as possible.
 ///
-/// For instance, requesting `IndexRecordOption::Freq` for a
-/// `TextIndexingOptions` that does not index position will return a `SegmentPostings`
-/// with `DocId`s and frequencies.
+/// For instance, requesting [`IndexRecordOption::WithFreqs`] for a
+/// [`TextOptions`](crate::schema::TextOptions) that does not index position
+/// will return a [`SegmentPostings`] with `DocId`s and frequencies.
 pub fn read_postings(
 &self,
 term: &Term,
@@ -211,7 +210,7 @@ impl InvertedIndexReader {
 /// Returns a block postings given a `Term`.
 /// This method is for an advanced usage only.
 ///
-/// Most user should prefer using `read_postings` instead.
+/// Most users should prefer using [`Self::read_postings()`] instead.
 pub async fn warm_postings(
 &self,
 term: &Term,
@@ -216,10 +216,10 @@ impl SegmentReader {
 /// term dictionary associated with a specific field,
 /// and opening the posting list associated with any term.
 ///
-/// If the field is not marked as index, a warn is logged and an empty `InvertedIndexReader`
+/// If the field is not marked as index, a warning is logged and an empty `InvertedIndexReader`
 /// is returned.
-/// Similarly if the field is marked as indexed but no term has been indexed for the given
-/// index. an empty `InvertedIndexReader` is returned (but no warning is logged).
+/// Similarly, if the field is marked as indexed but no term has been indexed for the given
+/// index, an empty `InvertedIndexReader` is returned (but no warning is logged).
 pub fn inverted_index(&self, field: Field) -> crate::Result<Arc<InvertedIndexReader>> {
 if let Some(inv_idx_reader) = self
 .inv_idx_reader_cache
@@ -13,8 +13,8 @@ use crate::directory::OwnedBytes;
 /// By contract, whatever happens to the directory file, as long as a FileHandle
 /// is alive, the data associated with it cannot be altered or destroyed.
 ///
-/// The underlying behavior is therefore specific to the `Directory` that created it.
-/// Despite its name, a `FileSlice` may or may not directly map to an actual file
+/// The underlying behavior is therefore specific to the [`Directory`](crate::Directory) that
+/// created it. Despite its name, a [`FileSlice`] may or may not directly map to an actual file
 /// on the filesystem.

 #[async_trait]
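A small sketch of the `FileSlice` contract described above, using a `RamDirectory` so nothing actually touches the filesystem (path and data are illustrative):

```rust
use std::path::Path;
use tantivy::directory::{Directory, RamDirectory};

fn main() -> tantivy::Result<()> {
    let directory = RamDirectory::create();
    directory.atomic_write(Path::new("data"), b"hello tantivy")?;
    // Despite the name, this FileSlice is backed by memory, not a file.
    let file_slice = directory.open_read(Path::new("data"))?;
    // Slicing narrows the byte range without copying.
    let bytes = file_slice.slice(0..5).read_bytes()?;
    assert_eq!(bytes.as_slice(), b"hello");
    Ok(())
}
```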
@@ -133,15 +133,15 @@ fn merge(

 /// Advanced: Merges a list of segments from different indices in a new index.
 ///
-/// Returns `TantivyError` if the the indices list is empty or their
+/// Returns `TantivyError` if the indices list is empty or their
 /// schemas don't match.
 ///
 /// `output_directory`: is assumed to be empty.
 ///
 /// # Warning
 /// This function does NOT check or take the `IndexWriter` is running. It is not
-/// meant to work if you have an IndexWriter running for the origin indices, or
-/// the destination Index.
+/// meant to work if you have an `IndexWriter` running for the origin indices, or
+/// the destination `Index`.
 #[doc(hidden)]
 pub fn merge_indices<T: Into<Box<dyn Directory>>>(
 indices: &[Index],
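A hedged usage sketch of `merge_indices`: it assumes the function is reachable from the crate root and returns the merged `Index` (it is `#[doc(hidden)]`, so verify the exact path and signature for your version). The output directory must be empty, and no `IndexWriter` may be running on the sources or the destination.

```rust
use tantivy::directory::RamDirectory;
use tantivy::{merge_indices, Index};

// Sketch: merge two indices sharing the same schema into a fresh in-memory directory.
fn merge_two(index_a: &Index, index_b: &Index) -> tantivy::Result<Index> {
    let output = RamDirectory::create();
    merge_indices(&[index_a.clone(), index_b.clone()], output)
}
```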
@@ -179,15 +179,15 @@ pub fn merge_indices<T: Into<Box<dyn Directory>>>(
 /// Advanced: Merges a list of segments from different indices in a new index.
 /// Additional you can provide a delete bitset for each segment to ignore doc_ids.
 ///
-/// Returns `TantivyError` if the the indices list is empty or their
+/// Returns `TantivyError` if the indices list is empty or their
 /// schemas don't match.
 ///
 /// `output_directory`: is assumed to be empty.
 ///
 /// # Warning
 /// This function does NOT check or take the `IndexWriter` is running. It is not
-/// meant to work if you have an IndexWriter running for the origin indices, or
-/// the destination Index.
+/// meant to work if you have an `IndexWriter` running for the origin indices, or
+/// the destination `Index`.
 #[doc(hidden)]
 pub fn merge_filtered_segments<T: Into<Box<dyn Directory>>>(
 segments: &[Segment],
@@ -47,11 +47,11 @@ impl<'a> Iterator for VInt32Reader<'a> {
 }
 }

-/// Recorder is in charge of recording relevant information about
+/// `Recorder` is in charge of recording relevant information about
 /// the presence of a term in a document.
 ///
-/// Depending on the `TextIndexingOptions` associated with the
-/// field, the recorder may records
+/// Depending on the [`TextOptions`](crate::schema::TextOptions) associated
+/// with the field, the recorder may record:
 /// * the document frequency
 /// * the document id
 /// * the term frequency
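The schema-level `IndexRecordOption` is what decides which of the items above gets recorded for a text field; a short sketch using the options shown elsewhere in this commit (field names are illustrative):

```rust
use tantivy::schema::{IndexRecordOption, Schema, TextFieldIndexing, TextOptions};

fn main() {
    let mut schema_builder = Schema::builder();
    // Only document ids are recorded for this field.
    let basic = TextOptions::default().set_indexing_options(
        TextFieldIndexing::default().set_index_option(IndexRecordOption::Basic),
    );
    // Document ids, term frequencies and positions are recorded for this one.
    let full = TextOptions::default().set_indexing_options(
        TextFieldIndexing::default().set_index_option(IndexRecordOption::WithFreqsAndPositions),
    );
    schema_builder.add_text_field("tag", basic);
    schema_builder.add_text_field("body", full);
    let _schema = schema_builder.build();
}
```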
@@ -199,7 +199,7 @@ impl TermHashMap {
 /// `update` create a new entry for a given key if it does not exists
 /// or updates the existing entry.
 ///
-/// The actual logic for this update is define in the the `updater`
+/// The actual logic for this update is define in the `updater`
 /// argument.
 ///
 /// If the key is not present, `updater` will receive `None` and
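A sketch of the update-or-insert pattern this documents, using a plain `HashMap` rather than tantivy's `TermHashMap`: the `updater` closure receives `None` for a new key and `Some(previous)` otherwise, and returns the value to store.

```rust
// Illustrative only; tantivy's TermHashMap stores values in its own arena.
use std::collections::HashMap;

fn update<F>(map: &mut HashMap<Vec<u8>, u64>, key: &[u8], updater: F)
where
    F: FnOnce(Option<u64>) -> u64,
{
    let new_value = updater(map.get(key).copied());
    map.insert(key.to_vec(), new_value);
}

fn main() {
    let mut term_counts: HashMap<Vec<u8>, u64> = HashMap::new();
    // First call: the updater sees None and initializes the count.
    update(&mut term_counts, b"hello", |prev| prev.unwrap_or(0) + 1);
    // Second call: the updater sees Some(1) and increments it.
    update(&mut term_counts, b"hello", |prev| prev.unwrap_or(0) + 1);
    assert_eq!(term_counts[b"hello".as_slice()], 2);
}
```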
@@ -3,7 +3,7 @@ use std::ops::BitOr;
 use serde::{Deserialize, Serialize};

 use super::flags::{FastFlag, IndexedFlag, SchemaFlagList, StoredFlag};
-/// Define how an a bytes field should be handled by tantivy.
+/// Define how a bytes field should be handled by tantivy.
 #[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
 #[serde(from = "BytesOptionsDeser")]
 pub struct BytesOptions {
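A minimal sketch of declaring a bytes field with `BytesOptions` (the field name is illustrative):

```rust
use tantivy::schema::{BytesOptions, Schema};

fn main() {
    let mut schema_builder = Schema::builder();
    let bytes_options = BytesOptions::default().set_stored().set_indexed();
    schema_builder.add_bytes_field("thumbnail", bytes_options);
    let _schema = schema_builder.build();
}
```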
@@ -28,10 +28,10 @@
 //! use tantivy::schema::*;
 //! let mut schema_builder = Schema::builder();
 //! let title_options = TextOptions::default()
-//! .set_stored()
-//! .set_indexing_options(TextFieldIndexing::default()
-//! .set_tokenizer("default")
-//! .set_index_option(IndexRecordOption::WithFreqsAndPositions));
+//! .set_stored()
+//! .set_indexing_options(TextFieldIndexing::default()
+//! .set_tokenizer("default")
+//! .set_index_option(IndexRecordOption::WithFreqsAndPositions));
 //! schema_builder.add_text_field("title", title_options);
 //! let schema = schema_builder.build();
 //! ```
@@ -45,8 +45,7 @@
 //! In the first phase, the ability to search for documents by the given field is determined by the
 //! [`IndexRecordOption`] of our [`TextOptions`].
 //!
-//! The effect of each possible setting is described more in detail
-//! [`TextIndexingOptions`](enum.TextIndexingOptions.html).
+//! The effect of each possible setting is described more in detail in [`TextOptions`].
 //!
 //! On the other hand setting the field as stored or not determines whether the field should be
 //! returned when [`Searcher::doc()`](crate::Searcher::doc) is called.
@@ -60,8 +59,8 @@
 //! use tantivy::schema::*;
 //! let mut schema_builder = Schema::builder();
 //! let num_stars_options = NumericOptions::default()
-//! .set_stored()
-//! .set_indexed();
+//! .set_stored()
+//! .set_indexed();
 //! schema_builder.add_u64_field("num_stars", num_stars_options);
 //! let schema = schema_builder.build();
 //! ```
@@ -79,8 +78,8 @@
 //! For convenience, it is possible to define your field indexing options by combining different
 //! flags using the `|` operator.
 //!
-//! For instance, a schema containing the two fields defined in the example above could be rewritten
-//! :
+//! For instance, a schema containing the two fields defined in the example above could be
+//! rewritten:
 //!
 //! ```
 //! use tantivy::schema::*;
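The doc example is cut off by the diff context here; purely as an illustration, the flag-combination style it refers to looks roughly like this:

```rust
use tantivy::schema::{Schema, INDEXED, STORED, TEXT};

fn main() {
    let mut schema_builder = Schema::builder();
    // TEXT | STORED and INDEXED | STORED combine flags via the `|` operator.
    schema_builder.add_text_field("title", TEXT | STORED);
    schema_builder.add_u64_field("num_stars", INDEXED | STORED);
    let _schema = schema_builder.build();
}
```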
@@ -54,27 +54,27 @@ impl Term {
 term
 }

-/// Builds a term given a field, and a u64-value
+/// Builds a term given a field, and a `u64`-value
 pub fn from_field_u64(field: Field, val: u64) -> Term {
 Term::from_fast_value(field, &val)
 }

-/// Builds a term given a field, and a i64-value
+/// Builds a term given a field, and a `i64`-value
 pub fn from_field_i64(field: Field, val: i64) -> Term {
 Term::from_fast_value(field, &val)
 }

-/// Builds a term given a field, and a f64-value
+/// Builds a term given a field, and a `f64`-value
 pub fn from_field_f64(field: Field, val: f64) -> Term {
 Term::from_fast_value(field, &val)
 }

-/// Builds a term given a field, and a f64-value
+/// Builds a term given a field, and a `bool`-value
 pub fn from_field_bool(field: Field, val: bool) -> Term {
 Term::from_fast_value(field, &val)
 }

-/// Builds a term given a field, and a DateTime value
+/// Builds a term given a field, and a `DateTime` value
 pub fn from_field_date(field: Field, val: DateTime) -> Term {
 Term::from_fast_value(field, &val.truncate(DatePrecision::Seconds))
 }
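A small usage sketch of these constructors (field handles are illustrative and would come from your schema):

```rust
use tantivy::schema::{Schema, INDEXED};
use tantivy::Term;

fn main() {
    let mut schema_builder = Schema::builder();
    let num_stars = schema_builder.add_u64_field("num_stars", INDEXED);
    let score = schema_builder.add_f64_field("score", INDEXED);
    let _schema = schema_builder.build();

    // Typed terms, e.g. for a TermQuery or a delete-by-term operation.
    let _t1 = Term::from_field_u64(num_stars, 4);
    let _t2 = Term::from_field_f64(score, 9.5);
}
```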
@@ -130,7 +130,7 @@ impl Term {
 self.set_fast_value(val);
 }

-/// Sets a `i64` value in the term.
+/// Sets a `DateTime` value in the term.
 pub fn set_date(&mut self, date: DateTime) {
 self.set_fast_value(date);
 }
@@ -47,7 +47,9 @@ impl TextOptions {
 /// unchanged. The "default" tokenizer will store the terms as lower case and this will be
 /// reflected in the dictionary.
 ///
-/// The original text can be retrieved via `ord_to_term` from the dictionary.
+/// The original text can be retrieved via
+/// [`TermDictionary::ord_to_term()`](crate::termdict::TermDictionary::ord_to_term)
+/// from the dictionary.
 #[must_use]
 pub fn set_fast(mut self) -> TextOptions {
 self.fast = true;
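A minimal sketch of marking a text field as fast, using the `set_fast()` signature shown in this hunk (newer tantivy versions may take a tokenizer argument here):

```rust
use tantivy::schema::{Schema, TextOptions};

fn main() {
    let mut schema_builder = Schema::builder();
    // Fast + stored text field; the fast-field dictionary holds the tokenized terms.
    let opts = TextOptions::default().set_fast().set_stored();
    schema_builder.add_text_field("category", opts);
    let _schema = schema_builder.build();
}
```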
@@ -140,10 +140,10 @@ impl StoreReader {
 self.cache.stats()
 }

-/// Get checkpoint for DocId. The checkpoint can be used to load a block containing the
+/// Get checkpoint for `DocId`. The checkpoint can be used to load a block containing the
 /// document.
 ///
-/// Advanced API. In most cases use [get](Self::get).
+/// Advanced API. In most cases use [`get`](Self::get).
 fn block_checkpoint(&self, doc_id: DocId) -> crate::Result<Checkpoint> {
 self.skip_index.seek(doc_id).ok_or_else(|| {
 crate::TantivyError::InvalidArgument(format!("Failed to lookup Doc #{}.", doc_id))
@@ -160,7 +160,7 @@ impl StoreReader {

 /// Loads and decompresses a block.
 ///
-/// Advanced API. In most cases use [get](Self::get).
+/// Advanced API. In most cases use [`get`](Self::get).
 fn read_block(&self, checkpoint: &Checkpoint) -> io::Result<Block> {
 let cache_key = checkpoint.byte_range.start;
 if let Some(block) = self.cache.get_from_cache(cache_key) {
@@ -205,7 +205,7 @@ impl StoreReader {

 /// Advanced API.
 ///
-/// In most cases use [get_document_bytes](Self::get_document_bytes).
+/// In most cases use [`get_document_bytes`](Self::get_document_bytes).
 fn get_document_bytes_from_block(
 block: OwnedBytes,
 doc_id: DocId,
@@ -219,7 +219,7 @@ impl StoreReader {

 /// Iterator over all Documents in their order as they are stored in the doc store.
 /// Use this, if you want to extract all Documents from the doc store.
-/// The alive_bitset has to be forwarded from the `SegmentReader` or the results maybe wrong.
+/// The `alive_bitset` has to be forwarded from the `SegmentReader` or the results may be wrong.
 pub fn iter<'a: 'b, 'b>(
 &'b self,
 alive_bitset: Option<&'a AliveBitSet>,
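A sketch of iterating the doc store as documented above, forwarding the alive bitset from the `SegmentReader`; note that `get_store_reader()` takes a cache-size argument in some tantivy versions, so treat the call as illustrative.

```rust
use tantivy::{Document, SegmentReader};

// Sketch: print every live stored document of one segment.
fn dump_segment(segment_reader: &SegmentReader) -> tantivy::Result<()> {
    let store_reader = segment_reader.get_store_reader()?;
    for doc in store_reader.iter(segment_reader.alive_bitset()) {
        let doc: Document = doc?;
        println!("{:?}", doc);
    }
    Ok(())
}
```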
@@ -230,9 +230,9 @@ impl StoreReader {
 })
 }

-/// Iterator over all RawDocuments in their order as they are stored in the doc store.
+/// Iterator over all raw Documents in their order as they are stored in the doc store.
 /// Use this, if you want to extract all Documents from the doc store.
-/// The alive_bitset has to be forwarded from the `SegmentReader` or the results maybe wrong.
+/// The `alive_bitset` has to be forwarded from the `SegmentReader` or the results may be wrong.
 pub(crate) fn iter_raw<'a: 'b, 'b>(
 &'b self,
 alive_bitset: Option<&'a AliveBitSet>,
@@ -320,7 +320,7 @@ fn block_read_index(block: &[u8], doc_pos: u32) -> crate::Result<Range<usize>> {
 impl StoreReader {
 /// Advanced API.
 ///
-/// In most cases use [get_async](Self::get_async)
+/// In most cases use [`get_async`](Self::get_async)
 ///
 /// Loads and decompresses a block asynchronously.
 async fn read_block_async(&self, checkpoint: &Checkpoint) -> crate::AsyncIoResult<Block> {
@@ -344,14 +344,14 @@ impl StoreReader {
 Ok(decompressed_block)
 }

-/// Fetches a document asynchronously.
+/// Reads raw bytes of a given document asynchronously.
 pub async fn get_document_bytes_async(&self, doc_id: DocId) -> crate::Result<OwnedBytes> {
 let checkpoint = self.block_checkpoint(doc_id)?;
 let block = self.read_block_async(&checkpoint).await?;
 Self::get_document_bytes_from_block(block, doc_id, &checkpoint)
 }

-/// Reads raw bytes of a given document. Async version of [get](Self::get).
+/// Fetches a document asynchronously. Async version of [`get`](Self::get).
 pub async fn get_async(&self, doc_id: DocId) -> crate::Result<Document> {
 let mut doc_bytes = self.get_document_bytes_async(doc_id).await?;
 Ok(Document::deserialize(&mut doc_bytes)?)
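A sketch of the async path documented above; the async doc-store methods may be gated behind an optional feature flag and require an async runtime, so treat this as illustrative only.

```rust
use tantivy::store::StoreReader;
use tantivy::{DocId, Document};

// Sketch: async counterpart of `StoreReader::get`.
async fn fetch_doc(store_reader: &StoreReader, doc_id: DocId) -> tantivy::Result<Document> {
    store_reader.get_async(doc_id).await
}
```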