Documentation improvements.

Fix some linking, some grammar, some typos, etc.
Bruce Mitchener
2022-09-18 14:11:12 +07:00
parent 1a2ba7025a
commit 6a88ac3fe3
22 changed files with 121 additions and 107 deletions


@@ -95,7 +95,7 @@ called [`Directory`](src/directory/directory.rs).
 Contrary to Lucene however, "files" are quite different from some kind of `io::Read` object.
 Check out the [`src/directory/directory.rs`](src/directory/directory.rs) trait for more details.
-Tantivy ships two main directory implementations: the `MMapDirectory` and the `RAMDirectory`,
+Tantivy ships two main directory implementations: the `MmapDirectory` and the `RamDirectory`,
 but users can extend tantivy with their own implementation.

 ## [schema/](src/schema): What are documents?


@@ -55,13 +55,13 @@ const HIGHEST_BIT: u64 = 1 << 63;
 /// to values over 2^63, and all values end up requiring 64 bits.
 ///
 /// # See also
-/// The [reverse mapping is `u64_to_i64`](./fn.u64_to_i64.html).
+/// The reverse mapping is [`u64_to_i64()`].
 #[inline]
 pub fn i64_to_u64(val: i64) -> u64 {
     (val as u64) ^ HIGHEST_BIT
 }

-/// Reverse the mapping given by [`i64_to_u64`](./fn.i64_to_u64.html).
+/// Reverse the mapping given by [`i64_to_u64()`].
 #[inline]
 pub fn u64_to_i64(val: u64) -> i64 {
     (val ^ HIGHEST_BIT) as i64
@@ -83,7 +83,7 @@ pub fn u64_to_i64(val: u64) -> i64 {
 /// explains the mapping in a clear manner.
 ///
 /// # See also
-/// The [reverse mapping is `u64_to_f64`](./fn.u64_to_f64.html).
+/// The reverse mapping is [`u64_to_f64()`].
 #[inline]
 pub fn f64_to_u64(val: f64) -> u64 {
     let bits = val.to_bits();
@@ -94,7 +94,7 @@ pub fn f64_to_u64(val: f64) -> u64 {
     }
 }

-/// Reverse the mapping given by [`i64_to_u64`](./fn.i64_to_u64.html).
+/// Reverse the mapping given by [`f64_to_u64()`].
 #[inline]
 pub fn u64_to_f64(val: u64) -> f64 {
     f64::from_bits(if val & HIGHEST_BIT != 0 {
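The integer mapping is small enough to check in isolation. A minimal, self-contained sketch (the constant and both functions are copied from the hunk above; the `main` wrapper is only for illustration):

```rust
const HIGHEST_BIT: u64 = 1 << 63;

/// Maps `i64` to `u64` while preserving the natural order.
pub fn i64_to_u64(val: i64) -> u64 {
    (val as u64) ^ HIGHEST_BIT
}

/// Reverse mapping.
pub fn u64_to_i64(val: u64) -> i64 {
    (val ^ HIGHEST_BIT) as i64
}

fn main() {
    // Roundtrip: the two functions are inverses of each other.
    assert_eq!(u64_to_i64(i64_to_u64(-42)), -42);
    // Monotonicity: flipping the sign bit shifts the whole i64 range
    // onto the u64 range in order.
    assert_eq!(i64_to_u64(i64::MIN), u64::MIN);
    assert_eq!(i64_to_u64(i64::MAX), u64::MAX);
    assert!(i64_to_u64(-1) < i64_to_u64(0));
}
```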


@@ -55,7 +55,7 @@ impl<W: TerminatingWrite> TerminatingWrite for CountingWriter<W> {
 }

 /// Struct used to prevent calling
-/// [`terminate_ref`](trait.TerminatingWrite.html#tymethod.terminate_ref) directly
+/// [`terminate_ref`](TerminatingWrite::terminate_ref) directly
 ///
 /// The point is that while the type is public, it cannot be built by anyone
 /// outside of this module.


@@ -36,8 +36,7 @@ fn main() -> tantivy::Result<()> {
     // need to be able to retrieve it
     // for our application.
     //
-    // We can make our index lighter and
-    // by omitting `STORED` flag.
+    // We can make our index lighter by omitting the `STORED` flag.
     let body = schema_builder.add_text_field("body", TEXT);
     let schema = schema_builder.build();
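For context, a hedged sketch of the schema-building pattern this example describes (field names follow the example; the rest of its `main` is elided here):

```rust
use tantivy::schema::{Schema, STORED, TEXT};

fn main() {
    let mut schema_builder = Schema::builder();
    // TEXT | STORED: tokenized and indexed for search, *and* kept in the
    // doc store so the value can be returned with search results.
    let _title = schema_builder.add_text_field("title", TEXT | STORED);
    // TEXT alone: searchable but not retrievable -- the index stays lighter.
    let _body = schema_builder.add_text_field("body", TEXT);
    let _schema = schema_builder.build();
}
```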


@@ -23,7 +23,7 @@ const ESCAPED_SPECIAL_CHARS_PATTERN: &str = r#"\\(\+|\^|`|:|\{|\}|"|\[|\]|\(|\)|
 /// Parses a field_name
 /// A field name must have at least one character and be followed by a colon.
 /// All characters are allowed including special characters `SPECIAL_CHARS`, but these
-/// need to be escaped with a backslack character '\'.
+/// need to be escaped with a backslash character '\'.
 fn field_name<'a>() -> impl Parser<&'a str, Output = String> {
     static ESCAPED_SPECIAL_CHARS_RE: Lazy<Regex> =
         Lazy::new(|| Regex::new(ESCAPED_SPECIAL_CHARS_PATTERN).unwrap());
@@ -68,7 +68,7 @@ fn word<'a>() -> impl Parser<&'a str, Output = String> {
 ///
 /// NOTE: also accepts 999999-99-99T99:99:99.266051969+99:99
 /// We delegate rejecting such invalid dates to the logical AST computation code
-/// which invokes time::OffsetDateTime::parse(..., &Rfc3339) on the value to actually parse
+/// which invokes `time::OffsetDateTime::parse(..., &Rfc3339)` on the value to actually parse
 /// it (instead of merely extracting the datetime value as string as done here).
 fn date_time<'a>() -> impl Parser<&'a str, Output = String> {
     let two_digits = || recognize::<String, _, _>((digit(), digit()));
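The delegation described in that comment is easy to reproduce. A small sketch, assuming the `time` crate (0.3) that tantivy uses:

```rust
use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;

fn main() {
    // A well-formed RFC 3339 timestamp parses fine.
    assert!(OffsetDateTime::parse("2022-09-18T14:11:12+07:00", &Rfc3339).is_ok());
    // A string the grammar accepts but which is not a real date is
    // rejected at this later stage.
    assert!(OffsetDateTime::parse("999999-99-99T99:99:99.266051969+99:99", &Rfc3339).is_err());
}
```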


@@ -67,10 +67,10 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
 /// (e.g. `/category/fiction`, `/category/biography`, `/category/personal_development`).
 ///
 /// Once collection is finished, you can harvest its results in the form
-/// of a `FacetCounts` object, and extract your face t counts from it.
+/// of a [`FacetCounts`] object, and extract your facet counts from it.
 ///
 /// This implementation assumes you are working with a number of facets that
-/// is much hundreds of time lower than your number of documents.
+/// is many hundreds of times smaller than your number of documents.
 ///
 ///
 /// ```rust
@@ -231,7 +231,7 @@ impl FacetCollector {
     ///
     /// Adding two facets within which one is the prefix of the other is forbidden.
     /// If you need the correct number of unique documents for two such facets,
-    /// just add them in separate `FacetCollector`.
+    /// just add them in separate `FacetCollector`s.
     pub fn add_facet<T>(&mut self, facet_from: T)
     where Facet: From<T> {
         let facet = Facet::from(facet_from);
@@ -391,7 +391,7 @@ impl<'a> Iterator for FacetChildIterator<'a> {
 impl FacetCounts {
     /// Returns an iterator over all of the facet count pairs inside this result.
-    /// See the documentation for [FacetCollector] for a usage example.
+    /// See the documentation for [`FacetCollector`] for a usage example.
     pub fn get<T>(&self, facet_from: T) -> FacetChildIterator<'_>
     where Facet: From<T> {
         let facet = Facet::from(facet_from);
@@ -410,7 +410,7 @@ impl FacetCounts {
     }

     /// Returns a vector of top `k` facets with their counts, sorted highest-to-lowest by counts.
-    /// See the documentation for [FacetCollector] for a usage example.
+    /// See the documentation for [`FacetCollector`] for a usage example.
     pub fn top_k<T>(&self, facet: T, k: usize) -> Vec<(&Facet, u64)>
     where Facet: From<T> {
         let mut heap = BinaryHeap::with_capacity(k);
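A hedged end-to-end sketch of the collector these docs describe, assuming the 0.18-era API (where `FacetCollector::for_field` takes a `Field`); field and facet names are illustrative:

```rust
use tantivy::collector::FacetCollector;
use tantivy::query::AllQuery;
use tantivy::schema::{Facet, FacetOptions, Schema};
use tantivy::{doc, Index};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let category = schema_builder.add_facet_field("category", FacetOptions::default());
    let index = Index::create_in_ram(schema_builder.build());

    let mut writer = index.writer(50_000_000)?;
    writer.add_document(doc!(category => Facet::from("/category/fiction")))?;
    writer.add_document(doc!(category => Facet::from("/category/biography")))?;
    writer.commit()?;

    // Count documents under each direct child of /category.
    let mut facet_collector = FacetCollector::for_field(category);
    facet_collector.add_facet("/category");
    let searcher = index.reader()?.searcher();
    let facet_counts = searcher.search(&AllQuery, &facet_collector)?;
    for (facet, count) in facet_counts.get("/category") {
        println!("{}: {}", facet, count);
    }
    Ok(())
}
```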


@@ -4,9 +4,9 @@
 //! In tantivy jargon, we call this information your search "fruit".
 //!
 //! Your fruit could for instance be:
-//! - [the count of matching documents](./struct.Count.html)
-//! - [the top 10 documents, by relevancy or by a fast field](./struct.TopDocs.html)
-//! - [facet counts](./struct.FacetCollector.html)
+//! - [the count of matching documents](crate::collector::Count)
+//! - [the top 10 documents, by relevancy or by a fast field](crate::collector::TopDocs)
+//! - [facet counts](FacetCollector)
 //!
 //! At one point in your code, you will trigger the actual search operation by calling
 //! [the `search(...)` method of your `Searcher` object](../struct.Searcher.html#method.search).
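As a sketch of how several "fruits" can be harvested in one search pass (collectors compose as tuples; an in-RAM index with one document is assumed):

```rust
use tantivy::collector::{Count, TopDocs};
use tantivy::query::AllQuery;
use tantivy::schema::{Schema, TEXT};
use tantivy::{doc, Index};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let body = schema_builder.add_text_field("body", TEXT);
    let index = Index::create_in_ram(schema_builder.build());

    let mut writer = index.writer(50_000_000)?;
    writer.add_document(doc!(body => "the beauty and the beast"))?;
    writer.commit()?;

    let searcher = index.reader()?.searcher();
    // One search call, two fruits: a count and the top-10 documents.
    let (count, top_docs) = searcher.search(&AllQuery, &(Count, TopDocs::with_limit(10)))?;
    assert_eq!(count, 1);
    assert_eq!(top_docs.len(), 1);
    Ok(())
}
```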


@@ -78,8 +78,8 @@ fn save_new_metas(
 /// IndexBuilder can be used to create an index.
 ///
-/// Use in conjunction with `SchemaBuilder`. Global index settings
-/// can be configured with `IndexSettings`
+/// Use in conjunction with [`SchemaBuilder`][crate::schema::SchemaBuilder].
+/// Global index settings can be configured with [`IndexSettings`].
 ///
 /// # Examples
 ///
@@ -97,7 +97,13 @@ fn save_new_metas(
 /// );
 ///
 /// let schema = schema_builder.build();
-/// let settings = IndexSettings{sort_by_field: Some(IndexSortByField{field:"number".to_string(), order:Order::Asc}), ..Default::default()};
+/// let settings = IndexSettings{
+///     sort_by_field: Some(IndexSortByField{
+///         field: "number".to_string(),
+///         order: Order::Asc
+///     }),
+///     ..Default::default()
+/// };
 /// let index = Index::builder().schema(schema).settings(settings).create_in_ram();
 /// ```
 pub struct IndexBuilder {
@@ -140,7 +146,7 @@ impl IndexBuilder {
         self
     }

-    /// Creates a new index using the `RAMDirectory`.
+    /// Creates a new index using the [`RamDirectory`].
     ///
     /// The index will be allocated in anonymous memory.
     /// This should only be used for unit tests.
@@ -148,13 +154,14 @@ impl IndexBuilder {
         let ram_directory = RamDirectory::create();
         Ok(self
             .create(ram_directory)
-            .expect("Creating a RAMDirectory should never fail"))
+            .expect("Creating a RamDirectory should never fail"))
     }

     /// Creates a new index in a given filepath.
-    /// The index will use the `MMapDirectory`.
+    /// The index will use the [`MmapDirectory`].
     ///
-    /// If a previous index was in this directory, it returns an `IndexAlreadyExists` error.
+    /// If a previous index was in this directory, it returns a
+    /// [`TantivyError::IndexAlreadyExists`] error.
     #[cfg(feature = "mmap")]
     pub fn create_in_dir<P: AsRef<Path>>(self, directory_path: P) -> crate::Result<Index> {
         let mmap_directory: Box<dyn Directory> = Box::new(MmapDirectory::open(directory_path)?);
@@ -185,12 +192,13 @@ impl IndexBuilder {
     /// Creates a new index in a temp directory.
     ///
-    /// The index will use the `MMapDirectory` in a newly created directory.
-    /// The temp directory will be destroyed automatically when the `Index` object
+    /// The index will use the [`MmapDirectory`] in a newly created directory.
+    /// The temp directory will be destroyed automatically when the [`Index`] object
     /// is destroyed.
     ///
-    /// The temp directory is only used for testing the `MmapDirectory`.
-    /// For other unit tests, prefer the `RAMDirectory`, see: `create_in_ram`.
+    /// The temp directory is only used for testing the [`MmapDirectory`].
+    /// For other unit tests, prefer the [`RamDirectory`], see:
+    /// [`IndexBuilder::create_in_ram()`].
     #[cfg(feature = "mmap")]
     pub fn create_from_tempdir(self) -> crate::Result<Index> {
         let mmap_directory: Box<dyn Directory> = Box::new(MmapDirectory::create_from_tempdir()?);
@@ -286,7 +294,7 @@ impl Index {
         self.set_multithread_executor(default_num_threads)
     }

-    /// Creates a new index using the `RamDirectory`.
+    /// Creates a new index using the [`RamDirectory`].
     ///
     /// The index will be allocated in anonymous memory.
     /// This is useful for indexing a small set of documents
@@ -296,9 +304,10 @@ impl Index {
     }

     /// Creates a new index in a given filepath.
-    /// The index will use the `MMapDirectory`.
+    /// The index will use the [`MmapDirectory`].
     ///
-    /// If a previous index was in this directory, then it returns an `IndexAlreadyExists` error.
+    /// If a previous index was in this directory, then it returns
+    /// a [`TantivyError::IndexAlreadyExists`] error.
     #[cfg(feature = "mmap")]
     pub fn create_in_dir<P: AsRef<Path>>(
         directory_path: P,
@@ -320,12 +329,13 @@ impl Index {
     /// Creates a new index in a temp directory.
     ///
-    /// The index will use the `MMapDirectory` in a newly created directory.
-    /// The temp directory will be destroyed automatically when the `Index` object
+    /// The index will use the [`MmapDirectory`] in a newly created directory.
+    /// The temp directory will be destroyed automatically when the [`Index`] object
     /// is destroyed.
     ///
-    /// The temp directory is only used for testing the `MmapDirectory`.
-    /// For other unit tests, prefer the `RamDirectory`, see: `create_in_ram`.
+    /// The temp directory is only used for testing the [`MmapDirectory`].
+    /// For other unit tests, prefer the [`RamDirectory`],
+    /// see: [`IndexBuilder::create_in_ram()`].
     #[cfg(feature = "mmap")]
     pub fn create_from_tempdir(schema: Schema) -> crate::Result<Index> {
         IndexBuilder::new().schema(schema).create_from_tempdir()
@@ -345,7 +355,7 @@ impl Index {
         builder.create(dir)
     }

-    /// Creates a new index given a directory and an `IndexMeta`.
+    /// Creates a new index given a directory and an [`IndexMeta`].
     fn open_from_metas(
         directory: ManagedDirectory,
         metas: &IndexMeta,
@@ -372,7 +382,7 @@ impl Index {
         &self.tokenizers
     }

-    /// Helper to access the tokenizer associated to a specific field.
+    /// Get the tokenizer associated with a specific field.
     pub fn tokenizer_for_field(&self, field: Field) -> crate::Result<TextAnalyzer> {
         let field_entry = self.schema.get_field_entry(field);
         let field_type = field_entry.field_type();
@@ -404,14 +414,14 @@ impl Index {
         })
     }

-    /// Create a default `IndexReader` for the given index.
+    /// Create a default [`IndexReader`] for the given index.
     ///
-    /// See [`Index.reader_builder()`](#method.reader_builder).
+    /// See [`Index::reader_builder()`].
     pub fn reader(&self) -> crate::Result<IndexReader> {
         self.reader_builder().try_into()
     }

-    /// Create a `IndexReader` for the given index.
+    /// Create an [`IndexReader`] for the given index.
     ///
     /// Most projects should create at most one reader for a given index.
     /// This method is typically called only once per `Index` instance.
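A compact sketch of two of the creation paths documented above (the temp-dir variant needs the default `mmap` feature):

```rust
use tantivy::schema::{Schema, TEXT};
use tantivy::Index;

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    schema_builder.add_text_field("body", TEXT);
    let schema = schema_builder.build();

    // Anonymous memory: best suited for unit tests and small ephemeral indexes.
    let _in_ram = Index::create_in_ram(schema.clone());

    // MmapDirectory in a fresh temp directory; the directory is destroyed
    // when the Index object is dropped.
    let _on_disk = Index::create_from_tempdir(schema)?;
    Ok(())
}
```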


@@ -82,7 +82,7 @@ impl Searcher {
     /// Fetches a document from tantivy's store given a `DocAddress`.
     ///
     /// The searcher uses the segment ordinal to route the
-    /// the request to the right `Segment`.
+    /// request to the right `Segment`.
     pub fn doc(&self, doc_address: DocAddress) -> crate::Result<Document> {
         let store_reader = &self.inner.store_readers[doc_address.segment_ord as usize];
         store_reader.get(doc_address.doc_id)


@@ -117,9 +117,9 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
     /// change.
     ///
     /// Specifically, subsequent writes or flushes should
-    /// have no effect on the returned `FileSlice` object.
+    /// have no effect on the returned [`FileSlice`] object.
     ///
-    /// You should only use this to read files created with [Directory::open_write].
+    /// You should only use this to read files created with [`Directory::open_write()`].
     fn open_read(&self, path: &Path) -> Result<FileSlice, OpenReadError> {
         let file_handle = self.get_file_handle(path)?;
         Ok(FileSlice::new(file_handle))
@@ -128,27 +128,28 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
     /// Removes a file
     ///
     /// Removing a file will not affect an eventual
-    /// existing FileSlice pointing to it.
+    /// existing [`FileSlice`] pointing to it.
     ///
-    /// Removing a nonexistent file yields a
-    /// `DeleteError::DoesNotExist`.
+    /// Removing a nonexistent file returns a
+    /// [`DeleteError::FileDoesNotExist`].
     fn delete(&self, path: &Path) -> Result<(), DeleteError>;

     /// Returns true if and only if the file exists
     fn exists(&self, path: &Path) -> Result<bool, OpenReadError>;

     /// Opens a writer for the *virtual file* associated with
-    /// a Path.
+    /// a [`Path`].
     ///
     /// Right after this call, for the span of the execution of the program
-    /// the file should be created and any subsequent call to `open_read` for the
-    /// same path should return a `FileSlice`.
+    /// the file should be created and any subsequent call to
+    /// [`Directory::open_read()`] for the same path should return
+    /// a [`FileSlice`].
     ///
     /// However, depending on the directory implementation,
-    /// it might be required to call `sync_directory` to ensure
+    /// it might be required to call [`Directory::sync_directory()`] to ensure
     /// that the file is durably created.
     /// (The semantics here are the same when dealing with
-    /// a posix filesystem.)
+    /// a POSIX filesystem.)
     ///
     /// Write operations may be aggressively buffered.
     /// The client of this trait is responsible for calling flush
@@ -157,19 +158,19 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
     ///
     /// The flush operation should also be persistent.
     ///
-    /// The user shall not rely on `Drop` triggering `flush`.
-    /// Note that `RamDirectory` will panic! if `flush`
-    /// was not called.
+    /// The user shall not rely on [`Drop`] triggering `flush`.
+    /// Note that [`RamDirectory`][crate::directory::RamDirectory] will
+    /// panic! if `flush` was not called.
     ///
     /// The file may not previously exist.
     fn open_write(&self, path: &Path) -> Result<WritePtr, OpenWriteError>;

     /// Reads the full content of a file that has been written using
-    /// atomic_write.
+    /// [`Directory::atomic_write()`].
     ///
     /// This should only be used for small files.
     ///
-    /// You should only use this to read files created with [Directory::atomic_write].
+    /// You should only use this to read files created with [`Directory::atomic_write()`].
     fn atomic_read(&self, path: &Path) -> Result<Vec<u8>, OpenReadError>;

     /// Atomically replace the content of a file with data.
@@ -188,7 +189,7 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
     /// Acquire a lock in the given directory.
     ///
-    /// The method is blocking or not depending on the `Lock` object.
+    /// The method is blocking or not depending on the [`Lock`] object.
     fn acquire_lock(&self, lock: &Lock) -> Result<DirectoryLock, LockError> {
         let mut box_directory = self.box_clone();
         let mut retry_policy = retry_policy(lock.is_blocking);
@@ -210,15 +211,15 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
     }

     /// Registers a callback that will be called whenever a change on the `meta.json`
-    /// using the `atomic_write` API is detected.
+    /// using the [`Directory::atomic_write()`] API is detected.
     ///
-    /// The behavior when using `.watch()` on a file using [Directory::open_write] is, on the other
-    /// hand, undefined.
+    /// The behavior when using `.watch()` on a file using [`Directory::open_write()`] is, on the
+    /// other hand, undefined.
     ///
     /// The file will be watched for the lifetime of the returned `WatchHandle`. The caller is
     /// required to keep it.
     /// It does not override previous callbacks. When the file is modified, all callbacks that are
-    /// registered (and whose `WatchHandle` is still alive) are triggered.
+    /// registered (and whose [`WatchHandle`] is still alive) are triggered.
     ///
     /// Internally, tantivy only uses this API to detect new commits to implement the
     /// `OnCommit` `ReloadPolicy`. Not implementing watch in a `Directory` only prevents the
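A hedged usage sketch of the two read/write pairs described above, exercised on a `RamDirectory` (assuming the `TerminatingWrite` trait exported from `tantivy::directory` to seal the writer):

```rust
use std::io::Write;
use std::path::Path;

use tantivy::directory::{Directory, RamDirectory, TerminatingWrite};

fn main() -> tantivy::Result<()> {
    let directory = RamDirectory::create();

    // Buffered writes via open_write(); terminate() flushes and seals the
    // file, after which open_read() sees it.
    let path = Path::new("hello.txt");
    let mut wrt = directory.open_write(path)?;
    wrt.write_all(b"hello")?;
    wrt.terminate()?;
    assert_eq!(directory.open_read(path)?.read_bytes()?.as_slice(), b"hello");

    // Small files written with atomic_write() are read back with atomic_read().
    let meta = Path::new("meta.json");
    directory.atomic_write(meta, b"{}")?;
    assert_eq!(directory.atomic_read(meta)?, b"{}".to_vec());
    Ok(())
}
```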


@@ -8,8 +8,8 @@ use once_cell::sync::Lazy;
 /// [`LockParams`](./enum.LockParams.html).
 /// Tantivy itself uses only two locks but client applications
 /// can use the directory facility to define their own locks.
-/// - [INDEX_WRITER_LOCK]
-/// - [META_LOCK]
+/// - [`INDEX_WRITER_LOCK`]
+/// - [`META_LOCK`]
 ///
 /// Check out these locks' documentation for more information.
 #[derive(Debug)]
@@ -30,7 +30,8 @@ pub struct Lock {
 }

 /// Only one process should be able to write tantivy's index at a time.
-/// This lock file, when present, is in charge of preventing other processes to open an IndexWriter.
+/// This lock file, when present, is in charge of preventing other processes from opening an
+/// `IndexWriter`.
 ///
 /// If the process is killed and this file remains, it is safe to remove it manually.
 ///


@@ -56,10 +56,10 @@ fn open_mmap(full_path: &Path) -> result::Result<Option<Mmap>, OpenReadError> {
 #[derive(Default, Clone, Debug, Serialize, Deserialize)]
 pub struct CacheCounters {
-    // Number of time the cache prevents to call `mmap`
+    /// Number of times the cache prevented a call to `mmap`.
     pub hit: usize,
-    // Number of time tantivy had to call `mmap`
-    // as no entry was in the cache.
+    /// Number of times tantivy had to call `mmap`
+    /// as no entry was in the cache.
     pub miss: usize,
 }


@@ -15,7 +15,7 @@ use crate::directory::{
     WatchHandle, WritePtr,
 };

-/// Writer associated with the `RamDirectory`
+/// Writer associated with the [`RamDirectory`].
 ///
 /// The Writer just writes a buffer.
 struct VecWriter {
@@ -137,17 +137,17 @@ impl RamDirectory {
     }

     /// Returns the sum of the size of the different files
-    /// in the RamDirectory.
+    /// in the [`RamDirectory`].
     pub fn total_mem_usage(&self) -> usize {
         self.fs.read().unwrap().total_mem_usage()
     }

-    /// Write a copy of all of the files saved in the RamDirectory in the target `Directory`.
+    /// Write a copy of all of the files saved in the [`RamDirectory`] in the target [`Directory`].
     ///
-    /// Files are all written using the `Directory::write` meaning, even if they were
-    /// written using the `atomic_write` api.
+    /// Files are all written using [`Directory::open_write()`], even if they were
+    /// originally written using the [`Directory::atomic_write()`] API.
     ///
-    /// If an error is encounterred, files may be persisted partially.
+    /// If an error is encountered, files may be persisted partially.
     pub fn persist(&self, dest: &dyn Directory) -> crate::Result<()> {
         let wlock = self.fs.write().unwrap();
         for (path, file) in wlock.fs.iter() {
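A short sketch of `persist()` (here copying into a second `RamDirectory`; an `MmapDirectory` target would persist to disk the same way):

```rust
use std::path::Path;

use tantivy::directory::{Directory, RamDirectory};

fn main() -> tantivy::Result<()> {
    let ram = RamDirectory::create();
    ram.atomic_write(Path::new("meta.json"), b"{}")?;

    let dest = RamDirectory::create();
    // Copies every file, regardless of how it was originally written.
    ram.persist(&dest)?;
    assert!(dest.exists(Path::new("meta.json"))?);
    Ok(())
}
```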


@@ -13,15 +13,17 @@ use crate::DocId;
 /// This `BytesFastFieldWriter` is only useful for advanced users.
 /// The normal way to get your associated bytes in your index
 /// is to
-/// - declare your field with fast set to `Cardinality::SingleValue`
+/// - declare your field with fast set to
+///   [`Cardinality::SingleValue`](crate::schema::Cardinality::SingleValue)
 ///   in your schema
-/// - add your document simply by calling `.add_document(...)` with associating bytes to the field.
+/// - add your document simply by calling `.add_document(...)`, associating bytes with the field.
 ///
 /// The `BytesFastFieldWriter` can be acquired from the
 /// fast field writer by calling
-/// [`.get_bytes_writer(...)`](./struct.FastFieldsWriter.html#method.get_bytes_writer).
+/// [`.get_bytes_writer_mut(...)`](crate::fastfield::FastFieldsWriter::get_bytes_writer_mut).
 ///
-/// Once acquired, writing is done by calling `.add_document_val(&[u8])`
+/// Once acquired, writing is done by calling
+/// [`.add_document_val(&[u8])`](BytesFastFieldWriter::add_document_val)
 /// once per document, even if there are no bytes associated to it.
 pub struct BytesFastFieldWriter {
     field: Field,


@@ -31,7 +31,7 @@ pub const MARGIN_IN_BYTES: usize = 1_000_000;
 pub const MEMORY_ARENA_NUM_BYTES_MIN: usize = ((MARGIN_IN_BYTES as u32) * 3u32) as usize;
 pub const MEMORY_ARENA_NUM_BYTES_MAX: usize = u32::MAX as usize - MARGIN_IN_BYTES;

-// We impose the number of index writer thread to be at most this.
+// We impose the number of index writer threads to be at most this.
 pub const MAX_NUM_THREAD: usize = 8;

 // Add document will block if the number of docs waiting in the queue to be indexed
@@ -40,7 +40,7 @@ const PIPELINE_MAX_SIZE_IN_DOCS: usize = 10_000;
 fn error_in_index_worker_thread(context: &str) -> TantivyError {
     TantivyError::ErrorInThread(format!(
-        "{}. A worker thread encounterred an error (io::Error most likely) or panicked.",
+        "{}. A worker thread encountered an error (io::Error most likely) or panicked.",
         context
     ))
 }
@@ -49,7 +49,7 @@ fn error_in_index_worker_thread(context: &str) -> TantivyError {
 ///
 /// It manages a small number of indexing threads, as well as a shared
 /// indexing queue.
-/// Each indexing thread builds its own independent `Segment`, via
+/// Each indexing thread builds its own independent [`Segment`], via
 /// a `SegmentWriter` object.
 pub struct IndexWriter {
     // the lock is just used to bind the
@@ -385,8 +385,8 @@ impl IndexWriter {
             .operation_receiver()
             .ok_or_else(|| {
                 crate::TantivyError::ErrorInThread(
-                    "The index writer was killed. It can happen if an indexing worker \
-                     encounterred an Io error for instance."
+                    "The index writer was killed. It can happen if an indexing worker encountered \
+                     an IO error for instance."
                         .to_string(),
                 )
             })
@@ -595,14 +595,14 @@ impl IndexWriter {
     /// * `.commit()`: to accept this commit
     /// * `.abort()`: to cancel this commit.
     ///
-    /// In the current implementation, `PreparedCommit` borrows
-    /// the `IndexWriter` mutably so we are guaranteed that no new
+    /// In the current implementation, [`PreparedCommit`] borrows
+    /// the [`IndexWriter`] mutably so we are guaranteed that no new
     /// document can be added as long as it is committed or is
     /// dropped.
     ///
     /// It is also possible to add a payload to the `commit`
     /// using this API.
-    /// See [`PreparedCommit::set_payload()`](PreparedCommit.html)
+    /// See [`PreparedCommit::set_payload()`].
     pub fn prepare_commit(&mut self) -> crate::Result<PreparedCommit> {
         // Here, because we join all of the worker threads,
         // all of the segment updates for this commit have been
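A minimal sketch of the two-phase commit flow described above (the payload string is illustrative):

```rust
use tantivy::schema::{Schema, TEXT};
use tantivy::{doc, Index};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let body = schema_builder.add_text_field("body", TEXT);
    let index = Index::create_in_ram(schema_builder.build());

    let mut writer = index.writer(50_000_000)?;
    writer.add_document(doc!(body => "hello"))?;

    // While `prepared` is alive, `writer` is mutably borrowed:
    // no new document can slip into this commit.
    let mut prepared = writer.prepare_commit()?;
    prepared.set_payload("checkpoint-42");
    prepared.commit()?;
    Ok(())
}
```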


@@ -3,7 +3,7 @@
 //! In "The beauty and the beast", the term "the" appears in position 0 and position 3.
 //! This information is useful to run phrase queries.
 //!
-//! The [position](../enum.SegmentComponent.html#variant.Positions) file contains all of the
+//! The [position](crate::SegmentComponent::Positions) file contains all of the
 //! bitpacked position deltas, for all terms of a given field, one term after the other.
 //!
 //! Each term is encoded independently.
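A worked illustration of the delta encoding mentioned above, independent of tantivy's actual bitpacking:

```rust
fn main() {
    // Hypothetical positions of a term within one document.
    let positions = [3u32, 7, 12];
    // The positions file stores each position as a delta from the previous
    // one; small deltas bitpack well.
    let deltas: Vec<u32> = positions
        .iter()
        .scan(0u32, |prev, &pos| {
            let delta = pos - *prev;
            *prev = pos;
            Some(delta)
        })
        .collect();
    assert_eq!(deltas, vec![3, 4, 5]);

    // Decoding is a prefix sum.
    let decoded: Vec<u32> = deltas
        .iter()
        .scan(0u32, |acc, &d| {
            *acc += d;
            Some(*acc)
        })
        .collect();
    assert_eq!(decoded, positions);
}
```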


@@ -12,7 +12,7 @@ use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
 /// ```
 ///
 /// the `start` argument is just used to hint that the response is
-/// greater than beyond `start`. the implementation may or may not use
+/// greater than beyond `start`. The implementation may or may not use
 /// it for optimization.
 ///
 /// # Assumption


@@ -72,7 +72,7 @@ impl PhraseQuery {
         self.slop = value;
     }

-    /// The `Field` this `PhraseQuery` is targeting.
+    /// The [`Field`] this `PhraseQuery` is targeting.
     pub fn field(&self) -> Field {
         self.field
     }
@@ -85,10 +85,10 @@ impl PhraseQuery {
             .collect::<Vec<Term>>()
     }

-    /// Returns the `PhraseWeight` for the given phrase query given a specific `searcher`.
+    /// Returns the [`PhraseWeight`] for the given phrase query given a specific `searcher`.
     ///
-    /// This function is the same as `.weight(...)` except it returns
-    /// a specialized type `PhraseWeight` instead of a Boxed trait.
+    /// This function is the same as [`Query::weight()`] except it returns
+    /// the specialized type [`PhraseWeight`] instead of a boxed trait object.
     pub(crate) fn phrase_weight(
         &self,
         searcher: &Searcher,
@@ -121,7 +121,7 @@ impl PhraseQuery {
 impl Query for PhraseQuery {
     /// Create the weight associated to a query.
     ///
-    /// See [`Weight`](./trait.Weight.html).
+    /// See [`Weight`].
     fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> crate::Result<Box<dyn Weight>> {
         let phrase_weight = self.phrase_weight(searcher, scoring_enabled)?;
         Ok(Box::new(phrase_weight))
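A hedged sketch of a phrase search end to end, assuming the 0.18-era API and an in-RAM index:

```rust
use tantivy::collector::Count;
use tantivy::query::PhraseQuery;
use tantivy::schema::{Schema, TEXT};
use tantivy::{doc, Index, Term};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let body = schema_builder.add_text_field("body", TEXT);
    let index = Index::create_in_ram(schema_builder.build());

    let mut writer = index.writer(50_000_000)?;
    writer.add_document(doc!(body => "the beauty and the beast"))?;
    writer.commit()?;

    // "the beast": the terms must appear adjacent and in order (slop 0).
    let query = PhraseQuery::new(vec![
        Term::from_field_text(body, "the"),
        Term::from_field_text(body, "beast"),
    ]);
    let searcher = index.reader()?.searcher();
    assert_eq!(searcher.search(&query, &Count)?, 1);
    Ok(())
}
```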


@@ -41,10 +41,12 @@ pub(crate) fn for_each_pruning_scorer<TScorer: Scorer + ?Sized>(
     }
 }

-/// A Weight is the specialization of a Query
+/// A Weight is the specialization of a `Query`
 /// for a given set of segments.
 ///
-/// See [`Query`](./trait.Query.html).
+/// See [`Query`].
+///
+/// [`Query`]: crate::query::Query
 pub trait Weight: Send + Sync + 'static {
     /// Returns the scorer for the given segment.
     ///


@@ -23,7 +23,7 @@ pub enum ReloadPolicy {
     /// The index is entirely reloaded manually.
     /// All updates of the index should be manual.
     ///
-    /// No change is reflected automatically. You are required to call `IndexReader::reload()`
+    /// No change is reflected automatically. You are required to call [`IndexReader::reload()`]
     /// manually.
     Manual,
     /// The index is reloaded within milliseconds after a new commit is available.
@@ -31,11 +31,11 @@ pub enum ReloadPolicy {
     OnCommit, // TODO add NEAR_REAL_TIME(target_ms)
 }

-/// [IndexReader] builder
+/// [`IndexReader`] builder
 ///
 /// It makes it possible to configure:
-/// - [ReloadPolicy] defining when new index versions are detected
-/// - [Warmer] implementations
+/// - [`ReloadPolicy`] defining when new index versions are detected
+/// - [`Warmer`] implementations
 /// - number of warming threads, for parallelizing warming work
 /// - the cache size of the underlying doc store readers.
 #[derive(Clone)]
@@ -108,7 +108,7 @@ impl IndexReaderBuilder {
     /// Sets the reload_policy.
     ///
-    /// See [`ReloadPolicy`](./enum.ReloadPolicy.html) for more details.
+    /// See [`ReloadPolicy`] for more details.
     #[must_use]
     pub fn reload_policy(mut self, reload_policy: ReloadPolicy) -> IndexReaderBuilder {
         self.reload_policy = reload_policy;
@@ -133,8 +133,8 @@ impl IndexReaderBuilder {
     /// Sets the number of warming threads.
     ///
-    /// This allows parallelizing warming work when there are multiple [Warmer] registered with the
-    /// [IndexReader].
+    /// This allows parallelizing warming work when there are multiple [`Warmer`]s registered with
+    /// the [`IndexReader`].
     #[must_use]
     pub fn num_warming_threads(mut self, num_warming_threads: usize) -> IndexReaderBuilder {
         self.num_warming_threads = num_warming_threads;
@@ -186,7 +186,7 @@ impl InnerIndexReader {
             searcher_generation_inventory,
         })
     }

-    /// Opens the freshest segments' `SegmentReader`s.
+    /// Opens the freshest segments' [`SegmentReader`]s.
     ///
     /// This function acquires a lock to prevent GC from removing files
     /// as we are opening our index.
@@ -264,7 +264,7 @@ impl InnerIndexReader {
 /// you instances of `Searcher` for the last loaded version.
 ///
 /// `Clone` does not clone the different pool of searchers. `IndexReader`
-/// just wraps and `Arc`.
+/// just wraps an `Arc`.
 #[derive(Clone)]
 pub struct IndexReader {
     inner: Arc<InnerIndexReader>,
@@ -280,7 +280,7 @@ impl IndexReader {
     /// Update searchers so that they reflect the state of the last
     /// `.commit()`.
     ///
-    /// If you set up the `OnCommit` `ReloadPolicy` (which is the default)
+    /// If you set up the [`ReloadPolicy::OnCommit`] (which is the default)
     /// every commit should be rapidly reflected on your `IndexReader` and you should
     /// not need to call `reload()` at all.
     ///
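A short sketch tying these pieces together (explicit type annotation on the reader for clarity):

```rust
use tantivy::schema::{Schema, TEXT};
use tantivy::{Index, IndexReader, ReloadPolicy};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    schema_builder.add_text_field("body", TEXT);
    let index = Index::create_in_ram(schema_builder.build());

    // With ReloadPolicy::Manual, new commits become visible only after reload().
    let reader: IndexReader = index
        .reader_builder()
        .reload_policy(ReloadPolicy::Manual)
        .try_into()?;
    let _searcher = reader.searcher();
    reader.reload()?;
    Ok(())
}
```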


@@ -27,8 +27,7 @@
 //!
 //! - at the segment level, the
 //!   [`SegmentReader`'s `doc` method](../struct.SegmentReader.html#method.doc)
-//! - at the index level, the
-//!   [`Searcher`'s `doc` method](../struct.Searcher.html#method.doc)
+//! - at the index level, the [`Searcher::doc()`](crate::Searcher::doc) method
 //!
 //! !


@@ -152,7 +152,7 @@ pub use self::whitespace_tokenizer::WhitespaceTokenizer;
 /// Maximum authorized len (in bytes) for a token.
 ///
-/// Tokenizer are in charge of not emitting tokens larger than this value.
+/// Tokenizers are in charge of not emitting tokens larger than this value.
 /// Currently, if a faulty tokenizer implementation emits tokens with a length larger than
 /// `2^16 - 1 - 5`, the token will simply be ignored downstream.
 pub const MAX_TOKEN_LEN: usize = u16::MAX as usize - 5;
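For illustration, a heavily hedged sketch of checking emitted token lengths against `MAX_TOKEN_LEN`, assuming the 0.18-era tokenizer API (`TextAnalyzer::from`, `TokenStream::process`):

```rust
use tantivy::tokenizer::{SimpleTokenizer, TextAnalyzer, TokenStream, MAX_TOKEN_LEN};

fn main() {
    // MAX_TOKEN_LEN == 2^16 - 1 - 5.
    assert_eq!(MAX_TOKEN_LEN, 65_530);

    let analyzer = TextAnalyzer::from(SimpleTokenizer);
    let mut stream = analyzer.token_stream("The beauty and the beast");
    let mut tokens: Vec<String> = Vec::new();
    stream.process(&mut |token| tokens.push(token.text.clone()));

    assert_eq!(tokens.len(), 5);
    // A well-behaved tokenizer never emits a token longer than MAX_TOKEN_LEN bytes.
    assert!(tokens.iter().all(|text| text.len() <= MAX_TOKEN_LEN));
}
```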