Documentation improvements.
Fix some linking, some grammar, some typos, etc.
@@ -95,7 +95,7 @@ called [`Directory`](src/directory/directory.rs).
 Contrary to Lucene however, "files" are quite different from some kind of `io::Read` object.
 Check out [`src/directory/directory.rs`](src/directory/directory.rs) trait for more details.
 
-Tantivy ships two main directory implementation: the `MMapDirectory` and the `RAMDirectory`,
+Tantivy ships two main directory implementation: the `MmapDirectory` and the `RamDirectory`,
 but users can extend tantivy with their own implementation.
 
 ## [schema/](src/schema): What are documents?

@@ -55,13 +55,13 @@ const HIGHEST_BIT: u64 = 1 << 63;
 /// to values over 2^63, and all values end up requiring 64 bits.
 ///
 /// # See also
-/// The [reverse mapping is `u64_to_i64`](./fn.u64_to_i64.html).
+/// The reverse mapping is [`u64_to_i64()`].
 #[inline]
 pub fn i64_to_u64(val: i64) -> u64 {
     (val as u64) ^ HIGHEST_BIT
 }
 
-/// Reverse the mapping given by [`i64_to_u64`](./fn.i64_to_u64.html).
+/// Reverse the mapping given by [`i64_to_u64()`].
 #[inline]
 pub fn u64_to_i64(val: u64) -> i64 {
     (val ^ HIGHEST_BIT) as i64

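The integer mapping in this hunk is small enough to verify end to end. A self-contained sketch, reassembled from the fragments visible above:

```rust
const HIGHEST_BIT: u64 = 1 << 63;

/// Maps `i64` to `u64` so that the natural order on `i64`
/// becomes the natural order on `u64`.
fn i64_to_u64(val: i64) -> u64 {
    (val as u64) ^ HIGHEST_BIT
}

/// Reverse mapping.
fn u64_to_i64(val: u64) -> i64 {
    (val ^ HIGHEST_BIT) as i64
}

fn main() {
    // Flipping the highest bit preserves order across the sign boundary...
    let sorted = [i64::MIN, -1, 0, 1, i64::MAX];
    let mapped: Vec<u64> = sorted.iter().map(|&v| i64_to_u64(v)).collect();
    assert!(mapped.windows(2).all(|w| w[0] < w[1]));
    // ...and the mapping round-trips exactly.
    assert!(sorted.iter().all(|&v| u64_to_i64(i64_to_u64(v)) == v));
}
```
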
@@ -83,7 +83,7 @@ pub fn u64_to_i64(val: u64) -> i64 {
 /// explains the mapping in a clear manner.
 ///
 /// # See also
-/// The [reverse mapping is `u64_to_f64`](./fn.u64_to_f64.html).
+/// The reverse mapping is [`u64_to_f64()`].
 #[inline]
 pub fn f64_to_u64(val: f64) -> u64 {
     let bits = val.to_bits();

@@ -94,7 +94,7 @@ pub fn f64_to_u64(val: f64) -> u64 {
     }
 }
 
-/// Reverse the mapping given by [`i64_to_u64`](./fn.i64_to_u64.html).
+/// Reverse the mapping given by [`f64_to_u64()`].
 #[inline]
 pub fn u64_to_f64(val: u64) -> f64 {
     f64::from_bits(if val & HIGHEST_BIT != 0 {

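Only fragments of the float mapping are visible here; a reconstruction assuming the standard order-preserving trick (flip the sign bit of positives, flip every bit of negatives), consistent with the branches shown:

```rust
const HIGHEST_BIT: u64 = 1 << 63;

// Positive floats: flip the sign bit. Negative floats: flip all bits.
// Together this maps the total order on f64 onto the order on u64.
fn f64_to_u64(val: f64) -> u64 {
    let bits = val.to_bits();
    if val.is_sign_positive() {
        bits ^ HIGHEST_BIT
    } else {
        !bits
    }
}

fn u64_to_f64(val: u64) -> f64 {
    f64::from_bits(if val & HIGHEST_BIT != 0 {
        val ^ HIGHEST_BIT
    } else {
        !val
    })
}

fn main() {
    let sorted = [f64::NEG_INFINITY, -1.5, -0.0, 0.0, 2.5, f64::INFINITY];
    let mapped: Vec<u64> = sorted.iter().map(|&v| f64_to_u64(v)).collect();
    assert!(mapped.windows(2).all(|w| w[0] < w[1]));
    assert!(sorted.iter().all(|&v| u64_to_f64(f64_to_u64(v)) == v));
}
```
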
@@ -55,7 +55,7 @@ impl<W: TerminatingWrite> TerminatingWrite for CountingWriter<W> {
 }
 
 /// Struct used to prevent from calling
-/// [`terminate_ref`](trait.TerminatingWrite.html#tymethod.terminate_ref) directly
+/// [`terminate_ref`](TerminatingWrite::terminate_ref) directly
 ///
 /// The point is that while the type is public, it cannot be built by anyone
 /// outside of this module.

@@ -36,8 +36,7 @@ fn main() -> tantivy::Result<()> {
     // need to be able to be able to retrieve it
     // for our application.
     //
-    // We can make our index lighter and
-    // by omitting `STORED` flag.
+    // We can make our index lighter by omitting the `STORED` flag.
     let body = schema_builder.add_text_field("body", TEXT);
 
     let schema = schema_builder.build();

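The point of the reworded comment: `TEXT` alone makes a field searchable, while `TEXT | STORED` additionally keeps the original value in the doc store. A minimal schema sketch (field names illustrative):

```rust
use tantivy::schema::{Schema, STORED, TEXT};

fn main() {
    let mut schema_builder = Schema::builder();
    // Indexed *and* stored: can be searched and returned in results.
    let _title = schema_builder.add_text_field("title", TEXT | STORED);
    // Indexed only: searchable, but keeps the index lighter since the
    // original text is not kept in the doc store.
    let _body = schema_builder.add_text_field("body", TEXT);
    let _schema = schema_builder.build();
}
```
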
@@ -23,7 +23,7 @@ const ESCAPED_SPECIAL_CHARS_PATTERN: &str = r#"\\(\+|\^|`|:|\{|\}|"|\[|\]|\(|\)|
 /// Parses a field_name
 /// A field name must have at least one character and be followed by a colon.
 /// All characters are allowed including special characters `SPECIAL_CHARS`, but these
-/// need to be escaped with a backslack character '\'.
+/// need to be escaped with a backslash character '\'.
 fn field_name<'a>() -> impl Parser<&'a str, Output = String> {
     static ESCAPED_SPECIAL_CHARS_RE: Lazy<Regex> =
         Lazy::new(|| Regex::new(ESCAPED_SPECIAL_CHARS_PATTERN).unwrap());

@@ -68,7 +68,7 @@ fn word<'a>() -> impl Parser<&'a str, Output = String> {
 ///
 /// NOTE: also accepts 999999-99-99T99:99:99.266051969+99:99
 /// We delegate rejecting such invalid dates to the logical AST computation code
-/// which invokes time::OffsetDateTime::parse(..., &Rfc3339) on the value to actually parse
+/// which invokes `time::OffsetDateTime::parse(..., &Rfc3339)` on the value to actually parse
 /// it (instead of merely extracting the datetime value as string as done here).
 fn date_time<'a>() -> impl Parser<&'a str, Output = String> {
     let two_digits = || recognize::<String, _, _>((digit(), digit()));

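What the delegated validation looks like in practice, assuming the `time` crate with its `parsing` feature enabled:

```rust
use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;

fn main() {
    // A well-formed RFC 3339 timestamp parses fine.
    assert!(OffsetDateTime::parse("1985-04-12T23:20:50.52Z", &Rfc3339).is_ok());

    // The lexical grammar above accepts this shape, but strict RFC 3339
    // parsing rejects the out-of-range date/time components.
    let bogus = "999999-99-99T99:99:99.266051969+99:99";
    assert!(OffsetDateTime::parse(bogus, &Rfc3339).is_err());
}
```
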
@@ -67,10 +67,10 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
 /// (e.g. `/category/fiction`, `/category/biography`, `/category/personal_development`).
 ///
 /// Once collection is finished, you can harvest its results in the form
-/// of a `FacetCounts` object, and extract your face t counts from it.
+/// of a [`FacetCounts`] object, and extract your facet counts from it.
 ///
 /// This implementation assumes you are working with a number of facets that
-/// is much hundreds of time lower than your number of documents.
+/// is many hundreds of times smaller than your number of documents.
 ///
 ///
 /// ```rust

@@ -231,7 +231,7 @@ impl FacetCollector {
     ///
     /// Adding two facets within which one is the prefix of the other is forbidden.
     /// If you need the correct number of unique documents for two such facets,
-    /// just add them in separate `FacetCollector`.
+    /// just add them in a separate `FacetCollector`.
     pub fn add_facet<T>(&mut self, facet_from: T)
     where Facet: From<T> {
         let facet = Facet::from(facet_from);

@@ -391,7 +391,7 @@ impl<'a> Iterator for FacetChildIterator<'a> {
 
 impl FacetCounts {
     /// Returns an iterator over all of the facet count pairs inside this result.
-    /// See the documentation for [FacetCollector] for a usage example.
+    /// See the documentation for [`FacetCollector`] for a usage example.
     pub fn get<T>(&self, facet_from: T) -> FacetChildIterator<'_>
     where Facet: From<T> {
         let facet = Facet::from(facet_from);

@@ -410,7 +410,7 @@ impl FacetCounts {
     }
 
     /// Returns a vector of top `k` facets with their counts, sorted highest-to-lowest by counts.
-    /// See the documentation for [FacetCollector] for a usage example.
+    /// See the documentation for [`FacetCollector`] for a usage example.
     pub fn top_k<T>(&self, facet: T, k: usize) -> Vec<(&Facet, u64)>
     where Facet: From<T> {
         let mut heap = BinaryHeap::with_capacity(k);

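How `FacetCollector` and `FacetCounts` fit together, as a hedged end-to-end sketch for roughly this era of the API (`for_field` takes a `Field` here; later versions take a field name, and other signatures may have drifted):

```rust
use tantivy::collector::FacetCollector;
use tantivy::query::AllQuery;
use tantivy::schema::{Facet, FacetOptions, Schema};
use tantivy::{doc, Index};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let category = schema_builder.add_facet_field("category", FacetOptions::default());
    let index = Index::create_in_ram(schema_builder.build());

    let mut writer = index.writer(50_000_000)?;
    writer.add_document(doc!(category => Facet::from("/category/fiction")))?;
    writer.add_document(doc!(category => Facet::from("/category/fiction")))?;
    writer.add_document(doc!(category => Facet::from("/category/biography")))?;
    writer.commit()?;

    let searcher = index.reader()?.searcher();
    let mut facet_collector = FacetCollector::for_field(category);
    facet_collector.add_facet("/category");
    // Harvest the fruit: a FacetCounts object.
    let facet_counts = searcher.search(&AllQuery, &facet_collector)?;
    for (facet, count) in facet_counts.get("/category") {
        println!("{}: {}", facet, count);
    }
    Ok(())
}
```
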
@@ -4,9 +4,9 @@
 //! In tantivy jargon, we call this information your search "fruit".
 //!
 //! Your fruit could for instance be :
-//! - [the count of matching documents](./struct.Count.html)
-//! - [the top 10 documents, by relevancy or by a fast field](./struct.TopDocs.html)
-//! - [facet counts](./struct.FacetCollector.html)
+//! - [the count of matching documents](crate::collector::Count)
+//! - [the top 10 documents, by relevancy or by a fast field](crate::collector::TopDocs)
+//! - [facet counts](FacetCollector)
 //!
 //! At one point in your code, you will trigger the actual search operation by calling
 //! [the `search(...)` method of your `Searcher` object](../struct.Searcher.html#method.search).

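A sketch of the "fruit" idea: collectors compose, so a tuple of collectors yields a tuple of fruits from a single search pass (APIs of roughly this era assumed):

```rust
use tantivy::collector::{Count, TopDocs};
use tantivy::query::QueryParser;
use tantivy::schema::{Schema, TEXT};
use tantivy::{doc, Index};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let body = schema_builder.add_text_field("body", TEXT);
    let index = Index::create_in_ram(schema_builder.build());

    let mut writer = index.writer(50_000_000)?;
    writer.add_document(doc!(body => "the old man and the sea"))?;
    writer.commit()?;

    let searcher = index.reader()?.searcher();
    let query = QueryParser::for_index(&index, vec![body]).parse_query("sea")?;
    // Two collectors, one pass: the fruit is a (count, top-docs) pair.
    let (count, top_docs) = searcher.search(&query, &(Count, TopDocs::with_limit(10)))?;
    println!("{} hits, top {} returned", count, top_docs.len());
    Ok(())
}
```
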
@@ -78,8 +78,8 @@ fn save_new_metas(
 
 /// IndexBuilder can be used to create an index.
 ///
-/// Use in conjunction with `SchemaBuilder`. Global index settings
-/// can be configured with `IndexSettings`
+/// Use in conjunction with [`SchemaBuilder`][crate::schema::SchemaBuilder].
+/// Global index settings can be configured with [`IndexSettings`].
 ///
 /// # Examples
 ///

@@ -97,7 +97,13 @@ fn save_new_metas(
 /// );
 ///
 /// let schema = schema_builder.build();
-/// let settings = IndexSettings{sort_by_field: Some(IndexSortByField{field:"number".to_string(), order:Order::Asc}), ..Default::default()};
+/// let settings = IndexSettings{
+///     sort_by_field: Some(IndexSortByField{
+///         field: "number".to_string(),
+///         order: Order::Asc
+///     }),
+///     ..Default::default()
+/// };
 /// let index = Index::builder().schema(schema).settings(settings).create_in_ram();
 /// ```
 pub struct IndexBuilder {

@@ -140,7 +146,7 @@ impl IndexBuilder {
         self
     }
 
-    /// Creates a new index using the `RAMDirectory`.
+    /// Creates a new index using the [`RamDirectory`].
     ///
     /// The index will be allocated in anonymous memory.
     /// This should only be used for unit tests.

@@ -148,13 +154,14 @@ impl IndexBuilder {
         let ram_directory = RamDirectory::create();
         Ok(self
             .create(ram_directory)
-            .expect("Creating a RAMDirectory should never fail"))
+            .expect("Creating a RamDirectory should never fail"))
     }
 
     /// Creates a new index in a given filepath.
-    /// The index will use the `MMapDirectory`.
+    /// The index will use the [`MmapDirectory`].
     ///
-    /// If a previous index was in this directory, it returns an `IndexAlreadyExists` error.
+    /// If a previous index was in this directory, it returns an
+    /// [`TantivyError::IndexAlreadyExists`] error.
     #[cfg(feature = "mmap")]
     pub fn create_in_dir<P: AsRef<Path>>(self, directory_path: P) -> crate::Result<Index> {
         let mmap_directory: Box<dyn Directory> = Box::new(MmapDirectory::open(directory_path)?);

@@ -185,12 +192,13 @@ impl IndexBuilder {
 
     /// Creates a new index in a temp directory.
     ///
-    /// The index will use the `MMapDirectory` in a newly created directory.
-    /// The temp directory will be destroyed automatically when the `Index` object
+    /// The index will use the [`MmapDirectory`] in a newly created directory.
+    /// The temp directory will be destroyed automatically when the [`Index`] object
     /// is destroyed.
     ///
-    /// The temp directory is only used for testing the `MmapDirectory`.
-    /// For other unit tests, prefer the `RAMDirectory`, see: `create_in_ram`.
+    /// The temp directory is only used for testing the [`MmapDirectory`].
+    /// For other unit tests, prefer the [`RamDirectory`], see:
+    /// [`IndexBuilder::create_in_ram()`].
     #[cfg(feature = "mmap")]
    pub fn create_from_tempdir(self) -> crate::Result<Index> {
        let mmap_directory: Box<dyn Directory> = Box::new(MmapDirectory::create_from_tempdir()?);

@@ -286,7 +294,7 @@ impl Index {
         self.set_multithread_executor(default_num_threads)
     }
 
-    /// Creates a new index using the `RamDirectory`.
+    /// Creates a new index using the [`RamDirectory`].
     ///
     /// The index will be allocated in anonymous memory.
     /// This is useful for indexing small set of documents

@@ -296,9 +304,10 @@ impl Index {
     }
 
     /// Creates a new index in a given filepath.
-    /// The index will use the `MMapDirectory`.
+    /// The index will use the [`MmapDirectory`].
     ///
-    /// If a previous index was in this directory, then it returns an `IndexAlreadyExists` error.
+    /// If a previous index was in this directory, then it returns
+    /// a [`TantivyError::IndexAlreadyExists`] error.
     #[cfg(feature = "mmap")]
     pub fn create_in_dir<P: AsRef<Path>>(
         directory_path: P,

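The documented error path is easy to trigger; a sketch using the `tempfile` crate for a scratch directory (the `mmap` feature is assumed):

```rust
use tantivy::schema::{Schema, TEXT};
use tantivy::{Index, TantivyError};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    schema_builder.add_text_field("body", TEXT);
    let schema = schema_builder.build();

    let dir = tempfile::TempDir::new()?;
    let _index = Index::create_in_dir(dir.path(), schema.clone())?;
    // Creating a second index in the same directory fails as documented.
    match Index::create_in_dir(dir.path(), schema) {
        Err(TantivyError::IndexAlreadyExists) => println!("already exists, as documented"),
        Err(e) => println!("unexpected error: {:?}", e),
        Ok(_) => println!("unexpectedly succeeded"),
    }
    Ok(())
}
```
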
@@ -320,12 +329,13 @@ impl Index {
 
     /// Creates a new index in a temp directory.
     ///
-    /// The index will use the `MMapDirectory` in a newly created directory.
-    /// The temp directory will be destroyed automatically when the `Index` object
+    /// The index will use the [`MmapDirectory`] in a newly created directory.
+    /// The temp directory will be destroyed automatically when the [`Index`] object
     /// is destroyed.
     ///
-    /// The temp directory is only used for testing the `MmapDirectory`.
-    /// For other unit tests, prefer the `RamDirectory`, see: `create_in_ram`.
+    /// The temp directory is only used for testing the [`MmapDirectory`].
+    /// For other unit tests, prefer the [`RamDirectory`],
+    /// see: [`IndexBuilder::create_in_ram()`].
     #[cfg(feature = "mmap")]
     pub fn create_from_tempdir(schema: Schema) -> crate::Result<Index> {
         IndexBuilder::new().schema(schema).create_from_tempdir()

@@ -345,7 +355,7 @@ impl Index {
         builder.create(dir)
     }
 
-    /// Creates a new index given a directory and an `IndexMeta`.
+    /// Creates a new index given a directory and an [`IndexMeta`].
     fn open_from_metas(
         directory: ManagedDirectory,
         metas: &IndexMeta,

@@ -372,7 +382,7 @@ impl Index {
         &self.tokenizers
     }
 
-    /// Helper to access the tokenizer associated to a specific field.
+    /// Get the tokenizer associated with a specific field.
     pub fn tokenizer_for_field(&self, field: Field) -> crate::Result<TextAnalyzer> {
         let field_entry = self.schema.get_field_entry(field);
         let field_type = field_entry.field_type();

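The helper in action; `TEXT` binds the field to the built-in `default` tokenizer, which splits and lowercases (a sketch, APIs of roughly this era):

```rust
use tantivy::schema::{Schema, TEXT};
use tantivy::Index;

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let title = schema_builder.add_text_field("title", TEXT);
    let index = Index::create_in_ram(schema_builder.build());

    let tokenizer = index.tokenizer_for_field(title)?;
    let mut stream = tokenizer.token_stream("The Beauty and the Beast");
    while stream.advance() {
        let token = stream.token();
        println!("{:?} at position {}", token.text, token.position);
    }
    Ok(())
}
```
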
@@ -404,14 +414,14 @@ impl Index {
         })
     }
 
-    /// Create a default `IndexReader` for the given index.
+    /// Create a default [`IndexReader`] for the given index.
     ///
-    /// See [`Index.reader_builder()`](#method.reader_builder).
+    /// See [`Index.reader_builder()`].
     pub fn reader(&self) -> crate::Result<IndexReader> {
         self.reader_builder().try_into()
     }
 
-    /// Create a `IndexReader` for the given index.
+    /// Create a [`IndexReader`] for the given index.
     ///
     /// Most project should create at most one reader for a given index.
     /// This method is typically called only once per `Index` instance.

@@ -82,7 +82,7 @@ impl Searcher {
     /// Fetches a document from tantivy's store given a `DocAddress`.
     ///
     /// The searcher uses the segment ordinal to route the
-    /// the request to the right `Segment`.
+    /// request to the right `Segment`.
     pub fn doc(&self, doc_address: DocAddress) -> crate::Result<Document> {
         let store_reader = &self.inner.store_readers[doc_address.segment_ord as usize];
         store_reader.get(doc_address.doc_id)

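Putting `reader()`, `search()` and `doc()` together: the `DocAddress` a collector returns is exactly what `doc()` consumes (a sketch, APIs of roughly this era):

```rust
use tantivy::collector::TopDocs;
use tantivy::query::QueryParser;
use tantivy::schema::{Schema, STORED, TEXT};
use tantivy::{doc, Index};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let title = schema_builder.add_text_field("title", TEXT | STORED);
    let schema = schema_builder.build();
    let index = Index::create_in_ram(schema.clone());

    let mut writer = index.writer(50_000_000)?;
    writer.add_document(doc!(title => "The Old Man and the Sea"))?;
    writer.commit()?;

    let searcher = index.reader()?.searcher();
    let query = QueryParser::for_index(&index, vec![title]).parse_query("sea")?;
    for (_score, doc_address) in searcher.search(&query, &TopDocs::with_limit(10))? {
        // The segment ordinal inside `doc_address` routes this lookup
        // to the right segment's store.
        let retrieved = searcher.doc(doc_address)?;
        println!("{}", schema.to_json(&retrieved));
    }
    Ok(())
}
```
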
@@ -117,9 +117,9 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
     /// change.
     ///
     /// Specifically, subsequent writes or flushes should
-    /// have no effect on the returned `FileSlice` object.
+    /// have no effect on the returned [`FileSlice`] object.
     ///
-    /// You should only use this to read files create with [Directory::open_write].
+    /// You should only use this to read files create with [`Directory::open_write()`].
     fn open_read(&self, path: &Path) -> Result<FileSlice, OpenReadError> {
         let file_handle = self.get_file_handle(path)?;
         Ok(FileSlice::new(file_handle))

@@ -128,27 +128,28 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
     /// Removes a file
     ///
     /// Removing a file will not affect an eventual
-    /// existing FileSlice pointing to it.
+    /// existing [`FileSlice`] pointing to it.
     ///
-    /// Removing a nonexistent file, yields a
-    /// `DeleteError::DoesNotExist`.
+    /// Removing a nonexistent file, returns a
+    /// [`DeleteError::FileDoesNotExist`].
     fn delete(&self, path: &Path) -> Result<(), DeleteError>;
 
     /// Returns true if and only if the file exists
     fn exists(&self, path: &Path) -> Result<bool, OpenReadError>;
 
     /// Opens a writer for the *virtual file* associated with
-    /// a Path.
+    /// a [`Path`].
     ///
     /// Right after this call, for the span of the execution of the program
-    /// the file should be created and any subsequent call to `open_read` for the
-    /// same path should return a `FileSlice`.
+    /// the file should be created and any subsequent call to
+    /// [`Directory::open_read()`] for the same path should return
+    /// a [`FileSlice`].
     ///
     /// However, depending on the directory implementation,
-    /// it might be required to call `sync_directory` to ensure
+    /// it might be required to call [`Directory::sync_directory()`] to ensure
     /// that the file is durably created.
     /// (The semantics here are the same when dealing with
-    /// a posix filesystem.)
+    /// a POSIX filesystem.)
     ///
     /// Write operations may be aggressively buffered.
     /// The client of this trait is responsible for calling flush

@@ -157,19 +158,19 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
     ///
     /// Flush operation should also be persistent.
     ///
-    /// The user shall not rely on `Drop` triggering `flush`.
-    /// Note that `RamDirectory` will panic! if `flush`
-    /// was not called.
+    /// The user shall not rely on [`Drop`] triggering `flush`.
+    /// Note that [`RamDirectory`][crate::directory::RamDirectory] will
+    /// panic! if `flush` was not called.
     ///
     /// The file may not previously exist.
     fn open_write(&self, path: &Path) -> Result<WritePtr, OpenWriteError>;
 
     /// Reads the full content file that has been written using
-    /// atomic_write.
+    /// [`Directory::atomic_write()`].
     ///
     /// This should only be used for small files.
     ///
-    /// You should only use this to read files create with [Directory::atomic_write].
+    /// You should only use this to read files create with [`Directory::atomic_write()`].
     fn atomic_read(&self, path: &Path) -> Result<Vec<u8>, OpenReadError>;
 
     /// Atomically replace the content of a file with data.

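Both read/write paths of the trait, exercised against `RamDirectory`; calling `terminate()` rather than merely dropping the writer is what makes the buffered write visible (a sketch):

```rust
use std::error::Error;
use std::io::Write;
use std::path::Path;

use tantivy::directory::{Directory, RamDirectory, TerminatingWrite};

fn main() -> Result<(), Box<dyn Error>> {
    let dir = RamDirectory::create();

    // Buffered path: open_write / terminate, then open_read.
    let mut wrt = dir.open_write(Path::new("segment.data"))?;
    wrt.write_all(b"hello")?;
    wrt.terminate()?;
    let slice = dir.open_read(Path::new("segment.data"))?;
    assert_eq!(slice.read_bytes()?.as_slice(), &b"hello"[..]);

    // Atomic path: the whole payload is swapped in at once.
    dir.atomic_write(Path::new("meta.json"), b"{}")?;
    assert_eq!(dir.atomic_read(Path::new("meta.json"))?, b"{}");
    Ok(())
}
```
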
@@ -188,7 +189,7 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
 
     /// Acquire a lock in the given directory.
     ///
-    /// The method is blocking or not depending on the `Lock` object.
+    /// The method is blocking or not depending on the [`Lock`] object.
     fn acquire_lock(&self, lock: &Lock) -> Result<DirectoryLock, LockError> {
         let mut box_directory = self.box_clone();
         let mut retry_policy = retry_policy(lock.is_blocking);

@@ -210,15 +211,15 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
     }
 
     /// Registers a callback that will be called whenever a change on the `meta.json`
-    /// using the `atomic_write` API is detected.
+    /// using the [`Directory::atomic_write()`] API is detected.
     ///
-    /// The behavior when using `.watch()` on a file using [Directory::open_write] is, on the other
-    /// hand, undefined.
+    /// The behavior when using `.watch()` on a file using [`Directory::open_write()`] is, on the
+    /// other hand, undefined.
     ///
     /// The file will be watched for the lifetime of the returned `WatchHandle`. The caller is
     /// required to keep it.
     /// It does not override previous callbacks. When the file is modified, all callback that are
-    /// registered (and whose `WatchHandle` is still alive) are triggered.
+    /// registered (and whose [`WatchHandle`] is still alive) are triggered.
     ///
     /// Internally, tantivy only uses this API to detect new commits to implement the
     /// `OnCommit` `ReloadPolicy`. Not implementing watch in a `Directory` only prevents the

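A sketch of the watch API against `RamDirectory`, assuming `WatchCallback::new` wraps a plain closure as it does in this era; dropping the returned handle unregisters the callback:

```rust
use std::path::Path;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;

use tantivy::directory::{Directory, RamDirectory, WatchCallback};

fn main() -> tantivy::Result<()> {
    let dir = RamDirectory::create();
    let counter = Arc::new(AtomicUsize::new(0));
    let counter_clone = counter.clone();
    // Keep `_handle` alive for as long as the callback should fire.
    let _handle = dir.watch(WatchCallback::new(move || {
        counter_clone.fetch_add(1, Ordering::SeqCst);
    }))?;
    // An atomic write to meta.json is the event being watched for.
    dir.atomic_write(Path::new("meta.json"), b"{}")?;
    Ok(())
}
```
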
@@ -8,8 +8,8 @@ use once_cell::sync::Lazy;
 /// [`LockParams`](./enum.LockParams.html).
 /// Tantivy itself uses only two locks but client application
 /// can use the directory facility to define their own locks.
-/// - [INDEX_WRITER_LOCK]
-/// - [META_LOCK]
+/// - [`INDEX_WRITER_LOCK`]
+/// - [`META_LOCK`]
 ///
 /// Check out these locks documentation for more information.
 #[derive(Debug)]

@@ -30,7 +30,8 @@ pub struct Lock {
 }
 
 /// Only one process should be able to write tantivy's index at a time.
-/// This lock file, when present, is in charge of preventing other processes to open an IndexWriter.
+/// This lock file, when present, is in charge of preventing other processes to open an
+/// `IndexWriter`.
 ///
 /// If the process is killed and this file remains, it is safe to remove it manually.
 ///

@@ -56,10 +56,10 @@ fn open_mmap(full_path: &Path) -> result::Result<Option<Mmap>, OpenReadError> {
 
 #[derive(Default, Clone, Debug, Serialize, Deserialize)]
 pub struct CacheCounters {
-    // Number of time the cache prevents to call `mmap`
+    /// Number of time the cache prevents to call `mmap`
     pub hit: usize,
-    // Number of time tantivy had to call `mmap`
-    // as no entry was in the cache.
+    /// Number of time tantivy had to call `mmap`
+    /// as no entry was in the cache.
     pub miss: usize,
 }
 

@@ -15,7 +15,7 @@ use crate::directory::{
     WatchHandle, WritePtr,
 };
 
-/// Writer associated with the `RamDirectory`
+/// Writer associated with the [`RamDirectory`].
 ///
 /// The Writer just writes a buffer.
 struct VecWriter {

@@ -137,17 +137,17 @@ impl RamDirectory {
     }
 
     /// Returns the sum of the size of the different files
-    /// in the RamDirectory.
+    /// in the [`RamDirectory`].
     pub fn total_mem_usage(&self) -> usize {
         self.fs.read().unwrap().total_mem_usage()
     }
 
-    /// Write a copy of all of the files saved in the RamDirectory in the target `Directory`.
+    /// Write a copy of all of the files saved in the [`RamDirectory`] in the target [`Directory`].
     ///
-    /// Files are all written using the `Directory::write` meaning, even if they were
-    /// written using the `atomic_write` api.
+    /// Files are all written using the [`Directory::open_write()`] meaning, even if they were
+    /// written using the [`Directory::atomic_write()`] api.
     ///
-    /// If an error is encounterred, files may be persisted partially.
+    /// If an error is encountered, files may be persisted partially.
     pub fn persist(&self, dest: &dyn Directory) -> crate::Result<()> {
         let wlock = self.fs.write().unwrap();
         for (path, file) in wlock.fs.iter() {

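`persist()` in miniature: every file in the source `RamDirectory` is copied to the destination with plain writes, whether or not it was originally written atomically (a sketch):

```rust
use std::path::Path;

use tantivy::directory::{Directory, RamDirectory};

fn main() -> tantivy::Result<()> {
    let src = RamDirectory::create();
    src.atomic_write(Path::new("meta.json"), b"{}")?;

    let dest = RamDirectory::create();
    src.persist(&dest)?;
    assert!(dest.exists(Path::new("meta.json"))?);
    Ok(())
}
```
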
@@ -13,15 +13,17 @@ use crate::DocId;
 /// This `BytesFastFieldWriter` is only useful for advanced users.
 /// The normal way to get your associated bytes in your index
 /// is to
-/// - declare your field with fast set to `Cardinality::SingleValue`
+/// - declare your field with fast set to
+///   [`Cardinality::SingleValue`](crate::schema::Cardinality::SingleValue)
 ///   in your schema
 /// - add your document simply by calling `.add_document(...)` with associating bytes to the field.
 ///
 /// The `BytesFastFieldWriter` can be acquired from the
 /// fast field writer by calling
-/// [`.get_bytes_writer(...)`](./struct.FastFieldsWriter.html#method.get_bytes_writer).
+/// [`.get_bytes_writer_mut(...)`](crate::fastfield::FastFieldsWriter::get_bytes_writer_mut).
 ///
-/// Once acquired, writing is done by calling `.add_document_val(&[u8])`
+/// Once acquired, writing is done by calling
+/// [`.add_document_val(&[u8])`](BytesFastFieldWriter::add_document_val)
 /// once per document, even if there are no bytes associated to it.
 pub struct BytesFastFieldWriter {
     field: Field,

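The "normal way" described above, sketched under the assumption that `add_bytes_field` accepts the `FAST` flag in this era (the options types vary across versions):

```rust
use tantivy::schema::{Schema, FAST};
use tantivy::{doc, Index};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    // Declares a single-valued fast field holding raw bytes.
    let payload = schema_builder.add_bytes_field("payload", FAST);
    let index = Index::create_in_ram(schema_builder.build());

    let mut writer = index.writer(50_000_000)?;
    // Bytes are attached to the document like any other value.
    writer.add_document(doc!(payload => vec![0u8, 1, 2]))?;
    writer.commit()?;
    Ok(())
}
```
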
@@ -31,7 +31,7 @@ pub const MARGIN_IN_BYTES: usize = 1_000_000;
 pub const MEMORY_ARENA_NUM_BYTES_MIN: usize = ((MARGIN_IN_BYTES as u32) * 3u32) as usize;
 pub const MEMORY_ARENA_NUM_BYTES_MAX: usize = u32::MAX as usize - MARGIN_IN_BYTES;
 
-// We impose the number of index writer thread to be at most this.
+// We impose the number of index writer threads to be at most this.
 pub const MAX_NUM_THREAD: usize = 8;
 
 // Add document will block if the number of docs waiting in the queue to be indexed

@@ -40,7 +40,7 @@ const PIPELINE_MAX_SIZE_IN_DOCS: usize = 10_000;
 
 fn error_in_index_worker_thread(context: &str) -> TantivyError {
     TantivyError::ErrorInThread(format!(
-        "{}. A worker thread encounterred an error (io::Error most likely) or panicked.",
+        "{}. A worker thread encountered an error (io::Error most likely) or panicked.",
         context
     ))
 }

@@ -49,7 +49,7 @@ fn error_in_index_worker_thread(context: &str) -> TantivyError {
 ///
 /// It manages a small number of indexing thread, as well as a shared
 /// indexing queue.
-/// Each indexing thread builds its own independent `Segment`, via
+/// Each indexing thread builds its own independent [`Segment`], via
 /// a `SegmentWriter` object.
 pub struct IndexWriter {
     // the lock is just used to bind the

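The thread-and-queue layout in use; `writer_with_num_threads` takes a thread count and an overall heap budget split across threads, at least 3MB each per the constants above (a sketch):

```rust
use tantivy::schema::{Schema, TEXT};
use tantivy::{doc, Index};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let body = schema_builder.add_text_field("body", TEXT);
    let index = Index::create_in_ram(schema_builder.build());

    // Two indexing threads pulling from a shared queue, each building
    // its own independent segment.
    let mut writer = index.writer_with_num_threads(2, 30_000_000)?;
    for i in 0..100 {
        writer.add_document(doc!(body => format!("document number {}", i)))?;
    }
    writer.commit()?;
    Ok(())
}
```
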
@@ -385,8 +385,8 @@ impl IndexWriter {
             .operation_receiver()
             .ok_or_else(|| {
                 crate::TantivyError::ErrorInThread(
-                    "The index writer was killed. It can happen if an indexing worker \
-                     encounterred an Io error for instance."
+                    "The index writer was killed. It can happen if an indexing worker encountered \
+                     an Io error for instance."
                         .to_string(),
                 )
             })

@@ -595,14 +595,14 @@ impl IndexWriter {
     /// * `.commit()`: to accept this commit
     /// * `.abort()`: to cancel this commit.
     ///
-    /// In the current implementation, `PreparedCommit` borrows
-    /// the `IndexWriter` mutably so we are guaranteed that no new
+    /// In the current implementation, [`PreparedCommit`] borrows
+    /// the [`IndexWriter`] mutably so we are guaranteed that no new
     /// document can be added as long as it is committed or is
     /// dropped.
     ///
     /// It is also possible to add a payload to the `commit`
     /// using this API.
-    /// See [`PreparedCommit::set_payload()`](PreparedCommit.html)
+    /// See [`PreparedCommit::set_payload()`].
     pub fn prepare_commit(&mut self) -> crate::Result<PreparedCommit> {
         // Here, because we join all of the worker threads,
         // all of the segment update for this commit have been

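The two-phase commit flow, sketched (assuming `set_payload` takes a `&str`, as in this era):

```rust
use tantivy::schema::{Schema, TEXT};
use tantivy::{doc, Index};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let body = schema_builder.add_text_field("body", TEXT);
    let index = Index::create_in_ram(schema_builder.build());

    let mut writer = index.writer(50_000_000)?;
    writer.add_document(doc!(body => "first batch"))?;

    // While `prepared` lives, the writer is mutably borrowed:
    // no new document can slip into this commit.
    let mut prepared = writer.prepare_commit()?;
    prepared.set_payload("batch-1");
    prepared.commit()?;
    Ok(())
}
```
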
@@ -3,7 +3,7 @@
 //! In "The beauty and the beast", the term "the" appears in position 0 and position 3.
 //! This information is useful to run phrase queries.
 //!
-//! The [position](../enum.SegmentComponent.html#variant.Positions) file contains all of the
+//! The [position](crate::SegmentComponent::Positions) file contains all of the
 //! bitpacked positions delta, for all terms of a given field, one term after the other.
 //!
 //! Each term is encoded independently.

@@ -12,7 +12,7 @@ use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
 /// ```
 ///
 /// the `start` argument is just used to hint that the response is
-/// greater than beyond `start`. the implementation may or may not use
+/// greater than beyond `start`. The implementation may or may not use
 /// it for optimization.
 ///
 /// # Assumption

@@ -72,7 +72,7 @@ impl PhraseQuery {
         self.slop = value;
     }
 
-    /// The `Field` this `PhraseQuery` is targeting.
+    /// The [`Field`] this `PhraseQuery` is targeting.
     pub fn field(&self) -> Field {
         self.field
     }

@@ -85,10 +85,10 @@ impl PhraseQuery {
             .collect::<Vec<Term>>()
     }
 
-    /// Returns the `PhraseWeight` for the given phrase query given a specific `searcher`.
+    /// Returns the [`PhraseWeight`] for the given phrase query given a specific `searcher`.
     ///
-    /// This function is the same as `.weight(...)` except it returns
-    /// a specialized type `PhraseWeight` instead of a Boxed trait.
+    /// This function is the same as [`Query::weight()`] except it returns
+    /// a specialized type [`PhraseWeight`] instead of a Boxed trait.
     pub(crate) fn phrase_weight(
         &self,
         searcher: &Searcher,

@@ -121,7 +121,7 @@ impl PhraseQuery {
 impl Query for PhraseQuery {
     /// Create the weight associated to a query.
     ///
-    /// See [`Weight`](./trait.Weight.html).
+    /// See [`Weight`].
     fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> crate::Result<Box<dyn Weight>> {
         let phrase_weight = self.phrase_weight(searcher, scoring_enabled)?;
         Ok(Box::new(phrase_weight))

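A phrase query end to end; the default `TEXT` indexing records positions, which phrase queries require (a sketch, APIs of roughly this era):

```rust
use tantivy::collector::Count;
use tantivy::query::PhraseQuery;
use tantivy::schema::{Schema, TEXT};
use tantivy::{doc, Index, Term};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let body = schema_builder.add_text_field("body", TEXT);
    let index = Index::create_in_ram(schema_builder.build());

    let mut writer = index.writer(50_000_000)?;
    writer.add_document(doc!(body => "the beauty and the beast"))?;
    writer.commit()?;

    let searcher = index.reader()?.searcher();
    // "beauty" immediately followed by "and".
    let query = PhraseQuery::new(vec![
        Term::from_field_text(body, "beauty"),
        Term::from_field_text(body, "and"),
    ]);
    assert_eq!(searcher.search(&query, &Count)?, 1);
    Ok(())
}
```
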
@@ -41,10 +41,12 @@ pub(crate) fn for_each_pruning_scorer<TScorer: Scorer + ?Sized>(
     }
 }
 
-/// A Weight is the specialization of a Query
+/// A Weight is the specialization of a `Query`
 /// for a given set of segments.
 ///
-/// See [`Query`](./trait.Query.html).
+/// See [`Query`].
+///
+/// [`Query`]: crate::query::Query
 pub trait Weight: Send + Sync + 'static {
     /// Returns the scorer for the given segment.
     ///

@@ -23,7 +23,7 @@ pub enum ReloadPolicy {
     /// The index is entirely reloaded manually.
     /// All updates of the index should be manual.
     ///
-    /// No change is reflected automatically. You are required to call `IndexReader::reload()`
+    /// No change is reflected automatically. You are required to call [`IndexReader::reload()`]
     /// manually.
     Manual,
     /// The index is reloaded within milliseconds after a new commit is available.

@@ -31,11 +31,11 @@ pub enum ReloadPolicy {
     OnCommit, // TODO add NEAR_REAL_TIME(target_ms)
 }
 
-/// [IndexReader] builder
+/// [`IndexReader`] builder
 ///
 /// It makes it possible to configure:
-/// - [ReloadPolicy] defining when new index versions are detected
-/// - [Warmer] implementations
+/// - [`ReloadPolicy`] defining when new index versions are detected
+/// - [`Warmer`] implementations
 /// - number of warming threads, for parallelizing warming work
 /// - The cache size of the underlying doc store readers.
 #[derive(Clone)]

@@ -108,7 +108,7 @@ impl IndexReaderBuilder {
 
     /// Sets the reload_policy.
    ///
-    /// See [`ReloadPolicy`](./enum.ReloadPolicy.html) for more details.
+    /// See [`ReloadPolicy`] for more details.
     #[must_use]
     pub fn reload_policy(mut self, reload_policy: ReloadPolicy) -> IndexReaderBuilder {
         self.reload_policy = reload_policy;

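The builder in use; it converts into an `IndexReader` via `TryInto` (a sketch):

```rust
use tantivy::schema::{Schema, TEXT};
use tantivy::{Index, IndexReader, ReloadPolicy};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    schema_builder.add_text_field("body", TEXT);
    let index = Index::create_in_ram(schema_builder.build());

    // With `Manual`, new commits are only visible after `reload()`.
    let reader: IndexReader = index
        .reader_builder()
        .reload_policy(ReloadPolicy::Manual)
        .try_into()?;
    reader.reload()?;
    let _searcher = reader.searcher();
    Ok(())
}
```
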
@@ -133,8 +133,8 @@ impl IndexReaderBuilder {
 
     /// Sets the number of warming threads.
     ///
-    /// This allows parallelizing warming work when there are multiple [Warmer] registered with the
-    /// [IndexReader].
+    /// This allows parallelizing warming work when there are multiple [`Warmer`] registered with
+    /// the [`IndexReader`].
     #[must_use]
     pub fn num_warming_threads(mut self, num_warming_threads: usize) -> IndexReaderBuilder {
         self.num_warming_threads = num_warming_threads;

@@ -186,7 +186,7 @@ impl InnerIndexReader {
             searcher_generation_inventory,
         })
     }
-    /// Opens the freshest segments `SegmentReader`.
+    /// Opens the freshest segments [`SegmentReader`].
     ///
     /// This function acquires a lot to prevent GC from removing files
     /// as we are opening our index.

@@ -264,7 +264,7 @@ impl InnerIndexReader {
 /// you instances of `Searcher` for the last loaded version.
 ///
 /// `Clone` does not clone the different pool of searcher. `IndexReader`
-/// just wraps and `Arc`.
+/// just wraps an `Arc`.
 #[derive(Clone)]
 pub struct IndexReader {
     inner: Arc<InnerIndexReader>,

@@ -280,7 +280,7 @@ impl IndexReader {
     /// Update searchers so that they reflect the state of the last
     /// `.commit()`.
     ///
-    /// If you set up the `OnCommit` `ReloadPolicy` (which is the default)
+    /// If you set up the [`ReloadPolicy::OnCommit`] (which is the default)
     /// every commit should be rapidly reflected on your `IndexReader` and you should
     /// not need to call `reload()` at all.
     ///

@@ -27,8 +27,7 @@
 //!
 //! - at the segment level, the
 //!   [`SegmentReader`'s `doc` method](../struct.SegmentReader.html#method.doc)
-//! - at the index level, the
-//!   [`Searcher`'s `doc` method](../struct.Searcher.html#method.doc)
+//! - at the index level, the [`Searcher::doc()`](crate::Searcher::doc) method
 //!
 //! !
 

@@ -152,7 +152,7 @@ pub use self::whitespace_tokenizer::WhitespaceTokenizer;
 
 /// Maximum authorized len (in bytes) for a token.
 ///
-/// Tokenizer are in charge of not emitting tokens larger than this value.
+/// Tokenizers are in charge of not emitting tokens larger than this value.
 /// Currently, if a faulty tokenizer implementation emits tokens with a length larger than
 /// `2^16 - 1 - 5`, the token will simply be ignored downstream.
 pub const MAX_TOKEN_LEN: usize = u16::MAX as usize - 5;
