Documentation improvements.
Fix some linking, some grammar, some typos, etc.
@@ -95,7 +95,7 @@ called [`Directory`](src/directory/directory.rs).
 Contrary to Lucene however, "files" are quite different from some kind of `io::Read` object.
 Check out [`src/directory/directory.rs`](src/directory/directory.rs) trait for more details.
 
-Tantivy ships two main directory implementation: the `MMapDirectory` and the `RAMDirectory`,
+Tantivy ships two main directory implementation: the `MmapDirectory` and the `RamDirectory`,
 but users can extend tantivy with their own implementation.
 
 ## [schema/](src/schema): What are documents?

@@ -55,13 +55,13 @@ const HIGHEST_BIT: u64 = 1 << 63;
 /// to values over 2^63, and all values end up requiring 64 bits.
 ///
 /// # See also
-/// The [reverse mapping is `u64_to_i64`](./fn.u64_to_i64.html).
+/// The reverse mapping is [`u64_to_i64()`].
 #[inline]
 pub fn i64_to_u64(val: i64) -> u64 {
     (val as u64) ^ HIGHEST_BIT
 }
 
-/// Reverse the mapping given by [`i64_to_u64`](./fn.i64_to_u64.html).
+/// Reverse the mapping given by [`i64_to_u64()`].
 #[inline]
 pub fn u64_to_i64(val: u64) -> i64 {
     (val ^ HIGHEST_BIT) as i64
@@ -83,7 +83,7 @@ pub fn u64_to_i64(val: u64) -> i64 {
 /// explains the mapping in a clear manner.
 ///
 /// # See also
-/// The [reverse mapping is `u64_to_f64`](./fn.u64_to_f64.html).
+/// The reverse mapping is [`u64_to_f64()`].
 #[inline]
 pub fn f64_to_u64(val: f64) -> u64 {
     let bits = val.to_bits();
@@ -94,7 +94,7 @@ pub fn f64_to_u64(val: f64) -> u64 {
     }
 }
 
-/// Reverse the mapping given by [`i64_to_u64`](./fn.i64_to_u64.html).
+/// Reverse the mapping given by [`f64_to_u64()`].
 #[inline]
 pub fn u64_to_f64(val: u64) -> f64 {
     f64::from_bits(if val & HIGHEST_BIT != 0 {

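The hunks above touch tantivy's order-preserving value mappings. A minimal standalone sketch (plain Rust, no tantivy dependency, reusing the two `i64` functions exactly as shown in the diff) illustrating why XOR-ing the sign bit keeps sort order intact:

```rust
const HIGHEST_BIT: u64 = 1 << 63;

fn i64_to_u64(val: i64) -> u64 {
    (val as u64) ^ HIGHEST_BIT
}

fn u64_to_i64(val: u64) -> i64 {
    (val ^ HIGHEST_BIT) as i64
}

fn main() {
    let values = [i64::MIN, -1, 0, 1, i64::MAX];
    let mapped: Vec<u64> = values.iter().map(|&v| i64_to_u64(v)).collect();
    // Flipping the sign bit makes the mapping monotonic:
    // sorted i64 inputs stay sorted after conversion to u64.
    assert!(mapped.windows(2).all(|w| w[0] <= w[1]));
    // The mapping round-trips exactly.
    assert!(values.iter().all(|&v| u64_to_i64(i64_to_u64(v)) == v));
}
```
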
@@ -55,7 +55,7 @@ impl<W: TerminatingWrite> TerminatingWrite for CountingWriter<W> {
 }
 
 /// Struct used to prevent from calling
-/// [`terminate_ref`](trait.TerminatingWrite.html#tymethod.terminate_ref) directly
+/// [`terminate_ref`](TerminatingWrite::terminate_ref) directly
 ///
 /// The point is that while the type is public, it cannot be built by anyone
 /// outside of this module.

@@ -36,8 +36,7 @@ fn main() -> tantivy::Result<()> {
     // need to be able to be able to retrieve it
     // for our application.
     //
-    // We can make our index lighter and
-    // by omitting `STORED` flag.
+    // We can make our index lighter by omitting the `STORED` flag.
     let body = schema_builder.add_text_field("body", TEXT);
 
     let schema = schema_builder.build();

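For context on that `STORED` hunk, a hedged sketch of the surrounding schema-building pattern (field names here are illustrative; the flags come from `tantivy::schema`):

```rust
use tantivy::schema::{Schema, STORED, TEXT};

fn main() {
    let mut schema_builder = Schema::builder();
    // `title` is indexed and stored: it can be searched, and its original
    // value can be retrieved from the doc store for display.
    let _title = schema_builder.add_text_field("title", TEXT | STORED);
    // `body` is indexed but not stored: still searchable, but omitting the
    // `STORED` flag keeps the index lighter.
    let _body = schema_builder.add_text_field("body", TEXT);
    let _schema = schema_builder.build();
}
```
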
@@ -23,7 +23,7 @@ const ESCAPED_SPECIAL_CHARS_PATTERN: &str = r#"\\(\+|\^|`|:|\{|\}|"|\[|\]|\(|\)|
 /// Parses a field_name
 /// A field name must have at least one character and be followed by a colon.
 /// All characters are allowed including special characters `SPECIAL_CHARS`, but these
-/// need to be escaped with a backslack character '\'.
+/// need to be escaped with a backslash character '\'.
 fn field_name<'a>() -> impl Parser<&'a str, Output = String> {
     static ESCAPED_SPECIAL_CHARS_RE: Lazy<Regex> =
         Lazy::new(|| Regex::new(ESCAPED_SPECIAL_CHARS_PATTERN).unwrap());
@@ -68,7 +68,7 @@ fn word<'a>() -> impl Parser<&'a str, Output = String> {
 ///
 /// NOTE: also accepts 999999-99-99T99:99:99.266051969+99:99
 /// We delegate rejecting such invalid dates to the logical AST computation code
-/// which invokes time::OffsetDateTime::parse(..., &Rfc3339) on the value to actually parse
+/// which invokes `time::OffsetDateTime::parse(..., &Rfc3339)` on the value to actually parse
 /// it (instead of merely extracting the datetime value as string as done here).
 fn date_time<'a>() -> impl Parser<&'a str, Output = String> {
     let two_digits = || recognize::<String, _, _>((digit(), digit()));

@@ -67,10 +67,10 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
 /// (e.g. `/category/fiction`, `/category/biography`, `/category/personal_development`).
 ///
 /// Once collection is finished, you can harvest its results in the form
-/// of a `FacetCounts` object, and extract your face t counts from it.
+/// of a [`FacetCounts`] object, and extract your facet counts from it.
 ///
 /// This implementation assumes you are working with a number of facets that
-/// is much hundreds of time lower than your number of documents.
+/// is many hundreds of times smaller than your number of documents.
 ///
 ///
 /// ```rust
@@ -231,7 +231,7 @@ impl FacetCollector {
     ///
     /// Adding two facets within which one is the prefix of the other is forbidden.
     /// If you need the correct number of unique documents for two such facets,
-    /// just add them in separate `FacetCollector`.
+    /// just add them in a separate `FacetCollector`.
     pub fn add_facet<T>(&mut self, facet_from: T)
     where Facet: From<T> {
         let facet = Facet::from(facet_from);
@@ -391,7 +391,7 @@ impl<'a> Iterator for FacetChildIterator<'a> {
 
 impl FacetCounts {
     /// Returns an iterator over all of the facet count pairs inside this result.
-    /// See the documentation for [FacetCollector] for a usage example.
+    /// See the documentation for [`FacetCollector`] for a usage example.
     pub fn get<T>(&self, facet_from: T) -> FacetChildIterator<'_>
     where Facet: From<T> {
         let facet = Facet::from(facet_from);
@@ -410,7 +410,7 @@ impl FacetCounts {
     }
 
     /// Returns a vector of top `k` facets with their counts, sorted highest-to-lowest by counts.
-    /// See the documentation for [FacetCollector] for a usage example.
+    /// See the documentation for [`FacetCollector`] for a usage example.
     pub fn top_k<T>(&self, facet: T, k: usize) -> Vec<(&Facet, u64)>
     where Facet: From<T> {
         let mut heap = BinaryHeap::with_capacity(k);

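To make the `FacetCollector`/`FacetCounts` relationship concrete, a condensed usage sketch. It assumes a release contemporary with this commit, where `FacetCollector::for_field` takes a `Field` handle (newer releases take the field name) and `add_document` returns a `Result`:

```rust
use tantivy::collector::FacetCollector;
use tantivy::query::AllQuery;
use tantivy::schema::{Facet, FacetOptions, Schema};
use tantivy::{doc, Index};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let category = schema_builder.add_facet_field("category", FacetOptions::default());
    let index = Index::create_in_ram(schema_builder.build());

    let mut writer = index.writer(50_000_000)?;
    writer.add_document(doc!(category => Facet::from("/category/fiction")))?;
    writer.add_document(doc!(category => Facet::from("/category/fiction")))?;
    writer.add_document(doc!(category => Facet::from("/category/biography")))?;
    writer.commit()?;

    let searcher = index.reader()?.searcher();
    // NOTE: the exact `for_field` signature varies across tantivy versions.
    let mut facet_collector = FacetCollector::for_field(category);
    facet_collector.add_facet("/category");
    // Harvest the "fruit": a FacetCounts object.
    let facet_counts = searcher.search(&AllQuery, &facet_collector)?;
    for (facet, count) in facet_counts.get("/category") {
        println!("{}: {}", facet, count);
    }
    Ok(())
}
```
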
@@ -4,9 +4,9 @@
 //! In tantivy jargon, we call this information your search "fruit".
 //!
 //! Your fruit could for instance be :
-//! - [the count of matching documents](./struct.Count.html)
-//! - [the top 10 documents, by relevancy or by a fast field](./struct.TopDocs.html)
-//! - [facet counts](./struct.FacetCollector.html)
+//! - [the count of matching documents](crate::collector::Count)
+//! - [the top 10 documents, by relevancy or by a fast field](crate::collector::TopDocs)
+//! - [facet counts](FacetCollector)
 //!
 //! At one point in your code, you will trigger the actual search operation by calling
 //! [the `search(...)` method of your `Searcher` object](../struct.Searcher.html#method.search).

@@ -78,8 +78,8 @@ fn save_new_metas(
 
 /// IndexBuilder can be used to create an index.
 ///
-/// Use in conjunction with `SchemaBuilder`. Global index settings
-/// can be configured with `IndexSettings`
+/// Use in conjunction with [`SchemaBuilder`][crate::schema::SchemaBuilder].
+/// Global index settings can be configured with [`IndexSettings`].
 ///
 /// # Examples
 ///
@@ -97,7 +97,13 @@ fn save_new_metas(
 /// );
 ///
 /// let schema = schema_builder.build();
-/// let settings = IndexSettings{sort_by_field: Some(IndexSortByField{field:"number".to_string(), order:Order::Asc}), ..Default::default()};
+/// let settings = IndexSettings{
+///     sort_by_field: Some(IndexSortByField{
+///         field: "number".to_string(),
+///         order: Order::Asc
+///     }),
+///     ..Default::default()
+/// };
 /// let index = Index::builder().schema(schema).settings(settings).create_in_ram();
 /// ```
 pub struct IndexBuilder {
@@ -140,7 +146,7 @@ impl IndexBuilder {
         self
     }
 
-    /// Creates a new index using the `RAMDirectory`.
+    /// Creates a new index using the [`RamDirectory`].
     ///
     /// The index will be allocated in anonymous memory.
     /// This should only be used for unit tests.
@@ -148,13 +154,14 @@ impl IndexBuilder {
         let ram_directory = RamDirectory::create();
         Ok(self
             .create(ram_directory)
-            .expect("Creating a RAMDirectory should never fail"))
+            .expect("Creating a RamDirectory should never fail"))
     }
 
     /// Creates a new index in a given filepath.
-    /// The index will use the `MMapDirectory`.
+    /// The index will use the [`MmapDirectory`].
     ///
-    /// If a previous index was in this directory, it returns an `IndexAlreadyExists` error.
+    /// If a previous index was in this directory, it returns an
+    /// [`TantivyError::IndexAlreadyExists`] error.
     #[cfg(feature = "mmap")]
     pub fn create_in_dir<P: AsRef<Path>>(self, directory_path: P) -> crate::Result<Index> {
         let mmap_directory: Box<dyn Directory> = Box::new(MmapDirectory::open(directory_path)?);
@@ -185,12 +192,13 @@ impl IndexBuilder {
 
     /// Creates a new index in a temp directory.
     ///
-    /// The index will use the `MMapDirectory` in a newly created directory.
-    /// The temp directory will be destroyed automatically when the `Index` object
+    /// The index will use the [`MmapDirectory`] in a newly created directory.
+    /// The temp directory will be destroyed automatically when the [`Index`] object
     /// is destroyed.
    ///
-    /// The temp directory is only used for testing the `MmapDirectory`.
-    /// For other unit tests, prefer the `RAMDirectory`, see: `create_in_ram`.
+    /// The temp directory is only used for testing the [`MmapDirectory`].
+    /// For other unit tests, prefer the [`RamDirectory`], see:
+    /// [`IndexBuilder::create_in_ram()`].
     #[cfg(feature = "mmap")]
     pub fn create_from_tempdir(self) -> crate::Result<Index> {
         let mmap_directory: Box<dyn Directory> = Box::new(MmapDirectory::create_from_tempdir()?);
@@ -286,7 +294,7 @@ impl Index {
         self.set_multithread_executor(default_num_threads)
     }
 
-    /// Creates a new index using the `RamDirectory`.
+    /// Creates a new index using the [`RamDirectory`].
     ///
     /// The index will be allocated in anonymous memory.
     /// This is useful for indexing small set of documents
@@ -296,9 +304,10 @@ impl Index {
     }
 
     /// Creates a new index in a given filepath.
-    /// The index will use the `MMapDirectory`.
+    /// The index will use the [`MmapDirectory`].
     ///
-    /// If a previous index was in this directory, then it returns an `IndexAlreadyExists` error.
+    /// If a previous index was in this directory, then it returns
+    /// a [`TantivyError::IndexAlreadyExists`] error.
     #[cfg(feature = "mmap")]
     pub fn create_in_dir<P: AsRef<Path>>(
         directory_path: P,
@@ -320,12 +329,13 @@ impl Index {
 
     /// Creates a new index in a temp directory.
     ///
-    /// The index will use the `MMapDirectory` in a newly created directory.
-    /// The temp directory will be destroyed automatically when the `Index` object
+    /// The index will use the [`MmapDirectory`] in a newly created directory.
+    /// The temp directory will be destroyed automatically when the [`Index`] object
     /// is destroyed.
     ///
-    /// The temp directory is only used for testing the `MmapDirectory`.
-    /// For other unit tests, prefer the `RamDirectory`, see: `create_in_ram`.
+    /// The temp directory is only used for testing the [`MmapDirectory`].
+    /// For other unit tests, prefer the [`RamDirectory`],
+    /// see: [`IndexBuilder::create_in_ram()`].
     #[cfg(feature = "mmap")]
     pub fn create_from_tempdir(schema: Schema) -> crate::Result<Index> {
         IndexBuilder::new().schema(schema).create_from_tempdir()
@@ -345,7 +355,7 @@ impl Index {
         builder.create(dir)
     }
 
-    /// Creates a new index given a directory and an `IndexMeta`.
+    /// Creates a new index given a directory and an [`IndexMeta`].
     fn open_from_metas(
         directory: ManagedDirectory,
         metas: &IndexMeta,
@@ -372,7 +382,7 @@ impl Index {
         &self.tokenizers
     }
 
-    /// Helper to access the tokenizer associated to a specific field.
+    /// Get the tokenizer associated with a specific field.
     pub fn tokenizer_for_field(&self, field: Field) -> crate::Result<TextAnalyzer> {
         let field_entry = self.schema.get_field_entry(field);
         let field_type = field_entry.field_type();
@@ -404,14 +414,14 @@ impl Index {
         })
     }
 
-    /// Create a default `IndexReader` for the given index.
+    /// Create a default [`IndexReader`] for the given index.
     ///
-    /// See [`Index.reader_builder()`](#method.reader_builder).
+    /// See [`Index.reader_builder()`].
     pub fn reader(&self) -> crate::Result<IndexReader> {
         self.reader_builder().try_into()
     }
 
-    /// Create a `IndexReader` for the given index.
+    /// Create a [`IndexReader`] for the given index.
     ///
     /// Most project should create at most one reader for a given index.
     /// This method is typically called only once per `Index` instance.

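A brief sketch tying these builder methods together (in-RAM creation plus the one-reader-per-index pattern documented above; assumes a recent tantivy release):

```rust
use tantivy::schema::{Schema, TEXT};
use tantivy::Index;

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    schema_builder.add_text_field("body", TEXT);
    let schema = schema_builder.build();

    // Anonymous in-memory index, as recommended for unit tests.
    let index = Index::builder().schema(schema).create_in_ram()?;

    // Most projects create at most one reader per index, then clone it.
    let reader = index.reader()?;
    let searcher = reader.searcher();
    assert_eq!(searcher.num_docs(), 0);
    Ok(())
}
```
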
@@ -82,7 +82,7 @@ impl Searcher {
     /// Fetches a document from tantivy's store given a `DocAddress`.
     ///
     /// The searcher uses the segment ordinal to route the
-    /// the request to the right `Segment`.
+    /// request to the right `Segment`.
     pub fn doc(&self, doc_address: DocAddress) -> crate::Result<Document> {
         let store_reader = &self.inner.store_readers[doc_address.segment_ord as usize];
         store_reader.get(doc_address.doc_id)

@@ -117,9 +117,9 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
     /// change.
     ///
     /// Specifically, subsequent writes or flushes should
-    /// have no effect on the returned `FileSlice` object.
+    /// have no effect on the returned [`FileSlice`] object.
     ///
-    /// You should only use this to read files create with [Directory::open_write].
+    /// You should only use this to read files create with [`Directory::open_write()`].
     fn open_read(&self, path: &Path) -> Result<FileSlice, OpenReadError> {
         let file_handle = self.get_file_handle(path)?;
         Ok(FileSlice::new(file_handle))
@@ -128,27 +128,28 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
     /// Removes a file
     ///
     /// Removing a file will not affect an eventual
-    /// existing FileSlice pointing to it.
+    /// existing [`FileSlice`] pointing to it.
     ///
-    /// Removing a nonexistent file, yields a
-    /// `DeleteError::DoesNotExist`.
+    /// Removing a nonexistent file, returns a
+    /// [`DeleteError::FileDoesNotExist`].
     fn delete(&self, path: &Path) -> Result<(), DeleteError>;
 
     /// Returns true if and only if the file exists
     fn exists(&self, path: &Path) -> Result<bool, OpenReadError>;
 
     /// Opens a writer for the *virtual file* associated with
-    /// a Path.
+    /// a [`Path`].
     ///
     /// Right after this call, for the span of the execution of the program
-    /// the file should be created and any subsequent call to `open_read` for the
-    /// same path should return a `FileSlice`.
+    /// the file should be created and any subsequent call to
+    /// [`Directory::open_read()`] for the same path should return
+    /// a [`FileSlice`].
     ///
     /// However, depending on the directory implementation,
-    /// it might be required to call `sync_directory` to ensure
+    /// it might be required to call [`Directory::sync_directory()`] to ensure
     /// that the file is durably created.
     /// (The semantics here are the same when dealing with
-    /// a posix filesystem.)
+    /// a POSIX filesystem.)
     ///
     /// Write operations may be aggressively buffered.
     /// The client of this trait is responsible for calling flush
@@ -157,19 +158,19 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
     ///
     /// Flush operation should also be persistent.
     ///
-    /// The user shall not rely on `Drop` triggering `flush`.
-    /// Note that `RamDirectory` will panic! if `flush`
-    /// was not called.
+    /// The user shall not rely on [`Drop`] triggering `flush`.
+    /// Note that [`RamDirectory`][crate::directory::RamDirectory] will
+    /// panic! if `flush` was not called.
     ///
     /// The file may not previously exist.
     fn open_write(&self, path: &Path) -> Result<WritePtr, OpenWriteError>;
 
     /// Reads the full content file that has been written using
-    /// atomic_write.
+    /// [`Directory::atomic_write()`].
     ///
     /// This should only be used for small files.
     ///
-    /// You should only use this to read files create with [Directory::atomic_write].
+    /// You should only use this to read files create with [`Directory::atomic_write()`].
     fn atomic_read(&self, path: &Path) -> Result<Vec<u8>, OpenReadError>;
 
     /// Atomically replace the content of a file with data.
@@ -188,7 +189,7 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
 
     /// Acquire a lock in the given directory.
     ///
-    /// The method is blocking or not depending on the `Lock` object.
+    /// The method is blocking or not depending on the [`Lock`] object.
     fn acquire_lock(&self, lock: &Lock) -> Result<DirectoryLock, LockError> {
         let mut box_directory = self.box_clone();
         let mut retry_policy = retry_policy(lock.is_blocking);
@@ -210,15 +211,15 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
     }
 
     /// Registers a callback that will be called whenever a change on the `meta.json`
-    /// using the `atomic_write` API is detected.
+    /// using the [`Directory::atomic_write()`] API is detected.
     ///
-    /// The behavior when using `.watch()` on a file using [Directory::open_write] is, on the other
-    /// hand, undefined.
+    /// The behavior when using `.watch()` on a file using [`Directory::open_write()`] is, on the
+    /// other hand, undefined.
     ///
     /// The file will be watched for the lifetime of the returned `WatchHandle`. The caller is
     /// required to keep it.
     /// It does not override previous callbacks. When the file is modified, all callback that are
-    /// registered (and whose `WatchHandle` is still alive) are triggered.
+    /// registered (and whose [`WatchHandle`] is still alive) are triggered.
     ///
     /// Internally, tantivy only uses this API to detect new commits to implement the
     /// `OnCommit` `ReloadPolicy`. Not implementing watch in a `Directory` only prevents the

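The write/read contract described above can be exercised directly against a `RamDirectory`. A sketch, assuming `TerminatingWrite` is re-exported from `tantivy::directory` as in recent releases:

```rust
use std::io::Write;
use std::path::Path;

use tantivy::directory::{Directory, RamDirectory, TerminatingWrite};

fn main() {
    let directory = RamDirectory::create();

    // Streamed writes: terminate (which flushes) before reading back.
    // Per the docs above, RamDirectory panics if a writer is dropped unflushed.
    let mut writer = directory.open_write(Path::new("seg")).expect("open_write");
    writer.write_all(b"hello").expect("write");
    writer.terminate().expect("terminate");

    let slice = directory.open_read(Path::new("seg")).expect("open_read");
    assert_eq!(slice.read_bytes().expect("read").as_slice(), b"hello");

    // Atomic writes: the whole content is swapped in one operation,
    // and read back with atomic_read.
    directory
        .atomic_write(Path::new("meta.json"), b"{}")
        .expect("atomic_write");
    assert_eq!(directory.atomic_read(Path::new("meta.json")).unwrap(), b"{}");
}
```
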
@@ -8,8 +8,8 @@ use once_cell::sync::Lazy;
 /// [`LockParams`](./enum.LockParams.html).
 /// Tantivy itself uses only two locks but client application
 /// can use the directory facility to define their own locks.
-/// - [INDEX_WRITER_LOCK]
-/// - [META_LOCK]
+/// - [`INDEX_WRITER_LOCK`]
+/// - [`META_LOCK`]
 ///
 /// Check out these locks documentation for more information.
 #[derive(Debug)]
@@ -30,7 +30,8 @@ pub struct Lock {
 }
 
 /// Only one process should be able to write tantivy's index at a time.
-/// This lock file, when present, is in charge of preventing other processes to open an IndexWriter.
+/// This lock file, when present, is in charge of preventing other processes to open an
+/// `IndexWriter`.
 ///
 /// If the process is killed and this file remains, it is safe to remove it manually.
 ///

@@ -56,10 +56,10 @@ fn open_mmap(full_path: &Path) -> result::Result<Option<Mmap>, OpenReadError> {
 
 #[derive(Default, Clone, Debug, Serialize, Deserialize)]
 pub struct CacheCounters {
-    // Number of time the cache prevents to call `mmap`
+    /// Number of time the cache prevents to call `mmap`
     pub hit: usize,
-    // Number of time tantivy had to call `mmap`
-    // as no entry was in the cache.
+    /// Number of time tantivy had to call `mmap`
+    /// as no entry was in the cache.
     pub miss: usize,
 }
 

@@ -15,7 +15,7 @@ use crate::directory::{
     WatchHandle, WritePtr,
 };
 
-/// Writer associated with the `RamDirectory`
+/// Writer associated with the [`RamDirectory`].
 ///
 /// The Writer just writes a buffer.
 struct VecWriter {
@@ -137,17 +137,17 @@ impl RamDirectory {
     }
 
     /// Returns the sum of the size of the different files
-    /// in the RamDirectory.
+    /// in the [`RamDirectory`].
     pub fn total_mem_usage(&self) -> usize {
         self.fs.read().unwrap().total_mem_usage()
     }
 
-    /// Write a copy of all of the files saved in the RamDirectory in the target `Directory`.
+    /// Write a copy of all of the files saved in the [`RamDirectory`] in the target [`Directory`].
     ///
-    /// Files are all written using the `Directory::write` meaning, even if they were
-    /// written using the `atomic_write` api.
+    /// Files are all written using the [`Directory::open_write()`] meaning, even if they were
+    /// written using the [`Directory::atomic_write()`] api.
     ///
-    /// If an error is encounterred, files may be persisted partially.
+    /// If an error is encountered, files may be persisted partially.
     pub fn persist(&self, dest: &dyn Directory) -> crate::Result<()> {
         let wlock = self.fs.write().unwrap();
         for (path, file) in wlock.fs.iter() {

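`persist` is the bridge from the in-RAM world to a durable directory. A sketch; the `tempfile` crate here is only an illustrative convenience, not something this file uses:

```rust
use std::path::Path;

use tantivy::directory::{Directory, MmapDirectory, RamDirectory};

fn main() {
    let ram = RamDirectory::create();
    ram.atomic_write(Path::new("meta.json"), b"{}").expect("atomic_write");

    // Copy every buffered file into an on-disk directory.
    let tmp = tempfile::TempDir::new().expect("tempdir");
    let mmap = MmapDirectory::open(tmp.path()).expect("open");
    ram.persist(&mmap).expect("persist");
    assert!(mmap.exists(Path::new("meta.json")).unwrap());
}
```
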
@@ -13,15 +13,17 @@ use crate::DocId;
 /// This `BytesFastFieldWriter` is only useful for advanced users.
 /// The normal way to get your associated bytes in your index
 /// is to
-/// - declare your field with fast set to `Cardinality::SingleValue`
+/// - declare your field with fast set to
+///   [`Cardinality::SingleValue`](crate::schema::Cardinality::SingleValue)
 /// in your schema
 /// - add your document simply by calling `.add_document(...)` with associating bytes to the field.
 ///
 /// The `BytesFastFieldWriter` can be acquired from the
 /// fast field writer by calling
-/// [`.get_bytes_writer(...)`](./struct.FastFieldsWriter.html#method.get_bytes_writer).
+/// [`.get_bytes_writer_mut(...)`](crate::fastfield::FastFieldsWriter::get_bytes_writer_mut).
 ///
-/// Once acquired, writing is done by calling `.add_document_val(&[u8])`
+/// Once acquired, writing is done by calling
+/// [`.add_document_val(&[u8])`](BytesFastFieldWriter::add_document_val)
 /// once per document, even if there are no bytes associated to it.
 pub struct BytesFastFieldWriter {
     field: Field,

@@ -31,7 +31,7 @@ pub const MARGIN_IN_BYTES: usize = 1_000_000;
 pub const MEMORY_ARENA_NUM_BYTES_MIN: usize = ((MARGIN_IN_BYTES as u32) * 3u32) as usize;
 pub const MEMORY_ARENA_NUM_BYTES_MAX: usize = u32::MAX as usize - MARGIN_IN_BYTES;
 
-// We impose the number of index writer thread to be at most this.
+// We impose the number of index writer threads to be at most this.
 pub const MAX_NUM_THREAD: usize = 8;
 
 // Add document will block if the number of docs waiting in the queue to be indexed
@@ -40,7 +40,7 @@ const PIPELINE_MAX_SIZE_IN_DOCS: usize = 10_000;
 
 fn error_in_index_worker_thread(context: &str) -> TantivyError {
     TantivyError::ErrorInThread(format!(
-        "{}. A worker thread encounterred an error (io::Error most likely) or panicked.",
+        "{}. A worker thread encountered an error (io::Error most likely) or panicked.",
         context
     ))
 }
@@ -49,7 +49,7 @@ fn error_in_index_worker_thread(context: &str) -> TantivyError {
 ///
 /// It manages a small number of indexing thread, as well as a shared
 /// indexing queue.
-/// Each indexing thread builds its own independent `Segment`, via
+/// Each indexing thread builds its own independent [`Segment`], via
 /// a `SegmentWriter` object.
 pub struct IndexWriter {
     // the lock is just used to bind the
@@ -385,8 +385,8 @@ impl IndexWriter {
             .operation_receiver()
             .ok_or_else(|| {
                 crate::TantivyError::ErrorInThread(
-                    "The index writer was killed. It can happen if an indexing worker \
-                     encounterred an Io error for instance."
+                    "The index writer was killed. It can happen if an indexing worker encountered \
+                     an Io error for instance."
                         .to_string(),
                 )
             })
@@ -595,14 +595,14 @@ impl IndexWriter {
     /// * `.commit()`: to accept this commit
     /// * `.abort()`: to cancel this commit.
     ///
-    /// In the current implementation, `PreparedCommit` borrows
-    /// the `IndexWriter` mutably so we are guaranteed that no new
+    /// In the current implementation, [`PreparedCommit`] borrows
+    /// the [`IndexWriter`] mutably so we are guaranteed that no new
     /// document can be added as long as it is committed or is
     /// dropped.
     ///
     /// It is also possible to add a payload to the `commit`
     /// using this API.
-    /// See [`PreparedCommit::set_payload()`](PreparedCommit.html)
+    /// See [`PreparedCommit::set_payload()`].
     pub fn prepare_commit(&mut self) -> crate::Result<PreparedCommit> {
         // Here, because we join all of the worker threads,
         // all of the segment update for this commit have been

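The `prepare_commit` doc above describes a two-phase commit. A compact sketch of the flow, assuming a recent tantivy release where `add_document` returns a `Result`:

```rust
use tantivy::schema::{Schema, TEXT};
use tantivy::{doc, Index};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let body = schema_builder.add_text_field("body", TEXT);
    let index = Index::create_in_ram(schema_builder.build());

    let mut writer = index.writer(50_000_000)?;
    writer.add_document(doc!(body => "hello world"))?;

    // Two-phase commit: `prepare_commit` flushes and validates, then the
    // `PreparedCommit` can either be committed or aborted. It borrows the
    // writer mutably, so no document can be added in between.
    let mut prepared = writer.prepare_commit()?;
    prepared.set_payload("my-checkpoint-1");
    prepared.commit()?;
    Ok(())
}
```
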
@@ -3,7 +3,7 @@
 //! In "The beauty and the beast", the term "the" appears in position 0 and position 3.
 //! This information is useful to run phrase queries.
 //!
-//! The [position](../enum.SegmentComponent.html#variant.Positions) file contains all of the
+//! The [position](crate::SegmentComponent::Positions) file contains all of the
 //! bitpacked positions delta, for all terms of a given field, one term after the other.
 //!
 //! Each term is encoded independently.

@@ -12,7 +12,7 @@ use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
 /// ```
 ///
 /// the `start` argument is just used to hint that the response is
-/// greater than beyond `start`. the implementation may or may not use
+/// greater than beyond `start`. The implementation may or may not use
 /// it for optimization.
 ///
 /// # Assumption

@@ -72,7 +72,7 @@ impl PhraseQuery {
         self.slop = value;
     }
 
-    /// The `Field` this `PhraseQuery` is targeting.
+    /// The [`Field`] this `PhraseQuery` is targeting.
     pub fn field(&self) -> Field {
         self.field
     }
@@ -85,10 +85,10 @@ impl PhraseQuery {
             .collect::<Vec<Term>>()
     }
 
-    /// Returns the `PhraseWeight` for the given phrase query given a specific `searcher`.
+    /// Returns the [`PhraseWeight`] for the given phrase query given a specific `searcher`.
     ///
-    /// This function is the same as `.weight(...)` except it returns
-    /// a specialized type `PhraseWeight` instead of a Boxed trait.
+    /// This function is the same as [`Query::weight()`] except it returns
+    /// a specialized type [`PhraseWeight`] instead of a Boxed trait.
     pub(crate) fn phrase_weight(
         &self,
         searcher: &Searcher,
@@ -121,7 +121,7 @@ impl PhraseQuery {
 impl Query for PhraseQuery {
     /// Create the weight associated to a query.
     ///
-    /// See [`Weight`](./trait.Weight.html).
+    /// See [`Weight`].
     fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> crate::Result<Box<dyn Weight>> {
         let phrase_weight = self.phrase_weight(searcher, scoring_enabled)?;
         Ok(Box::new(phrase_weight))

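A hedged end-to-end sketch of `PhraseQuery`: all terms must target the same field, and with the default tokenizer the words "the beast" occupy consecutive positions in the first document below, so the phrase matches it and nothing else:

```rust
use tantivy::collector::TopDocs;
use tantivy::query::PhraseQuery;
use tantivy::schema::{Schema, TEXT};
use tantivy::{doc, Index, Term};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let body = schema_builder.add_text_field("body", TEXT);
    let index = Index::create_in_ram(schema_builder.build());

    let mut writer = index.writer(50_000_000)?;
    writer.add_document(doc!(body => "the beauty and the beast"))?;
    writer.add_document(doc!(body => "beauty sleep"))?;
    writer.commit()?;

    // Terms must be given in phrase order; by default they must appear
    // at consecutive positions (see `set_slop` to relax this).
    let query = PhraseQuery::new(vec![
        Term::from_field_text(body, "the"),
        Term::from_field_text(body, "beast"),
    ]);
    let searcher = index.reader()?.searcher();
    let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?;
    assert_eq!(top_docs.len(), 1);
    Ok(())
}
```
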
@@ -41,10 +41,12 @@ pub(crate) fn for_each_pruning_scorer<TScorer: Scorer + ?Sized>(
     }
 }
 
-/// A Weight is the specialization of a Query
+/// A Weight is the specialization of a `Query`
 /// for a given set of segments.
 ///
-/// See [`Query`](./trait.Query.html).
+/// See [`Query`].
+///
+/// [`Query`]: crate::query::Query
 pub trait Weight: Send + Sync + 'static {
     /// Returns the scorer for the given segment.
     ///

@@ -23,7 +23,7 @@ pub enum ReloadPolicy {
     /// The index is entirely reloaded manually.
     /// All updates of the index should be manual.
     ///
-    /// No change is reflected automatically. You are required to call `IndexReader::reload()`
+    /// No change is reflected automatically. You are required to call [`IndexReader::reload()`]
     /// manually.
     Manual,
     /// The index is reloaded within milliseconds after a new commit is available.
@@ -31,11 +31,11 @@ pub enum ReloadPolicy {
     OnCommit, // TODO add NEAR_REAL_TIME(target_ms)
 }
 
-/// [IndexReader] builder
+/// [`IndexReader`] builder
 ///
 /// It makes it possible to configure:
-/// - [ReloadPolicy] defining when new index versions are detected
-/// - [Warmer] implementations
+/// - [`ReloadPolicy`] defining when new index versions are detected
+/// - [`Warmer`] implementations
 /// - number of warming threads, for parallelizing warming work
 /// - The cache size of the underlying doc store readers.
 #[derive(Clone)]
@@ -108,7 +108,7 @@ impl IndexReaderBuilder {
 
     /// Sets the reload_policy.
     ///
-    /// See [`ReloadPolicy`](./enum.ReloadPolicy.html) for more details.
+    /// See [`ReloadPolicy`] for more details.
     #[must_use]
     pub fn reload_policy(mut self, reload_policy: ReloadPolicy) -> IndexReaderBuilder {
         self.reload_policy = reload_policy;
@@ -133,8 +133,8 @@ impl IndexReaderBuilder {
 
     /// Sets the number of warming threads.
     ///
-    /// This allows parallelizing warming work when there are multiple [Warmer] registered with the
-    /// [IndexReader].
+    /// This allows parallelizing warming work when there are multiple [`Warmer`] registered with
+    /// the [`IndexReader`].
     #[must_use]
     pub fn num_warming_threads(mut self, num_warming_threads: usize) -> IndexReaderBuilder {
         self.num_warming_threads = num_warming_threads;
@@ -186,7 +186,7 @@ impl InnerIndexReader {
             searcher_generation_inventory,
         })
     }
-    /// Opens the freshest segments `SegmentReader`.
+    /// Opens the freshest segments [`SegmentReader`].
     ///
     /// This function acquires a lot to prevent GC from removing files
     /// as we are opening our index.
@@ -264,7 +264,7 @@ impl InnerIndexReader {
 /// you instances of `Searcher` for the last loaded version.
 ///
 /// `Clone` does not clone the different pool of searcher. `IndexReader`
-/// just wraps and `Arc`.
+/// just wraps an `Arc`.
 #[derive(Clone)]
 pub struct IndexReader {
     inner: Arc<InnerIndexReader>,
@@ -280,7 +280,7 @@ impl IndexReader {
     /// Update searchers so that they reflect the state of the last
     /// `.commit()`.
     ///
-    /// If you set up the `OnCommit` `ReloadPolicy` (which is the default)
+    /// If you set up the [`ReloadPolicy::OnCommit`] (which is the default)
     /// every commit should be rapidly reflected on your `IndexReader` and you should
     /// not need to call `reload()` at all.
     ///

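A sketch of the `ReloadPolicy::Manual` path documented above (assuming a recent tantivy release; the explicit `IndexReader` annotation is needed because the builder is consumed via `try_into`):

```rust
use tantivy::schema::{Schema, TEXT};
use tantivy::{doc, Index, IndexReader, ReloadPolicy};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let body = schema_builder.add_text_field("body", TEXT);
    let index = Index::create_in_ram(schema_builder.build());

    // With the Manual policy, new commits only become visible
    // after an explicit reload().
    let reader: IndexReader = index
        .reader_builder()
        .reload_policy(ReloadPolicy::Manual)
        .try_into()?;

    let mut writer = index.writer(50_000_000)?;
    writer.add_document(doc!(body => "hello"))?;
    writer.commit()?;

    assert_eq!(reader.searcher().num_docs(), 0); // still the stale view
    reader.reload()?;
    assert_eq!(reader.searcher().num_docs(), 1); // fresh view
    Ok(())
}
```
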
@@ -27,8 +27,7 @@
 //!
 //! - at the segment level, the
 //! [`SegmentReader`'s `doc` method](../struct.SegmentReader.html#method.doc)
-//! - at the index level, the
-//! [`Searcher`'s `doc` method](../struct.Searcher.html#method.doc)
+//! - at the index level, the [`Searcher::doc()`](crate::Searcher::doc) method
 //!
 //! !
 

@@ -152,7 +152,7 @@ pub use self::whitespace_tokenizer::WhitespaceTokenizer;
 
 /// Maximum authorized len (in bytes) for a token.
 ///
-/// Tokenizer are in charge of not emitting tokens larger than this value.
+/// Tokenizers are in charge of not emitting tokens larger than this value.
 /// Currently, if a faulty tokenizer implementation emits tokens with a length larger than
 /// `2^16 - 1 - 5`, the token will simply be ignored downstream.
 pub const MAX_TOKEN_LEN: usize = u16::MAX as usize - 5;