mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-07 17:42:55 +00:00
doc(termdict) expose structs (#1242)
* doc(termdict) expose structs also add merger doc + lint refs #1232
This commit is contained in:
@@ -10,7 +10,7 @@ use tantivy_fst::Streamer;
|
||||
/// Given a list of sorted term streams,
|
||||
/// returns an iterator over sorted unique terms.
|
||||
///
|
||||
/// The item yield is actually a pair with
|
||||
/// The item yielded is actually a pair with
|
||||
/// - the term
|
||||
/// - a slice with the ordinal of the segments containing
|
||||
/// the term.
|
||||
@@ -39,6 +39,11 @@ impl<'a> TermMerger<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Iterator over (segment ordinal, [TermOrdinal]) sorted by segment ordinal
|
||||
///
|
||||
/// This method may be called
|
||||
/// if [Self::advance] has been called before
|
||||
/// and `true` was returned.
|
||||
pub fn matching_segments<'b: 'a>(&'b self) -> impl 'b + Iterator<Item = (usize, TermOrdinal)> {
|
||||
self.current_segment_and_term_ordinals
|
||||
.iter()
|
||||
@@ -46,8 +51,8 @@ impl<'a> TermMerger<'a> {
|
||||
}
|
||||
|
||||
/// Advance the term iterator to the next term.
|
||||
/// Returns true if there is indeed another term
|
||||
/// False if there is none.
|
||||
/// Returns `true` if there is indeed another term
|
||||
/// `false` if there is none.
|
||||
pub fn advance(&mut self) -> bool {
|
||||
if let Some((k, values)) = self.union.next() {
|
||||
self.current_key.clear();
|
||||
@@ -66,17 +71,17 @@ impl<'a> TermMerger<'a> {
|
||||
/// Returns the current term.
|
||||
///
|
||||
/// This method may be called
|
||||
/// iff advance() has been called before
|
||||
/// and "true" was returned.
|
||||
/// if [Self::advance] has been called before
|
||||
/// and `true` was returned.
|
||||
pub fn key(&self) -> &[u8] {
|
||||
&self.current_key
|
||||
}
|
||||
|
||||
/// Iterator over (segment ordinal, TermInfo) pairs iterator sorted by the ordinal.
|
||||
/// Iterator over (segment ordinal, [TermInfo]) pairs iterator sorted by the ordinal.
|
||||
///
|
||||
/// This method may be called
|
||||
/// iff advance() has been called before
|
||||
/// and "true" was returned.
|
||||
/// if [Self::advance] has been called before
|
||||
/// and `true` was returned.
|
||||
pub fn current_segment_ords_and_term_infos<'b: 'a>(
|
||||
&'b self,
|
||||
) -> impl 'b + Iterator<Item = (usize, TermInfo)> {
|
||||
|
||||
@@ -6,7 +6,7 @@ about the term.
|
||||
Internally, the term dictionary relies on the `fst` crate to store
|
||||
a sorted mapping that associates each term with its rank in the lexicographical order.
|
||||
For instance, in a dictionary containing the sorted terms "abba", "bjork", "blur" and "donovan",
|
||||
the `TermOrdinal` are respectively `0`, `1`, `2`, and `3`.
|
||||
the [TermOrdinal] are respectively `0`, `1`, `2`, and `3`.
|
||||
|
||||
For `u64`-terms, tantivy explicitly uses a `BigEndian` representation to ensure that the
|
||||
lexicographical order matches the natural order of integers.
|
||||
@@ -20,37 +20,18 @@ as `u64`.
|
||||
A second datastructure makes it possible to access a [`TermInfo`](../postings/struct.TermInfo.html).
|
||||
*/
|
||||
|
||||
use tantivy_fst::automaton::AlwaysMatch;
|
||||
|
||||
mod fst_termdict;
|
||||
use fst_termdict as termdict;
|
||||
|
||||
mod merger;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
pub use self::merger::TermMerger;
|
||||
pub use self::termdict::TermDictionary;
|
||||
pub use self::termdict::TermDictionaryBuilder;
|
||||
pub use self::termdict::TermStreamer;
|
||||
|
||||
/// Position of the term in the sorted list of terms.
|
||||
pub type TermOrdinal = u64;
|
||||
|
||||
/// The term dictionary contains all of the terms in
|
||||
/// `tantivy index` in a sorted manner.
|
||||
pub type TermDictionary = self::termdict::TermDictionary;
|
||||
|
||||
/// Builder for the new term dictionary.
|
||||
///
|
||||
/// Inserting must be done in the order of the `keys`.
|
||||
pub type TermDictionaryBuilder<W> = self::termdict::TermDictionaryBuilder<W>;
|
||||
|
||||
/// Given a list of sorted term streams,
|
||||
/// returns an iterator over sorted unique terms.
|
||||
///
|
||||
/// The item yield is actually a pair with
|
||||
/// - the term
|
||||
/// - a slice with the ordinal of the segments containing
|
||||
/// the terms.
|
||||
pub type TermMerger<'a> = self::merger::TermMerger<'a>;
|
||||
|
||||
/// `TermStreamer` acts as a cursor over a range of terms of a segment.
|
||||
/// Terms are guaranteed to be sorted.
|
||||
pub type TermStreamer<'a, A = AlwaysMatch> = self::termdict::TermStreamer<'a, A>;
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
Reference in New Issue
Block a user