doc(termdict) expose structs (#1242)

* doc(termdict) expose structs
also add merger doc + lint
refs #1232
This commit is contained in:
Antoine G
2022-01-03 14:20:31 +01:00
committed by GitHub
parent e5e252cbc0
commit 3129d86743
2 changed files with 20 additions and 34 deletions

View File

@@ -10,7 +10,7 @@ use tantivy_fst::Streamer;
/// Given a list of sorted term streams,
/// returns an iterator over sorted unique terms.
///
/// The item yield is actually a pair with
/// The item yielded is actually a pair with
/// - the term
/// - a slice with the ordinal of the segments containing
/// the term.
@@ -39,6 +39,11 @@ impl<'a> TermMerger<'a> {
}
}
/// Iterator over (segment ordinal, [TermOrdinal]) sorted by segment ordinal
///
/// This method may be called
/// if [Self::advance] has been called before
/// and `true` was returned.
pub fn matching_segments<'b: 'a>(&'b self) -> impl 'b + Iterator<Item = (usize, TermOrdinal)> {
self.current_segment_and_term_ordinals
.iter()
@@ -46,8 +51,8 @@ impl<'a> TermMerger<'a> {
}
/// Advance the term iterator to the next term.
/// Returns true if there is indeed another term
/// False if there is none.
/// Returns `true` if there is indeed another term
/// `false` if there is none.
pub fn advance(&mut self) -> bool {
if let Some((k, values)) = self.union.next() {
self.current_key.clear();
@@ -66,17 +71,17 @@ impl<'a> TermMerger<'a> {
/// Returns the current term.
///
/// This method may be called
/// iff advance() has been called before
/// and "true" was returned.
/// if [Self::advance] has been called before
/// and `true` was returned.
pub fn key(&self) -> &[u8] {
// Hands out a borrow of the internal key buffer rather than a copy.
// NOTE(review): `advance` clears this buffer on each step and presumably
// refills it with the new term's bytes — confirm in the full `advance` body.
&self.current_key
}
/// Iterator over (segment ordinal, TermInfo) pairs iterator sorted by the ordinal.
/// Iterator over (segment ordinal, [TermInfo]) pairs, sorted by segment ordinal.
///
/// This method may be called
/// iff advance() has been called before
/// and "true" was returned.
/// if [Self::advance] has been called before
/// and `true` was returned.
pub fn current_segment_ords_and_term_infos<'b: 'a>(
&'b self,
) -> impl 'b + Iterator<Item = (usize, TermInfo)> {

View File

@@ -6,7 +6,7 @@ about the term.
Internally, the term dictionary relies on the `fst` crate to store
a sorted mapping that associates each term to its rank in the lexicographical order.
For instance, in a dictionary containing the sorted terms "abba", "bjork", "blur" and "donovan",
the `TermOrdinal` are respectively `0`, `1`, `2`, and `3`.
the [TermOrdinal] are respectively `0`, `1`, `2`, and `3`.
For `u64`-terms, tantivy explicitly uses a `BigEndian` representation to ensure that the
lexicographical order matches the natural order of integers.
@@ -20,37 +20,18 @@ as `u64`.
A second data structure makes it possible to access a [`TermInfo`](../postings/struct.TermInfo.html).
*/
use tantivy_fst::automaton::AlwaysMatch;
mod fst_termdict;
use fst_termdict as termdict;
mod merger;
#[cfg(test)]
mod tests;
pub use self::merger::TermMerger;
pub use self::termdict::TermDictionary;
pub use self::termdict::TermDictionaryBuilder;
pub use self::termdict::TermStreamer;
/// Position of the term in the sorted list of terms.
pub type TermOrdinal = u64;
/// The term dictionary contains all of the terms in
/// `tantivy index` in a sorted manner.
pub type TermDictionary = self::termdict::TermDictionary;
/// Builder for the new term dictionary.
///
/// Inserting must be done in the order of the `keys`.
pub type TermDictionaryBuilder<W> = self::termdict::TermDictionaryBuilder<W>;
/// Given a list of sorted term streams,
/// returns an iterator over sorted unique terms.
///
/// The item yield is actually a pair with
/// - the term
/// - a slice with the ordinal of the segments containing
/// the terms.
pub type TermMerger<'a> = self::merger::TermMerger<'a>;
/// `TermStreamer` acts as a cursor over a range of terms of a segment.
/// Terms are guaranteed to be sorted.
pub type TermStreamer<'a, A = AlwaysMatch> = self::termdict::TermStreamer<'a, A>;
#[cfg(test)]
mod tests;