Mirror of https://github.com/quickwit-oss/tantivy.git (synced 2025-12-22 18:19:58 +00:00)
Fix typos again (#2753)
Found via `codespell -S benches,stopwords.rs -L womens,parth,abd,childs,ond,ser,ue,mot,hel,atleast,pris,claus,allo`
@@ -78,7 +78,7 @@ This will slightly increase space and access time. [#2439](https://github.com/qu
 - **Store DateTime as nanoseconds in doc store** DateTime in the doc store was truncated to microseconds previously. This removes this truncation, while still keeping backwards compatibility. [#2486](https://github.com/quickwit-oss/tantivy/pull/2486)(@PSeitz)
-- **Performace/Memory**
+- **Performance/Memory**
 - lift clauses in LogicalAst for optimized ast during execution [#2449](https://github.com/quickwit-oss/tantivy/pull/2449)(@PSeitz)
 - Use Vec instead of BTreeMap to back OwnedValue object [#2364](https://github.com/quickwit-oss/tantivy/pull/2364)(@fulmicoton)
 - Replace TantivyDocument with CompactDoc. CompactDoc is much smaller and provides similar performance. [#2402](https://github.com/quickwit-oss/tantivy/pull/2402)(@PSeitz)
TODO.txt
@@ -10,7 +10,7 @@ rename FastFieldReaders::open to load
 remove fast field reader
 find a way to unify the two DateTime.
-readd type check in the filter wrapper
+re-add type check in the filter wrapper
 add unit test on columnar list columns.
@@ -73,7 +73,7 @@ The crate introduces the following concepts.
 `Columnar` is an equivalent of a dataframe.
 It maps `column_key` to `Column`.
-A `Column<T>` asssociates a `RowId` (u32) to any
+A `Column<T>` associates a `RowId` (u32) to any
 number of values.
 This is made possible by wrapping a `ColumnIndex` and a `ColumnValue` object.
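As a rough illustration of the `ColumnIndex`/`ColumnValue` split described in this README hunk, here is a minimal self-contained sketch of how a column can associate a `RowId` with any number of values; the `ToyColumn` type and its layout are invented for illustration and are not tantivy's actual columnar API.

```rust
// Minimal sketch of the idea behind `Column<T>`: an index maps each RowId to a
// range of positions, and a value store holds the values themselves.
// All names here are illustrative, not tantivy's real types.
type RowId = u32;

struct ToyColumn<T> {
    // start offset of each row's values; row i owns values[offsets[i]..offsets[i + 1]]
    offsets: Vec<u32>,
    values: Vec<T>,
}

impl<T> ToyColumn<T> {
    fn values_for_row(&self, row: RowId) -> &[T] {
        let start = self.offsets[row as usize] as usize;
        let end = self.offsets[row as usize + 1] as usize;
        &self.values[start..end]
    }
}

fn main() {
    // Row 0 has two values, row 1 has none, row 2 has one.
    let col = ToyColumn {
        offsets: vec![0, 2, 2, 3],
        values: vec![10u64, 11, 42],
    };
    assert_eq!(col.values_for_row(0), &[10, 11]);
    assert!(col.values_for_row(1).is_empty());
    assert_eq!(col.values_for_row(2), &[42]);
}
```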
@@ -1,7 +1,7 @@
 use std::fmt::Debug;
 use std::net::Ipv6Addr;
-/// Montonic maps a value to u128 value space
+/// Monotonic maps a value to u128 value space
 /// Monotonic mapping enables `PartialOrd` on u128 space without conversion to original space.
 pub trait MonotonicallyMappableToU128: 'static + PartialOrd + Copy + Debug + Send + Sync {
 /// Converts a value to u128.
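To make the doc comment above concrete, here is a hedged sketch of what such a monotonic u128 mapping can look like for `Ipv6Addr`; the `to_u128`/`from_u128` method names are assumptions for the sketch, only the trait name and bounds come from the hunk.

```rust
use std::fmt::Debug;
use std::net::Ipv6Addr;

// Sketch of the trait from the hunk above; the method names are assumptions.
pub trait MonotonicallyMappableToU128: 'static + PartialOrd + Copy + Debug + Send + Sync {
    fn to_u128(self) -> u128;
    fn from_u128(value: u128) -> Self;
}

// IPv6 addresses already have an order-preserving u128 representation in std.
impl MonotonicallyMappableToU128 for Ipv6Addr {
    fn to_u128(self) -> u128 {
        u128::from(self)
    }
    fn from_u128(value: u128) -> Self {
        Ipv6Addr::from(value)
    }
}

fn main() {
    let a = Ipv6Addr::LOCALHOST;
    let b = Ipv6Addr::from(u128::from(a) + 1);
    // Comparing in u128 space agrees with comparing the addresses themselves.
    assert_eq!(a < b, a.to_u128() < b.to_u128());
}
```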
@@ -8,7 +8,7 @@ use crate::column_values::ColumnValues;
 const MID_POINT: u64 = (1u64 << 32) - 1u64;
 /// `Line` describes a line function `y: ax + b` using integer
-/// arithmetics.
+/// arithmetic.
 ///
 /// The slope is in fact a decimal split into a 32 bit integer value,
 /// and a 32-bit decimal value.
@@ -94,7 +94,7 @@ impl Line {
 // `(i, ys[])`.
 //
 // The best intercept therefore has the form
-// `y[i] - line.eval(i)` (using wrapping arithmetics).
+// `y[i] - line.eval(i)` (using wrapping arithmetic).
 // In other words, the best intercept is one of the `y - Line::eval(ys[i])`
 // and our task is just to pick the one that minimizes our error.
 //
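The two hunks above describe a 32.32 fixed-point line with wrapping arithmetic and an intercept chosen among the `y[i] - eval(i)` candidates. Below is a small standalone sketch of that idea; the type, field names, and the exact selection rule (taking the smallest candidate so every residual stays non-negative) are illustrative assumptions, not the crate's implementation.

```rust
// Sketch of a line y = a*x + b where `a` is a 32.32 fixed-point slope stored
// in a u64 and evaluation uses wrapping arithmetic.
#[derive(Clone, Copy)]
struct ToyLine {
    slope_fixed: u64, // 32.32 fixed point: (integer_part << 32) | fractional_part
    intercept: u64,
}

impl ToyLine {
    fn eval(&self, x: u64) -> u64 {
        // (slope_fixed * x) >> 32 recovers the integer part of a*x.
        let ax = (self.slope_fixed as u128).wrapping_mul(x as u128) >> 32;
        (ax as u64).wrapping_add(self.intercept)
    }
}

// Picks the intercept among the candidates `y[i] - eval(i)` (intercept 0);
// taking the smallest candidate keeps every residual non-negative and small.
fn best_intercept(ys: &[u64], slope_fixed: u64) -> u64 {
    let base = ToyLine { slope_fixed, intercept: 0 };
    ys.iter()
        .enumerate()
        .map(|(i, &y)| y.wrapping_sub(base.eval(i as u64)))
        .min()
        .unwrap_or(0)
}

fn main() {
    // slope of exactly 2.0 in 32.32 fixed point
    let slope = 2u64 << 32;
    let ys = [10u64, 12, 15, 16];
    let b = best_intercept(&ys, slope);
    let line = ToyLine { slope_fixed: slope, intercept: b };
    // Every point sits at or above the fitted line.
    assert!(ys.iter().enumerate().all(|(i, &y)| y >= line.eval(i as u64)));
}
```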
@@ -52,7 +52,7 @@ pub trait ColumnCodecEstimator<T = u64>: 'static {
 ) -> io::Result<()>;
 }
-/// A column codec describes a colunm serialization format.
+/// A column codec describes a column serialization format.
 pub trait ColumnCodec<T: PartialOrd = u64> {
 /// Specialized `ColumnValues` type.
 type ColumnValues: ColumnValues<T> + 'static;
@@ -16,7 +16,7 @@ use super::{AggregationError, Key};
 use crate::TantivyError;
 #[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)]
-/// The final aggegation result.
+/// The final aggregation result.
 pub struct AggregationResults(pub FxHashMap<String, AggregationResult>);
 impl AggregationResults {
@@ -32,7 +32,7 @@ use crate::{DocId, SegmentReader, TantivyError};
 ///
 /// # Implementation Requirements
 ///
-/// Implementors must:
+/// Implementers must:
 /// 1. Derive `Debug`, `Clone`, `Serialize`, and `Deserialize`
 /// 2. Use `#[typetag::serde]` attribute on the impl block
 /// 3. Implement `build_query()` to construct the query from schema/tokenizers
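The requirements listed in this hunk (serde derives, `#[typetag::serde]`, a `build_query()` method) can be sketched roughly as follows; the trait name `CustomQuery`, its signature, and the surrounding setup are assumptions for illustration, and the snippet needs the `serde`, `serde_json`, and `typetag` crates.

```rust
use serde::{Deserialize, Serialize};

// Hypothetical trait mirroring the listed requirements; name and signature are
// assumptions for this sketch, not tantivy's actual trait.
#[typetag::serde]
trait CustomQuery: std::fmt::Debug {
    fn build_query(&self) -> String;
}

#[derive(Debug, Clone, Serialize, Deserialize)]
struct TermQueryTemplate {
    field: String,
    value: String,
}

#[typetag::serde]
impl CustomQuery for TermQueryTemplate {
    fn build_query(&self) -> String {
        format!("{}:{}", self.field, self.value)
    }
}

fn main() {
    let q: Box<dyn CustomQuery> = Box::new(TermQueryTemplate {
        field: "title".to_string(),
        value: "tantivy".to_string(),
    });
    // typetag lets the trait object round-trip through serde with a type tag.
    let json = serde_json::to_string(&q).unwrap();
    let back: Box<dyn CustomQuery> = serde_json::from_str(&json).unwrap();
    assert_eq!(back.build_query(), "title:tantivy");
}
```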
@@ -550,7 +550,7 @@ trait TermAggregationMap: Clone + Debug + 'static {
 /// Estimate the memory consumption of this struct in bytes.
 fn get_memory_consumption(&self) -> usize;
-/// Returns the bucket assocaited to a given term_id.
+/// Returns the bucket associated to a given term_id.
 fn term_entry(
 &mut self,
 term_id: u64,
@@ -62,7 +62,7 @@ impl ExtendedStatsAggregation {
 /// Extended stats contains a collection of statistics
 /// they extends stats adding variance, standard deviation
-/// and bound informations
+/// and bound information
 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
 pub struct ExtendedStats {
 /// The number of documents.
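For context on the variance, standard deviation, and bound figures mentioned above, here is a minimal sketch of how such extended statistics can be derived from running sums; the field names and the sigma-based bounds are illustrative assumptions, not the exact layout of `ExtendedStats`.

```rust
// Illustrative computation of extended statistics from running sums.
struct ToyExtendedStats {
    count: u64,
    avg: f64,
    variance: f64,
    std_deviation: f64,
    // "bounds": avg plus/minus `sigma` standard deviations
    std_deviation_bounds: (f64, f64),
}

fn extended_stats(values: &[f64], sigma: f64) -> ToyExtendedStats {
    let count = values.len() as u64;
    let sum: f64 = values.iter().sum();
    let sum_of_squares: f64 = values.iter().map(|v| v * v).sum();
    let avg = sum / count as f64;
    // population variance: E[x^2] - E[x]^2
    let variance = sum_of_squares / count as f64 - avg * avg;
    let std_deviation = variance.sqrt();
    ToyExtendedStats {
        count,
        avg,
        variance,
        std_deviation,
        std_deviation_bounds: (avg - sigma * std_deviation, avg + sigma * std_deviation),
    }
}

fn main() {
    let stats = extended_stats(&[2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0], 2.0);
    assert_eq!(stats.count, 8);
    assert!((stats.avg - 5.0).abs() < 1e-9);
    assert!((stats.variance - 4.0).abs() < 1e-9);
    assert!((stats.std_deviation - 2.0).abs() < 1e-9);
    assert!((stats.std_deviation_bounds.1 - 9.0).abs() < 1e-9);
}
```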
@@ -108,7 +108,7 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
 /// Opens a file and returns a boxed `FileHandle`.
 ///
 /// Users of `Directory` should typically call `Directory::open_read(...)`,
-/// while `Directory` implementor should implement `get_file_handle()`.
+/// while `Directory` implementer should implement `get_file_handle()`.
 fn get_file_handle(&self, path: &Path) -> Result<Arc<dyn FileHandle>, OpenReadError>;
 /// Once a virtual file is open, its data may not
@@ -104,7 +104,7 @@ pub enum TantivyError {
 #[error("{0:?}")]
 IncompatibleIndex(Incompatibility),
 /// An internal error occurred. This is are internal states that should not be reached.
-/// e.g. a datastructure is incorrectly inititalized.
+/// e.g. a datastructure is incorrectly initialized.
 #[error("Internal error: '{0}'")]
 InternalError(String),
 #[error("Deserialize error: {0}")]
@@ -125,7 +125,7 @@
 //!
 //! - **Searching**: [Searcher] searches the segments with anything that implements
 //! [Query](query::Query) and merges the results. The list of [supported
-//! queries](query::Query#implementors). Custom Queries are supported by implementing the
+//! queries](query::Query#implementers). Custom Queries are supported by implementing the
 //! [Query](query::Query) trait.
 //!
 //! - **[Directory](directory)**: Abstraction over the storage where the index data is stored.
@@ -26,7 +26,7 @@
 //! significant amount of time when indexing by avoiding the additional allocations.
 //!
 //! ### Important Note
-//! The implementor of the `Document` trait must be `'static` and safe to send across
+//! The implementer of the `Document` trait must be `'static` and safe to send across
 //! thread boundaries.
 //!
 //! ## Reusing existing types
@@ -107,7 +107,7 @@
 //!
 //! Values can just as easily be customised as documents by implementing the `Value` trait.
 //!
-//! The implementor of this type should not own the data it's returning, instead it should just
+//! The implementer of this type should not own the data it's returning, instead it should just
 //! hold references of the data held by the parent [Document] which can then be passed
 //! on to the [ReferenceValue].
 //!
@@ -117,7 +117,7 @@
 //!
 //! ### A note about returning values
 //! The custom value type does not have to be the type stored by the document, instead the
-//! implementor of a `Value` can just be used as a way to convert between the owned type
+//! implementer of a `Value` can just be used as a way to convert between the owned type
 //! kept in the parent document, and the value passed into Tantivy.
 //!
 //! ```
@@ -102,7 +102,7 @@ fn can_block_match_automaton_with_start(
 let end_range = end_key[common_prefix_len];
 // things starting with start_range were handled in match_range_start
-// this starting with end_range are handled bellow.
+// this starting with end_range are handled below.
 // this can run for 0 iteration in cases such as (abc, abd]
 for rb in (start_range + 1)..end_range {
 let new_state = automaton.accept(&base_state, rb);
@@ -141,7 +141,7 @@ impl<TSSTable: SSTable> Dictionary<TSSTable> {
 Ok(TSSTable::delta_reader(data))
 } else {
 // if operations are sync, we assume latency is almost null, and there is no point in
-// merging accross holes
+// merging across holes
 let blocks = self.get_block_iterator_for_range_and_automaton(key_range, automaton, 0);
 let data = blocks
 .map(|block_addr| self.sstable_slice.read_bytes_slice(block_addr.byte_range))
@@ -181,7 +181,7 @@ impl SSTableIndexV3 {
 // cannot match. this isn't as bad as it sounds given the fst is a lot smaller than the rest of the
 // sstable.
 // To do that, we can't use tantivy_fst's Stream with an automaton, as we need to know 2 consecutive
-// fst keys to form a proper opinion on whether this is a match, which we wan't translate into a
+// fst keys to form a proper opinion on whether this is a match, which we want translate into a
 // single automaton
 struct GetBlockForAutomaton<'a, A: Automaton> {
 streamer: tantivy_fst::map::Stream<'a>,
@@ -142,7 +142,7 @@ where
 }
 /// Same as `into_stream_async`, but tries to issue a single io operation when requesting
-/// blocks that are not consecutive, but also less than `merge_holes_under_bytes` bytes appart.
+/// blocks that are not consecutive, but also less than `merge_holes_under_bytes` bytes apart.
 pub async fn into_stream_async_merging_holes(
 self,
 merge_holes_under_bytes: usize,
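A small standalone sketch of the `merge_holes_under_bytes` idea documented above: coalesce block reads whose gaps are below the threshold so several blocks can be fetched in a single io operation. The types and the helper are invented for illustration and do not mirror the sstable module's internals.

```rust
use std::ops::Range;

// Coalesce sorted, non-overlapping byte ranges when the gap ("hole") between
// two consecutive ranges is smaller than `merge_holes_under_bytes`.
fn merge_ranges(ranges: &[Range<u64>], merge_holes_under_bytes: u64) -> Vec<Range<u64>> {
    let mut merged: Vec<Range<u64>> = Vec::new();
    for range in ranges {
        match merged.last_mut() {
            Some(last) if range.start - last.end < merge_holes_under_bytes => {
                // The hole is small: extend the previous read to cover this block too.
                last.end = range.end;
            }
            _ => merged.push(range.clone()),
        }
    }
    merged
}

fn main() {
    let blocks = [0..100, 110..200, 1_000..1_050];
    // A 10-byte hole is merged, the 800-byte hole is not.
    assert_eq!(merge_ranges(&blocks, 64), vec![0..200, 1_000..1_050]);
    // With a huge threshold everything collapses into one read.
    assert_eq!(merge_ranges(&blocks, 1 << 20), vec![0..1_050]);
}
```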
@@ -1,5 +1,5 @@
 //! Tokenizer are in charge of chopping text into a stream of tokens
-//! ready for indexing. This is an separate crate from tantivy, so implementors don't need to update
+//! ready for indexing. This is an separate crate from tantivy, so implementers don't need to update
 //! for each new tantivy version.
 //!
 //! To add support for a tokenizer, implement the [`Tokenizer`] trait.
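As a toy illustration of "chopping text into a stream of tokens", here is a self-contained whitespace tokenizer; it defines its own minimal trait and does not reproduce tantivy-tokenizer-api's real `Tokenizer`/`TokenStream` signatures.

```rust
// Toy tokenizer sketch: splits on whitespace and records byte offsets.
#[derive(Debug, PartialEq)]
struct Token {
    text: String,
    offset_from: usize,
    offset_to: usize,
}

trait ToyTokenizer {
    fn tokenize(&self, text: &str) -> Vec<Token>;
}

struct WhitespaceTokenizer;

impl ToyTokenizer for WhitespaceTokenizer {
    fn tokenize(&self, text: &str) -> Vec<Token> {
        let mut tokens = Vec::new();
        let mut start = None;
        for (i, c) in text.char_indices() {
            match (c.is_whitespace(), start) {
                (false, None) => start = Some(i),
                (true, Some(s)) => {
                    tokens.push(Token { text: text[s..i].to_string(), offset_from: s, offset_to: i });
                    start = None;
                }
                _ => {}
            }
        }
        if let Some(s) = start {
            tokens.push(Token { text: text[s..].to_string(), offset_from: s, offset_to: text.len() });
        }
        tokens
    }
}

fn main() {
    let tokens = WhitespaceTokenizer.tokenize("hello tantivy world");
    assert_eq!(tokens.len(), 3);
    assert_eq!(tokens[1].text, "tantivy");
    assert_eq!((tokens[1].offset_from, tokens[1].offset_to), (6, 13));
}
```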