diff --git a/CHANGELOG.md b/CHANGELOG.md index 9923a4582..bffa180e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -78,7 +78,7 @@ This will slightly increase space and access time. [#2439](https://github.com/qu - **Store DateTime as nanoseconds in doc store** DateTime in the doc store was truncated to microseconds previously. This removes this truncation, while still keeping backwards compatibility. [#2486](https://github.com/quickwit-oss/tantivy/pull/2486)(@PSeitz) -- **Performace/Memory** +- **Performance/Memory** - lift clauses in LogicalAst for optimized ast during execution [#2449](https://github.com/quickwit-oss/tantivy/pull/2449)(@PSeitz) - Use Vec instead of BTreeMap to back OwnedValue object [#2364](https://github.com/quickwit-oss/tantivy/pull/2364)(@fulmicoton) - Replace TantivyDocument with CompactDoc. CompactDoc is much smaller and provides similar performance. [#2402](https://github.com/quickwit-oss/tantivy/pull/2402)(@PSeitz) diff --git a/TODO.txt b/TODO.txt index 14b1d849b..469a7455e 100644 --- a/TODO.txt +++ b/TODO.txt @@ -10,7 +10,7 @@ rename FastFieldReaders::open to load remove fast field reader find a way to unify the two DateTime. -readd type check in the filter wrapper +re-add type check in the filter wrapper add unit test on columnar list columns. diff --git a/columnar/README.md b/columnar/README.md index fa86cdea0..543214557 100644 --- a/columnar/README.md +++ b/columnar/README.md @@ -73,7 +73,7 @@ The crate introduces the following concepts. `Columnar` is an equivalent of a dataframe. It maps `column_key` to `Column`. -A `Column` asssociates a `RowId` (u32) to any +A `Column` associates a `RowId` (u32) to any number of values. This is made possible by wrapping a `ColumnIndex` and a `ColumnValue` object. 
diff --git a/columnar/src/column_values/monotonic_mapping_u128.rs b/columnar/src/column_values/monotonic_mapping_u128.rs index 4d59c6750..9e16dc58c 100644 --- a/columnar/src/column_values/monotonic_mapping_u128.rs +++ b/columnar/src/column_values/monotonic_mapping_u128.rs @@ -1,7 +1,7 @@ use std::fmt::Debug; use std::net::Ipv6Addr; -/// Montonic maps a value to u128 value space +/// Monotonically maps a value to u128 value space /// Monotonic mapping enables `PartialOrd` on u128 space without conversion to original space. pub trait MonotonicallyMappableToU128: 'static + PartialOrd + Copy + Debug + Send + Sync { /// Converts a value to u128. diff --git a/columnar/src/column_values/u64_based/line.rs b/columnar/src/column_values/u64_based/line.rs index 6782fad76..4e6d4802d 100644 --- a/columnar/src/column_values/u64_based/line.rs +++ b/columnar/src/column_values/u64_based/line.rs @@ -8,7 +8,7 @@ use crate::column_values::ColumnValues; const MID_POINT: u64 = (1u64 << 32) - 1u64; /// `Line` describes a line function `y: ax + b` using integer -/// arithmetics. +/// arithmetic. /// /// The slope is in fact a decimal split into a 32 bit integer value, /// and a 32-bit decimal value. @@ -94,7 +94,7 @@ impl Line { // `(i, ys[])`. // // The best intercept therefore has the form - // `y[i] - line.eval(i)` (using wrapping arithmetics). + // `y[i] - line.eval(i)` (using wrapping arithmetic). // In other words, the best intercept is one of the `y - Line::eval(ys[i])` // and our task is just to pick the one that minimizes our error. // diff --git a/columnar/src/column_values/u64_based/mod.rs b/columnar/src/column_values/u64_based/mod.rs index 3edfe1c49..aa2d9818b 100644 --- a/columnar/src/column_values/u64_based/mod.rs +++ b/columnar/src/column_values/u64_based/mod.rs @@ -52,7 +52,7 @@ pub trait ColumnCodecEstimator: 'static { ) -> io::Result<()>; } -/// A column codec describes a colunm serialization format. +/// A column codec describes a column serialization format. 
pub trait ColumnCodec { /// Specialized `ColumnValues` type. type ColumnValues: ColumnValues + 'static; diff --git a/src/aggregation/agg_result.rs b/src/aggregation/agg_result.rs index 34b5e2043..85b81664d 100644 --- a/src/aggregation/agg_result.rs +++ b/src/aggregation/agg_result.rs @@ -16,7 +16,7 @@ use super::{AggregationError, Key}; use crate::TantivyError; #[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)] -/// The final aggegation result. +/// The final aggregation result. pub struct AggregationResults(pub FxHashMap); impl AggregationResults { diff --git a/src/aggregation/bucket/filter.rs b/src/aggregation/bucket/filter.rs index cd5f0d6cc..18f2a692a 100644 --- a/src/aggregation/bucket/filter.rs +++ b/src/aggregation/bucket/filter.rs @@ -32,7 +32,7 @@ use crate::{DocId, SegmentReader, TantivyError}; /// /// # Implementation Requirements /// -/// Implementors must: +/// Implementers must: /// 1. Derive `Debug`, `Clone`, `Serialize`, and `Deserialize` /// 2. Use `#[typetag::serde]` attribute on the impl block /// 3. Implement `build_query()` to construct the query from schema/tokenizers diff --git a/src/aggregation/bucket/term_agg.rs b/src/aggregation/bucket/term_agg.rs index 53ce7a5e5..d87cd0078 100644 --- a/src/aggregation/bucket/term_agg.rs +++ b/src/aggregation/bucket/term_agg.rs @@ -550,7 +550,7 @@ trait TermAggregationMap: Clone + Debug + 'static { /// Estimate the memory consumption of this struct in bytes. fn get_memory_consumption(&self) -> usize; - /// Returns the bucket assocaited to a given term_id. + /// Returns the bucket associated to a given term_id. 
fn term_entry( &mut self, term_id: u64, diff --git a/src/aggregation/metric/extended_stats.rs b/src/aggregation/metric/extended_stats.rs index 0250118a2..d7302e5f5 100644 --- a/src/aggregation/metric/extended_stats.rs +++ b/src/aggregation/metric/extended_stats.rs @@ -62,7 +62,7 @@ impl ExtendedStatsAggregation { /// Extended stats contains a collection of statistics /// they extends stats adding variance, standard deviation -/// and bound informations +/// and bound information #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct ExtendedStats { /// The number of documents. diff --git a/src/directory/directory.rs b/src/directory/directory.rs index e7779382a..0bc4b7f95 100644 --- a/src/directory/directory.rs +++ b/src/directory/directory.rs @@ -108,7 +108,7 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static { /// Opens a file and returns a boxed `FileHandle`. /// /// Users of `Directory` should typically call `Directory::open_read(...)`, - /// while `Directory` implementor should implement `get_file_handle()`. + /// while `Directory` implementer should implement `get_file_handle()`. fn get_file_handle(&self, path: &Path) -> Result, OpenReadError>; /// Once a virtual file is open, its data may not diff --git a/src/error.rs b/src/error.rs index 487cc21a9..39fee20d7 100644 --- a/src/error.rs +++ b/src/error.rs @@ -104,7 +104,7 @@ pub enum TantivyError { #[error("{0:?}")] IncompatibleIndex(Incompatibility), /// An internal error occurred. This is are internal states that should not be reached. - /// e.g. a datastructure is incorrectly inititalized. + /// e.g. a datastructure is incorrectly initialized. #[error("Internal error: '{0}'")] InternalError(String), #[error("Deserialize error: {0}")] diff --git a/src/lib.rs b/src/lib.rs index c8d15552e..8077565c7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -125,7 +125,7 @@ //! //! - **Searching**: [Searcher] searches the segments with anything that implements //! 
[Query](query::Query) and merges the results. The list of [supported -//! queries](query::Query#implementors). Custom Queries are supported by implementing the +//! queries](query::Query#implementors). Custom Queries are supported by implementing the //! [Query](query::Query) trait. //! //! - **[Directory](directory)**: Abstraction over the storage where the index data is stored. diff --git a/src/schema/document/mod.rs b/src/schema/document/mod.rs index 149fa3a7b..8168ee811 100644 --- a/src/schema/document/mod.rs +++ b/src/schema/document/mod.rs @@ -26,7 +26,7 @@ //! significant amount of time when indexing by avoiding the additional allocations. //! //! ### Important Note -//! The implementor of the `Document` trait must be `'static` and safe to send across +//! The implementer of the `Document` trait must be `'static` and safe to send across //! thread boundaries. //! //! ## Reusing existing types @@ -107,7 +107,7 @@ //! //! Values can just as easily be customised as documents by implementing the `Value` trait. //! -//! The implementor of this type should not own the data it's returning, instead it should just +//! The implementer of this type should not own the data it's returning, instead it should just //! hold references of the data held by the parent [Document] which can then be passed //! on to the [ReferenceValue]. //! @@ -117,7 +117,7 @@ //! //! ### A note about returning values //! The custom value type does not have to be the type stored by the document, instead the -//! implementor of a `Value` can just be used as a way to convert between the owned type +//! implementer of a `Value` can just be used as a way to convert between the owned type //! kept in the parent document, and the value passed into Tantivy. //! //! 
``` diff --git a/sstable/src/block_match_automaton.rs b/sstable/src/block_match_automaton.rs index 0c84a05e0..0f375c6f4 100644 --- a/sstable/src/block_match_automaton.rs +++ b/sstable/src/block_match_automaton.rs @@ -102,7 +102,7 @@ fn can_block_match_automaton_with_start( let end_range = end_key[common_prefix_len]; // things starting with start_range were handled in match_range_start - // this starting with end_range are handled bellow. + // those starting with end_range are handled below. // this can run for 0 iteration in cases such as (abc, abd] for rb in (start_range + 1)..end_range { let new_state = automaton.accept(&base_state, rb); diff --git a/sstable/src/dictionary.rs b/sstable/src/dictionary.rs index 501a41bfe..56ddf60be 100644 --- a/sstable/src/dictionary.rs +++ b/sstable/src/dictionary.rs @@ -141,7 +141,7 @@ impl Dictionary { Ok(TSSTable::delta_reader(data)) } else { // if operations are sync, we assume latency is almost null, and there is no point in - // merging accross holes + // merging across holes let blocks = self.get_block_iterator_for_range_and_automaton(key_range, automaton, 0); let data = blocks .map(|block_addr| self.sstable_slice.read_bytes_slice(block_addr.byte_range)) diff --git a/sstable/src/sstable_index_v3.rs b/sstable/src/sstable_index_v3.rs index df5b5d9f3..191a62b99 100644 --- a/sstable/src/sstable_index_v3.rs +++ b/sstable/src/sstable_index_v3.rs @@ -181,7 +181,7 @@ impl SSTableIndexV3 { // cannot match. this isn't as bad as it sounds given the fst is a lot smaller than the rest of the // sstable. 
// To do that, we can't use tantivy_fst's Stream with an automaton, as we need to know 2 consecutive -// fst keys to form a proper opinion on whether this is a match, which we wan't translate into a +// fst keys to form a proper opinion on whether this is a match, which we can't translate into a // single automaton struct GetBlockForAutomaton<'a, A: Automaton> { streamer: tantivy_fst::map::Stream<'a>, diff --git a/sstable/src/streamer.rs b/sstable/src/streamer.rs index eb0c44e29..8477f5fd2 100644 --- a/sstable/src/streamer.rs +++ b/sstable/src/streamer.rs @@ -142,7 +142,7 @@ where } /// Same as `into_stream_async`, but tries to issue a single io operation when requesting - /// blocks that are not consecutive, but also less than `merge_holes_under_bytes` bytes appart. + /// blocks that are not consecutive, but also less than `merge_holes_under_bytes` bytes apart. pub async fn into_stream_async_merging_holes( self, merge_holes_under_bytes: usize, diff --git a/tokenizer-api/src/lib.rs b/tokenizer-api/src/lib.rs index dcc3648a2..93f3a5714 100644 --- a/tokenizer-api/src/lib.rs +++ b/tokenizer-api/src/lib.rs @@ -1,5 +1,5 @@ //! Tokenizer are in charge of chopping text into a stream of tokens -//! ready for indexing. This is an separate crate from tantivy, so implementors don't need to update +//! ready for indexing. This is a separate crate from tantivy, so implementers don't need to update //! for each new tantivy version. //! //! To add support for a tokenizer, implement the [`Tokenizer`] trait.