Mirror of https://github.com/quickwit-oss/tantivy.git (synced 2025-12-23 02:29:57 +00:00)
clippy (#2527)

* clippy
* clippy
* clippy
* clippy
* convert allow to expect and remove unused
* cargo fmt
* cleanup
* export sample
* clippy
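The recurring change in this diff is swapping `#[allow(lint)]` for `#[expect(lint)]`, stabilized in Rust 1.81. Both attributes suppress the lint, but `expect` additionally fires the `unfulfilled_lint_expectations` warning when the expected lint no longer triggers, so stale suppressions surface on their own. A minimal sketch of the difference (hypothetical functions, not from this diff):

```rust
// `allow` silences the lint unconditionally. If a later refactor drops an
// argument and the lint stops firing, the attribute lingers unnoticed.
#[allow(clippy::too_many_arguments)]
fn spawn(a: u8, b: u8, c: u8, d: u8, e: u8, f: u8, g: u8, h: u8) -> u8 {
    a + b + c + d + e + f + g + h
}

// `expect` silences the lint too, but warns via
// `unfulfilled_lint_expectations` if `clippy::too_many_arguments`
// ever stops triggering here.
#[expect(clippy::too_many_arguments)]
fn spawn_checked(a: u8, b: u8, c: u8, d: u8, e: u8, f: u8, g: u8, h: u8) -> u8 {
    a + b + c + d + e + f + g + h
}

fn main() {
    assert_eq!(spawn(1, 1, 1, 1, 1, 1, 1, 1), 8);
    assert_eq!(spawn_checked(1, 1, 1, 1, 1, 1, 1, 1), 8);
}
```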
@@ -35,7 +35,6 @@ const IMPLS: [FilterImplPerInstructionSet; 2] = [
 const IMPLS: [FilterImplPerInstructionSet; 1] = [FilterImplPerInstructionSet::Scalar];

 impl FilterImplPerInstructionSet {
-    #[allow(unused_variables)]
     #[inline]
     fn from(code: u8) -> FilterImplPerInstructionSet {
         #[cfg(target_arch = "x86_64")]

@@ -66,7 +66,7 @@ impl<T: PartialOrd + Copy + std::fmt::Debug + Send + Sync + 'static + Default>
         &'a self,
         docs: &'a [u32],
         accessor: &Column<T>,
-    ) -> impl Iterator<Item = (DocId, T)> + '_ {
+    ) -> impl Iterator<Item = (DocId, T)> + 'a {
         if accessor.index.get_cardinality().is_full() {
             docs.iter().cloned().zip(self.val_cache.iter().cloned())
         } else {

@@ -82,7 +82,7 @@ impl<'a> SparseBlock<'a> {
     }

     #[inline]
-    #[allow(clippy::comparison_chain)]
+    #[expect(clippy::comparison_chain)]
     // Looks for the element in the block. Returns the positions if found.
     fn binary_search(&self, target: u16) -> Result<u16, u16> {
         let data = &self.0;

@@ -128,7 +128,7 @@ pub fn open_u128_as_compact_u64(mut bytes: OwnedBytes) -> io::Result<Arc<dyn Col
 }

 #[cfg(test)]
-pub mod tests {
+pub(crate) mod tests {
     use super::*;
     use crate::column_values::u64_based::{
         serialize_and_load_u64_based_column_values, serialize_u64_based_column_values,
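Several hunks like the last one narrow `#[cfg(test)]` modules and test helpers from `pub` to `pub(crate)`. A test-only item can never be reached from outside the crate, so the crate-level visibility states what is actually true and keeps visibility lints (such as `unreachable_pub`, when enabled) quiet; this is the assumed rationale, as the diff only shows the renames. A small sketch, placed at a library crate's root:

```rust
// In a library crate, `pub` inside a test-only module promises an API that
// no downstream crate can ever see; `pub(crate)` documents the real scope.
#[cfg(test)]
pub(crate) mod tests {
    pub(crate) fn fixture() -> Vec<u32> {
        vec![1, 2, 3]
    }
}

#[cfg(test)]
mod other_tests {
    // Other test modules in the same crate can still share the helper.
    #[test]
    fn shares_the_fixture() {
        assert_eq!(crate::tests::fixture().len(), 3);
    }
}
```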
@@ -122,7 +122,7 @@ impl<T> From<T> for ColumnOperation<T> {
 // In order to limit memory usage, and in order
 // to benefit from the stacker, we do this by serialization our data
 // as "Symbols".
-#[allow(clippy::from_over_into)]
+#[expect(clippy::from_over_into)]
 pub(super) trait SymbolValue: Clone + Copy {
     // Serializes the symbol into the given buffer.
     // Returns the number of bytes written into the buffer.

@@ -392,7 +392,7 @@ impl ColumnarWriter {

     // Serialize [Dictionary, Column, dictionary num bytes U32::LE]
     // Column: [Column Index, Column Values, column index num bytes U32::LE]
-    #[allow(clippy::too_many_arguments)]
+    #[expect(clippy::too_many_arguments)]
     fn serialize_bytes_or_str_column(
         cardinality: Cardinality,
         num_docs: RowId,

@@ -130,7 +130,7 @@ pub fn replace_in_place(needle: u8, replacement: u8, bytes: &mut [u8]) {
 }

 #[cfg(test)]
-pub mod test {
+pub(crate) mod test {

     use proptest::prelude::*;

@@ -144,7 +144,7 @@ pub mod test {
         assert_eq!(u64_to_f64(f64_to_u64(val)), val);
     }

-    pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
+    pub(crate) fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
         let mut buffer = Vec::new();
         O::default().serialize(&mut buffer).unwrap();
         assert_eq!(buffer.len(), O::SIZE_IN_BYTES);

@@ -111,7 +111,6 @@ where F: nom::Parser<I, (O, ErrorList), Infallible> {
         Err(Err::Incomplete(needed)) => Err(Err::Incomplete(needed)),
         // old versions don't understand this is uninhabited and need the empty match to help,
         // newer versions warn because this arm is unreachable (which it is indeed).
-        #[allow(unreachable_patterns)]
        Err(Err::Error(val)) | Err(Err::Failure(val)) => match val {},
     }
 }
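The hunk above drops an `#[allow(unreachable_patterns)]` that guarded a match on an uninhabited type. A match with no arms is how Rust proves a value of an empty type cannot exist; a self-contained sketch using `std::convert::Infallible`:

```rust
use std::convert::Infallible;

// `Infallible` has no variants, so `Err` can never be constructed.
fn unwrap_never_fails<T>(res: Result<T, Infallible>) -> T {
    match res {
        Ok(val) => val,
        // The empty match tells the compiler this arm is impossible.
        Err(never) => match never {},
    }
}

fn main() {
    let ok: Result<u32, Infallible> = Ok(7);
    assert_eq!(unwrap_never_fails(ok), 7);
}
```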
@@ -767,7 +767,7 @@ fn occur_leaf(inp: &str) -> IResult<&str, (Option<Occur>, UserInputAst)> {
     tuple((fallible(occur_symbol), boosted_leaf))(inp)
 }

-#[allow(clippy::type_complexity)]
+#[expect(clippy::type_complexity)]
 fn operand_occur_leaf_infallible(
     inp: &str,
 ) -> JResult<&str, (Option<BinaryOperand>, Option<Occur>, Option<UserInputAst>)> {

@@ -1,4 +1,5 @@
 //! Contains the final aggregation tree.
+//!
 //! This tree can be converted via the `into()` method from `IntermediateAggregationResults`.
 //! This conversion computes the final result. For example: The intermediate result contains
 //! intermediate average results, which is the sum and the number of values. The actual average is

@@ -187,7 +188,7 @@ pub enum BucketEntries<T> {
 }

 impl<T> BucketEntries<T> {
-    fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = &T> + 'a> {
+    fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = &'a T> + 'a> {
         match self {
             BucketEntries::Vec(vec) => Box::new(vec.iter()),
             BucketEntries::HashMap(map) => Box::new(map.values()),

@@ -244,7 +244,7 @@ fn parse_into_milliseconds(input: &str) -> Result<i64, AggregationError> {
 }

 #[cfg(test)]
-pub mod tests {
+pub(crate) mod tests {
     use pretty_assertions::assert_eq;

     use super::*;

@@ -16,6 +16,7 @@ use crate::aggregation::*;
 use crate::TantivyError;

 /// Provide user-defined buckets to aggregate on.
+///
 /// Two special buckets will automatically be created to cover the whole range of values.
 /// The provided buckets have to be continuous.
 /// During the aggregation, the values extracted from the fast_field `field` will be checked

@@ -180,7 +180,7 @@ pub(crate) fn deserialize_option_f64<'de, D>(deserializer: D) -> Result<Option<f
 where D: Deserializer<'de> {
     struct StringOrFloatVisitor;

-    impl<'de> Visitor<'de> for StringOrFloatVisitor {
+    impl Visitor<'_> for StringOrFloatVisitor {
         type Value = Option<f64>;

         fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {

@@ -226,7 +226,7 @@ pub(crate) fn deserialize_f64<'de, D>(deserializer: D) -> Result<f64, D::Error>
 where D: Deserializer<'de> {
     struct StringOrFloatVisitor;

-    impl<'de> Visitor<'de> for StringOrFloatVisitor {
+    impl Visitor<'_> for StringOrFloatVisitor {
         type Value = f64;

         fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {

@@ -13,7 +13,7 @@ struct Hit<'a> {
     facet: &'a Facet,
 }

-impl<'a> Eq for Hit<'a> {}
+impl Eq for Hit<'_> {}

 impl<'a> PartialEq<Hit<'a>> for Hit<'a> {
     fn eq(&self, other: &Hit<'_>) -> bool {

@@ -27,7 +27,7 @@ impl<'a> PartialOrd<Hit<'a>> for Hit<'a> {
     }
 }

-impl<'a> Ord for Hit<'a> {
+impl Ord for Hit<'_> {
     fn cmp(&self, other: &Self) -> Ordering {
         other
             .count
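The `Hit` hunks above show a pattern repeated throughout this commit: when a named lifetime parameter on an `impl` is never actually referenced, clippy's `needless_lifetimes` suggests the anonymous lifetime `'_` instead. A minimal sketch (a stand-in type, not tantivy's `Hit`):

```rust
struct Hit<'a> {
    count: u64,
    facet: &'a str, // stand-in for the real `&'a Facet`
}

// Before: impl<'a> PartialEq for Hit<'a> { ... }
// The name 'a adds nothing here, so the anonymous lifetime suffices.
impl PartialEq for Hit<'_> {
    fn eq(&self, other: &Self) -> bool {
        self.count == other.count
    }
}

impl Eq for Hit<'_> {}

fn main() {
    let facet = String::from("/cat");
    let a = Hit { count: 1, facet: &facet };
    let b = Hit { count: 1, facet: &facet };
    assert!(a == b);
    assert_eq!(a.facet, b.facet);
}
```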
@@ -182,6 +182,7 @@ where
 }

 /// A variant of the [`FilterCollector`] specialized for bytes fast fields, i.e.
+///
 /// it transparently wraps an inner [`Collector`] but filters documents
 /// based on the result of applying the predicate to the bytes fast field.
 ///

@@ -495,4 +495,4 @@ where
 impl_downcast!(Fruit);

 #[cfg(test)]
-pub mod tests;
+pub(crate) mod tests;

@@ -161,7 +161,7 @@ impl<TFruit: Fruit> FruitHandle<TFruit> {
 /// # Ok(())
 /// # }
 /// ```
-#[allow(clippy::type_complexity)]
+#[expect(clippy::type_complexity)]
 #[derive(Default)]
 pub struct MultiCollector<'a> {
     collector_wrappers: Vec<

@@ -190,7 +190,7 @@ impl<'a> MultiCollector<'a> {
     }
 }

-impl<'a> Collector for MultiCollector<'a> {
+impl Collector for MultiCollector<'_> {
     type Fruit = MultiFruit;
     type Child = MultiCollectorChild;

@@ -71,7 +71,7 @@ pub fn json_path_sep_to_dot(path: &mut str) {
     }
 }

-#[allow(clippy::too_many_arguments)]
+#[expect(clippy::too_many_arguments)]
 fn index_json_object<'a, V: Value<'a>>(
     doc: DocId,
     json_visitor: V::ObjectIter,

@@ -101,7 +101,7 @@ fn index_json_object<'a, V: Value<'a>>(
     }
 }

-#[allow(clippy::too_many_arguments)]
+#[expect(clippy::too_many_arguments)]
 pub(crate) fn index_json_value<'a, V: Value<'a>>(
     doc: DocId,
     json_value: V,

@@ -39,7 +39,7 @@ impl RetryPolicy {
 /// The `DirectoryLock` is an object that represents a file lock.
 ///
 /// It is associated with a lock file, that gets deleted on `Drop.`
-#[allow(dead_code)]
+#[expect(dead_code)]
 pub struct DirectoryLock(Box<dyn Send + Sync + 'static>);

 struct DirectoryLockGuard {

@@ -48,6 +48,7 @@ pub static INDEX_WRITER_LOCK: Lazy<Lock> = Lazy::new(|| Lock {
 });
 /// The meta lock file is here to protect the segment files being opened by
 /// `IndexReader::reload()` from being garbage collected.
+///
 /// It makes it possible for another process to safely consume
 /// our index in-writing. Ideally, we may have preferred `RWLock` semantics
 /// here, but it is difficult to achieve on Windows.

@@ -244,7 +244,7 @@ impl MmapDirectory {
                 directory_path,
             )));
         }
-        #[allow(clippy::bind_instead_of_map)]
+        #[expect(clippy::bind_instead_of_map)]
         let canonical_path: PathBuf = directory_path.canonicalize().or_else(|io_err| {
             let directory_path = directory_path.to_owned();

@@ -32,7 +32,7 @@ pub struct WatchCallbackList {
 /// file change is detected.
 #[must_use = "This `WatchHandle` controls the lifetime of the watch and should therefore be used."]
 #[derive(Clone)]
-#[allow(dead_code)]
+#[expect(dead_code)]
 pub struct WatchHandle(Arc<WatchCallback>);

 impl WatchHandle {

@@ -117,7 +117,7 @@ pub trait DocSet: Send {
     }
 }

-impl<'a> DocSet for &'a mut dyn DocSet {
+impl DocSet for &mut dyn DocSet {
     fn advance(&mut self) -> u32 {
         (**self).advance()
     }

@@ -149,7 +149,7 @@ impl FieldNormReader {
     }

     #[cfg(test)]
-    pub fn for_test(field_norms: &[u32]) -> FieldNormReader {
+    pub(crate) fn for_test(field_norms: &[u32]) -> FieldNormReader {
         let field_norms_id = field_norms
             .iter()
             .cloned()

@@ -1,12 +1,9 @@
-#![allow(deprecated)] // Remove with index sorting
-
 use std::collections::HashSet;

 use rand::{thread_rng, Rng};

 use crate::indexer::index_writer::MEMORY_BUDGET_NUM_BYTES_MIN;
 use crate::schema::*;
-#[allow(deprecated)]
 use crate::{doc, schema, Index, IndexWriter, Searcher};

 fn check_index_content(searcher: &Searcher, vals: &[u64]) -> crate::Result<()> {

@@ -31,7 +31,6 @@ pub struct InvertedIndexReader {
 }

 impl InvertedIndexReader {
-    #[allow(clippy::needless_pass_by_value)] // for symmetry
     pub(crate) fn new(
         termdict: TermDictionary,
         postings_file_slice: FileSlice,

@@ -1,6 +1,7 @@
 use std::slice;

 /// Enum describing each component of a tantivy segment.
+///
 /// Each component is stored in its own file,
 /// using the pattern `segment_uuid`.`component_extension`,
 /// except the delete component that takes an `segment_uuid`.`delete_opstamp`.`component_extension`

@@ -478,7 +478,7 @@ pub fn merge_field_meta_data(
         .into_iter()
         .kmerge_by(|left, right| left < right)
         // TODO: Remove allocation
-        .group_by(|el| (el.field_name.to_string(), el.typ))
+        .chunk_by(|el| (el.field_name.to_string(), el.typ))
     {
         let mut merged: FieldMetadata = group.next().unwrap();
         for el in group {
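The `group_by` to `chunk_by` change above (and in the merge-policy and segment-writer hunks below) is a pure rename: itertools 0.13 deprecated `Itertools::group_by` in favor of the equivalent `chunk_by`, which groups consecutive elements sharing a key. A minimal sketch assuming itertools 0.13+:

```rust
use itertools::Itertools;

fn main() {
    let fields = ["title", "title", "body", "body", "body"];
    // Consecutive runs sharing a key form one chunk, exactly like the
    // deprecated group_by did.
    for (key, chunk) in &fields.iter().chunk_by(|name| **name) {
        println!("{key}: {} value(s)", chunk.count());
    }
}
```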
@@ -187,7 +187,6 @@ impl DeleteCursor {
         }
     }

-    #[allow(clippy::wrong_self_convention)]
     fn is_behind_opstamp(&mut self, target_opstamp: Opstamp) -> bool {
         self.get()
             .map(|operation| operation.opstamp < target_opstamp)

@@ -21,7 +21,7 @@ pub enum DocToOpstampMapping<'a> {
     None,
 }

-impl<'a> DocToOpstampMapping<'a> {
+impl DocToOpstampMapping<'_> {
     /// Assess whether a document should be considered deleted given that it contains
     /// a deleted term that was deleted at the opstamp: `delete_opstamp`.
     ///

@@ -104,7 +104,7 @@ impl MergePolicy for LogMergePolicy {

         let mut current_max_log_size = f64::MAX;
         let mut levels = vec![];
-        for (_, merge_group) in &size_sorted_segments.into_iter().group_by(|segment| {
+        for (_, merge_group) in &size_sorted_segments.into_iter().chunk_by(|segment| {
             let segment_log_size = f64::from(self.clip_min_size(segment.num_docs())).log2();
             if segment_log_size < (current_max_log_size - self.level_log_size) {
                 // update current_max_log_size to create a new group

@@ -36,7 +36,7 @@ impl MergePolicy for NoMergePolicy {
 }

 #[cfg(test)]
-pub mod tests {
+pub(crate) mod tests {

     use super::*;

@@ -150,7 +150,7 @@ impl SegmentWriter {
         let vals_grouped_by_field = doc
             .iter_fields_and_values()
             .sorted_by_key(|(field, _)| *field)
-            .group_by(|(field, _)| *field);
+            .chunk_by(|(field, _)| *field);

         for (field, field_values) in &vals_grouped_by_field {
             let values = field_values.map(|el| el.1);

@@ -101,7 +101,7 @@ mod test {

     use super::Stamper;

-    #[allow(clippy::redundant_clone)]
+    #[expect(clippy::redundant_clone)]
     #[test]
     fn test_stamper() {
         let stamper = Stamper::new(7u64);

@@ -117,7 +117,7 @@ mod test {
         assert_eq!(stamper.stamp(), 15u64);
     }

-    #[allow(clippy::redundant_clone)]
+    #[expect(clippy::redundant_clone)]
     #[test]
     fn test_stamper_revert() {
         let stamper = Stamper::new(7u64);
@@ -178,10 +178,8 @@ pub use crate::future_result::FutureResult;
 pub type Result<T> = std::result::Result<T, TantivyError>;

 mod core;
-#[allow(deprecated)] // Remove with index sorting
 pub mod indexer;

-#[allow(unused_doc_comments)]
 pub mod error;
 pub mod tokenizer;

@@ -190,7 +188,6 @@ pub mod collector;
 pub mod directory;
 pub mod fastfield;
 pub mod fieldnorm;
-#[allow(deprecated)] // Remove with index sorting
 pub mod index;
 pub mod positions;
 pub mod postings;

@@ -223,7 +220,6 @@ pub use self::docset::{DocSet, COLLECT_BLOCK_BUFFER_LEN, TERMINATED};
 pub use crate::core::json_utils;
 pub use crate::core::{Executor, Searcher, SearcherGeneration};
 pub use crate::directory::Directory;
-#[allow(deprecated)] // Remove with index sorting
 pub use crate::index::{
     Index, IndexBuilder, IndexMeta, IndexSettings, InvertedIndexReader, Order, Segment,
     SegmentMeta, SegmentReader,

@@ -371,6 +367,7 @@ macro_rules! fail_point {
     }};
 }

+/// Common test utilities.
 #[cfg(test)]
 pub mod tests {
     use common::{BinarySerializable, FixedSize};

@@ -389,6 +386,7 @@ pub mod tests {
     use crate::schema::*;
     use crate::{DateTime, DocAddress, Index, IndexWriter, ReloadPolicy};

+    /// Asserts that the serialized value is the value in the trait.
     pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
         let mut buffer = Vec::new();
         O::default().serialize(&mut buffer).unwrap();

@@ -421,6 +419,7 @@ pub mod tests {
         }};
     }

+    /// Generates random numbers
     pub fn generate_nonunique_unsorted(max_value: u32, n_elems: usize) -> Vec<u32> {
         let seed: [u8; 32] = [1; 32];
         StdRng::from_seed(seed)

@@ -429,6 +428,7 @@ pub mod tests {
             .collect::<Vec<u32>>()
     }

+    /// Sample `n` elements with Bernoulli distribution.
     pub fn sample_with_seed(n: u32, ratio: f64, seed_val: u8) -> Vec<u32> {
         StdRng::from_seed([seed_val; 32])
             .sample_iter(&Bernoulli::new(ratio).unwrap())

@@ -438,6 +438,7 @@ pub mod tests {
             .collect()
     }

+    /// Sample `n` elements with Bernoulli distribution.
     pub fn sample(n: u32, ratio: f64) -> Vec<u32> {
         sample_with_seed(n, ratio, 4)
     }
@@ -41,7 +41,6 @@
 /// );
 /// # }
 /// ```
-
 #[macro_export]
 macro_rules! doc(
     () => {

@@ -1,4 +1,5 @@
 //! Tantivy can (if instructed to do so in the schema) store the term positions in a given field.
+//!
 //! This position is expressed as token ordinal. For instance,
 //! In "The beauty and the beast", the term "the" appears in position 0 and position 3.
 //! This information is useful to run phrase queries.

@@ -38,7 +39,7 @@ pub use self::serializer::PositionSerializer;
 const COMPRESSION_BLOCK_SIZE: usize = BitPacker4x::BLOCK_LEN;

 #[cfg(test)]
-pub mod tests {
+pub(crate) mod tests {

     use std::iter;

@@ -264,7 +264,7 @@ impl VIntDecoder for BlockDecoder {
 }

 #[cfg(test)]
-pub mod tests {
+pub(crate) mod tests {

     use super::*;
     use crate::TERMINATED;

@@ -104,7 +104,7 @@ impl Postings for LoadedPostings {
 }

 #[cfg(test)]
-pub mod tests {
+pub(crate) mod tests {

     use super::*;

@@ -31,7 +31,7 @@ pub use self::serializer::{FieldSerializer, InvertedIndexSerializer};
 pub(crate) use self::skip::{BlockInfo, SkipReader};
 pub use self::term_info::TermInfo;

-#[allow(clippy::enum_variant_names)]
+#[expect(clippy::enum_variant_names)]
 #[derive(Debug, PartialEq, Clone, Copy, Eq)]
 pub(crate) enum FreqReadingOption {
     NoFreq,

@@ -40,7 +40,7 @@ pub(crate) enum FreqReadingOption {
 }

 #[cfg(test)]
-pub mod tests {
+pub(crate) mod tests {
     use std::mem;

     use super::{InvertedIndexSerializer, Postings};

@@ -34,7 +34,7 @@ impl<'a> VInt32Reader<'a> {
     }
 }

-impl<'a> Iterator for VInt32Reader<'a> {
+impl Iterator for VInt32Reader<'_> {
     type Item = u32;

     fn next(&mut self) -> Option<u32> {

@@ -272,7 +272,7 @@ impl<'a> From<&'a mut TermScorer> for TermScorerWithMaxScore<'a> {
     }
 }

-impl<'a> Deref for TermScorerWithMaxScore<'a> {
+impl Deref for TermScorerWithMaxScore<'_> {
     type Target = TermScorer;

     fn deref(&self) -> &Self::Target {

@@ -280,7 +280,7 @@ impl<'a> Deref for TermScorerWithMaxScore<'a> {
     }
 }

-impl<'a> DerefMut for TermScorerWithMaxScore<'a> {
+impl DerefMut for TermScorerWithMaxScore<'_> {
     fn deref_mut(&mut self) -> &mut Self::Target {
         self.scorer
     }

@@ -417,7 +417,7 @@ mod tests {
             .boxed()
     }

-    #[allow(clippy::type_complexity)]
+    #[expect(clippy::type_complexity)]
     fn gen_term_scorers(num_scorers: usize) -> BoxedStrategy<(Vec<Vec<(DocId, u32)>>, Vec<u32>)> {
         (1u32..100u32)
             .prop_flat_map(move |max_doc: u32| {
@@ -8,7 +8,7 @@ use crate::{DocId, Score};

 // MultiPrefix is the larger variant, and also the one we expect most often. PhraseScorer is > 1kB
 // though, it would be interesting to slim it down if possible.
-#[allow(clippy::large_enum_variant)]
+#[expect(clippy::large_enum_variant)]
 enum PhraseKind<TPostings: Postings> {
     SinglePrefix {
         position_offset: u32,

@@ -10,7 +10,7 @@ pub use self::phrase_scorer::PhraseScorer;
 pub use self::phrase_weight::PhraseWeight;

 #[cfg(test)]
-pub mod tests {
+pub(crate) mod tests {

     use serde_json::json;

@@ -402,7 +402,7 @@ fn search_on_u64_ff(
     boost: Score,
     bounds: BoundsRange<u64>,
 ) -> crate::Result<Box<dyn Scorer>> {
-    #[allow(clippy::reversed_empty_ranges)]
+    #[expect(clippy::reversed_empty_ranges)]
     let value_range = bound_to_value_range(
         &bounds.lower_bound,
         &bounds.upper_bound,

@@ -1386,7 +1386,7 @@ mod tests {
 }

 #[cfg(test)]
-pub mod ip_range_tests {
+pub(crate) mod ip_range_tests {
     use proptest::prelude::ProptestConfig;
     use proptest::strategy::Strategy;
     use proptest::{prop_oneof, proptest};
@@ -50,7 +50,7 @@ impl HasLen for VecDocSet {
 }

 #[cfg(test)]
-pub mod tests {
+pub(crate) mod tests {

     use super::*;
     use crate::docset::COLLECT_BLOCK_BUFFER_LEN;

@@ -394,7 +394,7 @@ where R: Read
             type_codes::NULL_CODE => ValueType::Null,
             type_codes::ARRAY_CODE => ValueType::Array,
             type_codes::OBJECT_CODE => ValueType::Object,
-            #[allow(deprecated)]
+            #[expect(deprecated)]
             type_codes::JSON_OBJ_CODE => ValueType::JSONObject,
             _ => {
                 return Err(DeserializeError::from(io::Error::new(

@@ -401,7 +401,7 @@ impl PartialEq for CompactDocValue<'_> {
         value1 == value2
     }
 }
-impl<'a> From<CompactDocValue<'a>> for OwnedValue {
+impl From<CompactDocValue<'_>> for OwnedValue {
     fn from(value: CompactDocValue) -> Self {
         value.as_value().into()
     }

@@ -331,6 +331,7 @@ where B: AsRef<[u8]>
 }

 /// ValueBytes represents a serialized value.
+///
 /// The value can be of any type of [`Type`] (e.g. string, u64, f64, bool, date, JSON).
 /// The serialized representation matches the lexicographical order of the type.
 ///
@@ -1,5 +1,6 @@
 //! [`SnippetGenerator`]
 //! Generates a text snippet for a given document, and some highlighted parts inside it.
+//!
 //! Imagine you doing a text search in a document
 //! and want to show a preview of where in the document the search terms occur,
 //! along with some surrounding text to give context, and the search terms highlighted.

@@ -436,7 +437,7 @@ impl SnippetGenerator {
     }

     #[cfg(test)]
-    pub fn terms_text(&self) -> &BTreeMap<String, Score> {
+    pub(crate) fn terms_text(&self) -> &BTreeMap<String, Score> {
         &self.terms_text
     }

@@ -78,7 +78,7 @@ pub struct SegmentSpaceUsage {
 }

 impl SegmentSpaceUsage {
-    #[allow(clippy::too_many_arguments)]
+    #[expect(clippy::too_many_arguments)]
     pub(crate) fn new(
         num_docs: u32,
         termdict: PerFieldSpaceUsage,
@@ -4,7 +4,7 @@ use std::mem;
 use lz4_flex::{compress_into, decompress_into};

 #[inline]
-#[allow(clippy::uninit_vec)]
+#[expect(clippy::uninit_vec)]
 pub fn compress(uncompressed: &[u8], compressed: &mut Vec<u8>) -> io::Result<()> {
     compressed.clear();
     let maximum_output_size =

@@ -24,7 +24,7 @@ pub fn compress(uncompressed: &[u8], compressed: &mut Vec<u8>) -> io::Result<()>
 }

 #[inline]
-#[allow(clippy::uninit_vec)]
+#[expect(clippy::uninit_vec)]
 pub fn decompress(compressed: &[u8], decompressed: &mut Vec<u8>) -> io::Result<()> {
     decompressed.clear();
     let uncompressed_size_bytes: &[u8; 4] = compressed
@@ -11,7 +11,7 @@ pub struct LayerCursor<'a> {
     cursor: usize,
 }

-impl<'a> Iterator for LayerCursor<'a> {
+impl Iterator for LayerCursor<'_> {
     type Item = Checkpoint;

     fn next(&mut self) -> Option<Checkpoint> {

@@ -53,7 +53,7 @@ mod compression_lz4_block;
 mod compression_zstd_block;

 #[cfg(test)]
-pub mod tests {
+pub(crate) mod tests {

     use std::path::Path;

@@ -82,7 +82,7 @@ where A: Automaton
     current_value: TermInfo,
 }

-impl<'a, A> TermStreamer<'a, A>
+impl<A> TermStreamer<'_, A>
 where A: Automaton
 {
     /// Advance position the stream on the next item.

@@ -136,7 +136,7 @@ where A: Automaton
     }

     /// Return the next `(key, value)` pair.
-    #[allow(clippy::should_implement_trait)]
+    #[expect(clippy::should_implement_trait)]
     pub fn next(&mut self) -> Option<(&[u8], &TermInfo)> {
         if self.advance() {
             Some((self.key(), self.value()))
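`TermStreamer::next` above keeps its `clippy::should_implement_trait` suppression (now as `expect`): the method is named like `Iterator::next`, but it returns borrows tied to `&mut self`, a "lending" shape that `Iterator`'s signature cannot express. A sketch of the same shape (hypothetical type, not tantivy's API):

```rust
struct LendingCounter {
    n: u32,
    buf: String,
}

impl LendingCounter {
    // Looks like Iterator::next, but the returned &str borrows from self,
    // so implementing Iterator<Item = &str> is impossible here.
    fn next(&mut self) -> Option<&str> {
        if self.n == 3 {
            return None;
        }
        self.n += 1;
        self.buf = self.n.to_string();
        Some(self.buf.as_str())
    }
}

fn main() {
    let mut counter = LendingCounter { n: 0, buf: String::new() };
    while let Some(token) = counter.next() {
        println!("{token}");
    }
}
```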
@@ -49,7 +49,6 @@ use crate::postings::TermInfo;

 #[derive(Debug, Eq, PartialEq)]
 #[repr(u32)]
-#[allow(dead_code)]
 enum DictionaryType {
     Fst = 1,
     SSTable = 2,

@@ -42,7 +42,7 @@ pub struct AsciiFoldingFilterTokenStream<'a, T> {
     tail: T,
 }

-impl<'a, T: TokenStream> TokenStream for AsciiFoldingFilterTokenStream<'a, T> {
+impl<T: TokenStream> TokenStream for AsciiFoldingFilterTokenStream<'_, T> {
     fn advance(&mut self) -> bool {
         if !self.tail.advance() {
             return false;

@@ -40,7 +40,7 @@ impl Tokenizer for FacetTokenizer {
     }
 }

-impl<'a> TokenStream for FacetTokenStream<'a> {
+impl TokenStream for FacetTokenStream<'_> {
     fn advance(&mut self) -> bool {
         match self.state {
             State::RootFacetNotEmitted => {

@@ -51,7 +51,7 @@ fn to_lowercase_unicode(text: &str, output: &mut String) {
     }
 }

-impl<'a, T: TokenStream> TokenStream for LowerCaserTokenStream<'a, T> {
+impl<T: TokenStream> TokenStream for LowerCaserTokenStream<'_, T> {
     fn advance(&mut self) -> bool {
         if !self.tail.advance() {
             return false;

@@ -166,7 +166,7 @@ pub use self::whitespace_tokenizer::WhitespaceTokenizer;
 pub const MAX_TOKEN_LEN: usize = u16::MAX as usize - 5;

 #[cfg(test)]
-pub mod tests {
+pub(crate) mod tests {
     use super::{
         Language, LowerCaser, RemoveLongFilter, SimpleTokenizer, Stemmer, Token, TokenizerManager,
     };

@@ -159,7 +159,7 @@ impl Tokenizer for NgramTokenizer {
     }
 }

-impl<'a> TokenStream for NgramTokenStream<'a> {
+impl TokenStream for NgramTokenStream<'_> {
     fn advance(&mut self) -> bool {
         if let Some((offset_from, offset_to)) = self.ngram_charidx_iterator.next() {
             if self.prefix_only && offset_from > 0 {

@@ -283,7 +283,7 @@ impl<'a> CodepointFrontiers<'a> {
     }
 }

-impl<'a> Iterator for CodepointFrontiers<'a> {
+impl Iterator for CodepointFrontiers<'_> {
     type Item = usize;

     fn next(&mut self) -> Option<usize> {
@@ -28,7 +28,7 @@ impl Tokenizer for RawTokenizer {
     }
 }

-impl<'a> TokenStream for RawTokenStream<'a> {
+impl TokenStream for RawTokenStream<'_> {
     fn advance(&mut self) -> bool {
         let result = self.has_token;
         self.has_token = false;

@@ -4,6 +4,7 @@ use super::{Token, TokenStream, Tokenizer};
 use crate::TantivyError;

 /// Tokenize the text by using a regex pattern to split.
+///
 /// Each match of the regex emits a distinct token, empty tokens will not be emitted. Anchors such
 /// as `\A` will match the text from the part where the last token was emitted or the beginning of
 /// the complete text if no token was emitted yet.

@@ -83,7 +84,7 @@ pub struct RegexTokenStream<'a> {
     cursor: usize,
 }

-impl<'a> TokenStream for RegexTokenStream<'a> {
+impl TokenStream for RegexTokenStream<'_> {
     fn advance(&mut self) -> bool {
         let Some(regex_match) = self.regex.find(self.text) else {
             return false;

@@ -27,7 +27,7 @@ impl Tokenizer for SimpleTokenizer {
     }
 }

-impl<'a> SimpleTokenStream<'a> {
+impl SimpleTokenStream<'_> {
     // search for the end of the current token.
     fn search_token_end(&mut self) -> usize {
         (&mut self.chars)

@@ -38,7 +38,7 @@ impl<'a> SimpleTokenStream<'a> {
     }
 }

-impl<'a> TokenStream for SimpleTokenStream<'a> {
+impl TokenStream for SimpleTokenStream<'_> {
     fn advance(&mut self) -> bool {
         self.token.text.clear();
         self.token.position = self.token.position.wrapping_add(1);

@@ -122,7 +122,7 @@ pub struct SplitCompoundWordsTokenStream<'a, T> {
     parts: &'a mut Vec<Token>,
 }

-impl<'a, T: TokenStream> SplitCompoundWordsTokenStream<'a, T> {
+impl<T: TokenStream> SplitCompoundWordsTokenStream<'_, T> {
     // Will use `self.cuts` to fill `self.parts` if `self.tail.token()`
     // can fully be split into consecutive matches against `self.dict`.
     fn split(&mut self) {

@@ -158,7 +158,7 @@ impl<'a, T: TokenStream> SplitCompoundWordsTokenStream<'a, T> {
     }
 }

-impl<'a, T: TokenStream> TokenStream for SplitCompoundWordsTokenStream<'a, T> {
+impl<T: TokenStream> TokenStream for SplitCompoundWordsTokenStream<'_, T> {
     fn advance(&mut self) -> bool {
         self.parts.pop();

@@ -26,7 +26,7 @@ impl Tokenizer for WhitespaceTokenizer {
     }
 }

-impl<'a> WhitespaceTokenStream<'a> {
+impl WhitespaceTokenStream<'_> {
     // search for the end of the current token.
     fn search_token_end(&mut self) -> usize {
         (&mut self.chars)

@@ -37,7 +37,7 @@ impl<'a> WhitespaceTokenStream<'a> {
     }
 }

-impl<'a> TokenStream for WhitespaceTokenStream<'a> {
+impl TokenStream for WhitespaceTokenStream<'_> {
     fn advance(&mut self) -> bool {
         self.token.text.clear();
         self.token.position = self.token.position.wrapping_add(1);

@@ -85,7 +85,6 @@ pub trait SSTable: Sized {
     }
 }

-#[allow(dead_code)]
 pub struct VoidSSTable;

 impl SSTable for VoidSSTable {

@@ -100,7 +99,6 @@ impl SSTable for VoidSSTable {
 /// In other words, two keys `k1` and `k2`
 /// such that `k1` <= `k2`, are required to observe
 /// `range_sstable[k1] <= range_sstable[k2]`.
-#[allow(dead_code)]
 pub struct MonotonicU64SSTable;

 impl SSTable for MonotonicU64SSTable {

@@ -26,7 +26,6 @@ impl<B: AsRef<[u8]>> PartialEq for HeapItem<B> {
     }
 }

-#[allow(dead_code)]
 pub fn merge_sstable<SST: SSTable, W: io::Write, M: ValueMerger<SST::Value>>(
     readers: Vec<Reader<SST::ValueReader>>,
     mut writer: Writer<W, SST::ValueWriter>,
@@ -264,7 +264,7 @@ where
     }

     /// Return the next `(key, value)` pair.
-    #[allow(clippy::should_implement_trait)]
+    #[expect(clippy::should_implement_trait)]
     pub fn next(&mut self) -> Option<(&[u8], &TSSTable::Value)> {
         if self.advance() {
             Some((self.key(), self.value()))

@@ -78,7 +78,7 @@ impl ValueWriter for RangeValueWriter {
 }

 #[cfg(test)]
-#[allow(clippy::single_range_in_vec_init)]
+#[expect(clippy::single_range_in_vec_init)]
 mod tests {
     use super::*;

@@ -5,7 +5,6 @@ extern crate test;

 mod arena_hashmap;
 mod expull;
-#[allow(dead_code)]
 mod fastcmp;
 mod fastcpy;
 mod memory_arena;

@@ -89,7 +89,6 @@ pub fn load<Item: Copy + 'static>(data: &[u8]) -> Item {
 }

 /// The `MemoryArena`
-#[allow(clippy::new_without_default)]
 pub struct MemoryArena {
     pages: Vec<Page>,
 }
@@ -169,7 +169,7 @@ impl SharedArenaHashMap {
     }

     #[inline]
-    fn get_key_value<'a>(&'a self, addr: Addr, memory_arena: &'a MemoryArena) -> (&[u8], Addr) {
+    fn get_key_value<'a>(&'a self, addr: Addr, memory_arena: &'a MemoryArena) -> (&'a [u8], Addr) {
         let data = memory_arena.slice_from(addr);
         let key_bytes_len_bytes = unsafe { data.get_unchecked(..2) };
         let key_bytes_len = u16::from_le_bytes(key_bytes_len_bytes.try_into().unwrap());

@@ -233,7 +233,7 @@ impl SharedArenaHashMap {
     }

     #[inline]
-    pub fn iter<'a>(&'a self, memory_arena: &'a MemoryArena) -> Iter<'_> {
+    pub fn iter<'a>(&'a self, memory_arena: &'a MemoryArena) -> Iter<'a> {
         Iter {
             inner: self
                 .table
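The two `SharedArenaHashMap` hunks above go the other way from the earlier elisions: when an input lifetime is explicitly named `'a`, writing `'_` (or nothing) in the return type hides which input the output borrows from, and rustc's `elided_named_lifetimes` lint nudges toward spelling it out (assumed motivation; the diff shows only the signatures). A minimal sketch with stand-in types:

```rust
struct MemoryArena {
    bytes: Vec<u8>,
}

struct Iter<'a> {
    data: &'a [u8],
}

// Before: fn iter<'a>(arena: &'a MemoryArena) -> Iter<'_>
// Naming the result's lifetime 'a makes the borrow explicit.
fn iter<'a>(arena: &'a MemoryArena) -> Iter<'a> {
    Iter { data: &arena.bytes }
}

fn main() {
    let arena = MemoryArena { bytes: vec![1, 2, 3] };
    assert_eq!(iter(&arena).data.len(), 3);
}
```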