Mirror of https://github.com/quickwit-oss/tantivy.git, synced 2025-12-23 02:29:57 +00:00
@@ -1,4 +1,4 @@
-use std::io::{self, Write};
+use std::io;
 use std::sync::Arc;

 mod set;
@@ -11,7 +11,7 @@ use set_block::{
 };

 use crate::iterable::Iterable;
-use crate::{DocId, InvalidData, RowId};
+use crate::{DocId, RowId};

 /// The threshold for the number of elements after which we switch to dense block encoding.
 ///
@@ -335,38 +335,6 @@ enum Block<'a> {
     Sparse(SparseBlock<'a>),
 }

-#[derive(Debug, Copy, Clone)]
-enum OptionalIndexCodec {
-    Dense = 0,
-    Sparse = 1,
-}
-
-impl OptionalIndexCodec {
-    fn to_code(self) -> u8 {
-        self as u8
-    }
-
-    fn try_from_code(code: u8) -> Result<Self, InvalidData> {
-        match code {
-            0 => Ok(Self::Dense),
-            1 => Ok(Self::Sparse),
-            _ => Err(InvalidData),
-        }
-    }
-}
-
-impl BinarySerializable for OptionalIndexCodec {
-    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
-        writer.write_all(&[self.to_code()])
-    }
-
-    fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
-        let optional_codec_code = u8::deserialize(reader)?;
-        let optional_codec = Self::try_from_code(optional_codec_code)?;
-        Ok(optional_codec)
-    }
-}
-
 fn serialize_optional_index_block(block_els: &[u16], out: &mut impl io::Write) -> io::Result<()> {
     let is_sparse = is_sparse(block_els.len() as u32);
     if is_sparse {
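
The deleted block is the classic one-byte tag codec: an enum whose discriminant is written as a single byte and validated on read. Since the pattern is worth keeping in mind, here is a self-contained sketch of it; the `Codec` name, the error mapping, and `main` are illustrative, not tantivy's actual API.

use std::io::{self, Read, Write};

// Hypothetical stand-in for tantivy's `InvalidData` error marker.
#[derive(Debug)]
struct InvalidData;

#[derive(Debug, Copy, Clone, PartialEq)]
enum Codec {
    Dense = 0,
    Sparse = 1,
}

impl Codec {
    // The discriminant doubles as the on-disk tag byte.
    fn to_code(self) -> u8 {
        self as u8
    }

    // Reject unknown tags instead of silently misreading data.
    fn try_from_code(code: u8) -> Result<Self, InvalidData> {
        match code {
            0 => Ok(Self::Dense),
            1 => Ok(Self::Sparse),
            _ => Err(InvalidData),
        }
    }

    fn serialize<W: Write>(self, writer: &mut W) -> io::Result<()> {
        writer.write_all(&[self.to_code()])
    }

    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
        let mut tag = [0u8; 1];
        reader.read_exact(&mut tag)?;
        Self::try_from_code(tag[0])
            .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "unknown codec tag"))
    }
}

fn main() -> io::Result<()> {
    let mut buf = Vec::new();
    Codec::Sparse.serialize(&mut buf)?;
    assert_eq!(Codec::deserialize(&mut buf.as_slice())?, Codec::Sparse);
    Ok(())
}
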
@@ -68,7 +68,7 @@ fn interpret_escape(source: &str) -> String {

 /// Consume a word outside of any context.
 // TODO should support escape sequences
-fn word(inp: &str) -> IResult<&str, Cow<str>> {
+fn word(inp: &str) -> IResult<&str, Cow<'_, str>> {
     map_res(
         recognize(tuple((
             alt((
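
This hunk, like the `prepare_commit`, `FieldSerializer`, and `Dictionary` hunks further down, spells out an elided lifetime as `'_`. A minimal runnable sketch of the idiom, assuming the motivation is the `elided_lifetimes_in_paths` lint (part of the `rust_2018_idioms` group); `first_word` is an invented stand-in for `word`:

#![warn(elided_lifetimes_in_paths)]

use std::borrow::Cow;

// With the lint enabled, `-> Cow<str>` warns that a lifetime parameter is
// hidden; `Cow<'_, str>` keeps the elision but makes the borrow visible.
fn first_word(inp: &str) -> Cow<'_, str> {
    inp.split_whitespace()
        .next()
        .map(Cow::Borrowed)
        .unwrap_or(Cow::Owned(String::new()))
}

fn main() {
    assert_eq!(first_word("hello world"), "hello");
}
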
@@ -301,7 +301,7 @@ impl SegmentAggregationCollector for SegmentHistogramCollector {
         let bounds = self.bounds;
         let interval = self.interval;
         let offset = self.offset;
-        let get_bucket_pos = |val| (get_bucket_pos_f64(val, interval, offset) as i64);
+        let get_bucket_pos = |val| get_bucket_pos_f64(val, interval, offset) as i64;

         bucket_agg_accessor
             .column_block_accessor
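
Here, and in the three `search_on_json_numerical_field` hunks below, redundant parentheses around a closure body are dropped. A tiny sketch of the rustc `unused_parens` warning this addresses; the bucket arithmetic is invented for illustration:

fn main() {
    let interval = 10.0_f64;
    let offset = 2.0_f64;
    // Warns "unnecessary parentheses around closure body":
    // let get_bucket_pos = |val: f64| ((val - offset) / interval);
    let get_bucket_pos = |val: f64| (val - offset) / interval;
    assert_eq!(get_bucket_pos(22.0) as i64, 2);
}
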
@@ -484,7 +484,6 @@ impl FacetCounts {
 #[cfg(test)]
 mod tests {
     use std::collections::BTreeSet;
-    use std::iter;

     use columnar::Dictionary;
     use rand::distributions::Uniform;
@@ -484,10 +484,8 @@ impl Directory for MmapDirectory {
             .map_err(LockError::wrap_io_error)?;
         if lock.is_blocking {
             file.lock_exclusive().map_err(LockError::wrap_io_error)?;
-        } else {
-            if !file.try_lock_exclusive().map_err(|_| LockError::LockBusy)? {
-                return Err(LockError::LockBusy);
-            }
+        } else if !file.try_lock_exclusive().map_err(|_| LockError::LockBusy)? {
+            return Err(LockError::LockBusy);
         }
         // dropping the file handle will release the lock.
         Ok(DirectoryLock::from(Box::new(ReleaseLockFile {
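
The nested `else { if ... }` collapses into `else if`, the rewrite suggested by clippy's `collapsible_else_if` lint (an assumption; the diff itself names no lint). A simplified runnable sketch of the before/after shape, with the lock faked by a closure:

fn acquire(blocking: bool, try_lock: impl Fn() -> bool) -> Result<(), &'static str> {
    if blocking {
        Ok(()) // blocking path: assume the lock is eventually granted
    } else if !try_lock() {
        // Collapsed from `else { if !try_lock() { ... } }`.
        Err("LockBusy")
    } else {
        Ok(())
    }
}

fn main() {
    assert_eq!(acquire(false, || false), Err("LockBusy"));
    assert_eq!(acquire(false, || true), Ok(()));
    assert_eq!(acquire(true, || false), Ok(()));
}
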
@@ -146,7 +146,7 @@ impl InvertedIndexReader {
             positions_size: ByteCount::default(),
             num_terms: 0u64,
         };
-        field_space.record(&term_info);
+        field_space.record(term_info);

         // We include the json type and the json end of path to make sure the prefix check
         // is meaningful.
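
`term_info` is now passed by value instead of by reference. The `record` signature is not shown in this diff, so the sketch below merely assumes a by-value parameter over a small `Copy` struct; every name in it is hypothetical:

// All names here are hypothetical; only the call-site shape matches the diff.
#[derive(Clone, Copy, Default)]
struct TermInfo {
    num_terms: u64,
}

#[derive(Default)]
struct FieldSpace {
    total_terms: u64,
}

impl FieldSpace {
    // Takes the small `Copy` struct by value; passing `&term_info` would
    // then not compile, so the call site drops the borrow.
    fn record(&mut self, info: TermInfo) {
        self.total_terms += info.num_terms;
    }
}

fn main() {
    let mut field_space = FieldSpace::default();
    let term_info = TermInfo { num_terms: 3 };
    field_space.record(term_info);
    assert_eq!(field_space.total_terms, 3);
}
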
@@ -615,7 +615,7 @@ impl<D: Document> IndexWriter<D> {
     /// It is also possible to add a payload to the `commit`
     /// using this API.
     /// See [`PreparedCommit::set_payload()`].
-    pub fn prepare_commit(&mut self) -> crate::Result<PreparedCommit<D>> {
+    pub fn prepare_commit(&mut self) -> crate::Result<PreparedCommit<'_, D>> {
         // Here, because we join all of the worker threads,
         // all of the segment updates for this commit have been
         // sent.
@@ -75,7 +75,7 @@ impl InvertedIndexSerializer {
         field: Field,
         total_num_tokens: u64,
         fieldnorm_reader: Option<FieldNormReader>,
-    ) -> io::Result<FieldSerializer> {
+    ) -> io::Result<FieldSerializer<'_>> {
         let field_entry: &FieldEntry = self.schema.get_field_entry(field);
         let term_dictionary_write = self.terms_write.for_field(field);
         let postings_write = self.postings_write.for_field(field);
@@ -126,7 +126,7 @@ impl<'a> FieldSerializer<'a> {
         let term_dictionary_builder = TermDictionaryBuilder::create(term_dictionary_write)?;
         let average_fieldnorm = fieldnorm_reader
             .as_ref()
-            .map(|ff_reader| (total_num_tokens as Score / ff_reader.num_docs() as Score))
+            .map(|ff_reader| total_num_tokens as Score / ff_reader.num_docs() as Score)
             .unwrap_or(0.0);
         let postings_serializer = PostingsSerializer::new(
             postings_write,
@@ -1,5 +1,3 @@
-use serde::{Deserialize, Serialize};
-
 use crate::fieldnorm::FieldNormReader;
 use crate::query::Explanation;
 use crate::schema::Field;
@@ -68,12 +66,6 @@ fn compute_tf_cache(average_fieldnorm: Score) -> [Score; 256] {
     cache
 }

-#[derive(Clone, PartialEq, Debug, Serialize, Deserialize)]
-pub struct Bm25Params {
-    pub idf: Score,
-    pub avg_fieldnorm: Score,
-}
-
 /// A struct used for computing BM25 scores.
 #[derive(Clone)]
 pub struct Bm25Weight {
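
Dropping `Bm25Params` also explains the serde hunk above: the struct's `Serialize`/`Deserialize` derives were plausibly the file's last serde use. A minimal sketch of the compiler warnings that flag such leftovers, assuming the default `dead_code` and `unused_imports` lints:

use std::fmt::Debug; // warning: unused import, once nothing in the file uses it

// warning: struct `Bm25Params` is never constructed (`dead_code`)
struct Bm25Params {
    idf: f32,
    avg_fieldnorm: f32,
}

fn main() {}
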
@@ -302,7 +302,6 @@ fn is_sorted<I: Iterator<Item = DocId>>(mut it: I) -> bool {
 mod tests {
     use std::cmp::Ordering;
     use std::collections::BinaryHeap;
-    use std::iter;

     use proptest::prelude::*;

@@ -258,7 +258,7 @@ fn search_on_json_numerical_field(

     let bounds = match typ.numerical_type().unwrap() {
         NumericalType::I64 => {
-            let bounds = bounds.map_bound(|term| (term.as_i64().unwrap()));
+            let bounds = bounds.map_bound(|term| term.as_i64().unwrap());
             match actual_column_type {
                 NumericalType::I64 => bounds.map_bound(|&term| term.to_u64()),
                 NumericalType::U64 => {
@@ -282,7 +282,7 @@ fn search_on_json_numerical_field(
             }
         }
         NumericalType::U64 => {
-            let bounds = bounds.map_bound(|term| (term.as_u64().unwrap()));
+            let bounds = bounds.map_bound(|term| term.as_u64().unwrap());
             match actual_column_type {
                 NumericalType::U64 => bounds.map_bound(|&term| term.to_u64()),
                 NumericalType::I64 => {
@@ -306,7 +306,7 @@ fn search_on_json_numerical_field(
             }
         }
         NumericalType::F64 => {
-            let bounds = bounds.map_bound(|term| (term.as_f64().unwrap()));
+            let bounds = bounds.map_bound(|term| term.as_f64().unwrap());
             match actual_column_type {
                 NumericalType::U64 => transform_from_f64_bounds::<u64>(&bounds),
                 NumericalType::I64 => transform_from_f64_bounds::<i64>(&bounds),
@@ -1561,7 +1561,6 @@ fn to_ascii(text: &str, output: &mut String) {

 #[cfg(test)]
 mod tests {
-    use std::iter;

     use super::to_ascii;
     use crate::tokenizer::{AsciiFoldingFilter, RawTokenizer, SimpleTokenizer, TextAnalyzer};
@@ -609,12 +609,12 @@ impl<TSSTable: SSTable> Dictionary<TSSTable> {

     /// Returns a range builder, to stream all of the terms
     /// within an interval.
-    pub fn range(&self) -> StreamerBuilder<TSSTable> {
+    pub fn range(&self) -> StreamerBuilder<'_, TSSTable> {
         StreamerBuilder::new(self, AlwaysMatch)
     }

     /// Returns a range builder filtered with a prefix.
-    pub fn prefix_range<K: AsRef<[u8]>>(&self, prefix: K) -> StreamerBuilder<TSSTable> {
+    pub fn prefix_range<K: AsRef<[u8]>>(&self, prefix: K) -> StreamerBuilder<'_, TSSTable> {
         let lower_bound = prefix.as_ref();
         let mut upper_bound = lower_bound.to_vec();
         for idx in (0..upper_bound.len()).rev() {
@@ -633,7 +633,7 @@ impl<TSSTable: SSTable> Dictionary<TSSTable> {
     }

     /// A stream of all the sorted terms.
-    pub fn stream(&self) -> io::Result<Streamer<TSSTable>> {
+    pub fn stream(&self) -> io::Result<Streamer<'_, TSSTable>> {
         self.range().into_stream()
     }
