Mirror of https://github.com/quickwit-oss/tantivy.git (synced 2025-12-23 02:29:57 +00:00)
@@ -1,4 +1,4 @@
-use std::io::{self, Write};
+use std::io;
 use std::sync::Arc;
 
 mod set;
@@ -11,7 +11,7 @@ use set_block::{
 };
 
 use crate::iterable::Iterable;
-use crate::{DocId, InvalidData, RowId};
+use crate::{DocId, RowId};
 
 /// The threshold for the number of elements after which we switch to dense block encoding.
 ///
@@ -335,38 +335,6 @@ enum Block<'a> {
     Sparse(SparseBlock<'a>),
 }
 
-#[derive(Debug, Copy, Clone)]
-enum OptionalIndexCodec {
-    Dense = 0,
-    Sparse = 1,
-}
-
-impl OptionalIndexCodec {
-    fn to_code(self) -> u8 {
-        self as u8
-    }
-
-    fn try_from_code(code: u8) -> Result<Self, InvalidData> {
-        match code {
-            0 => Ok(Self::Dense),
-            1 => Ok(Self::Sparse),
-            _ => Err(InvalidData),
-        }
-    }
-}
-
-impl BinarySerializable for OptionalIndexCodec {
-    fn serialize<W: Write + ?Sized>(&self, writer: &mut W) -> io::Result<()> {
-        writer.write_all(&[self.to_code()])
-    }
-
-    fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
-        let optional_codec_code = u8::deserialize(reader)?;
-        let optional_codec = Self::try_from_code(optional_codec_code)?;
-        Ok(optional_codec)
-    }
-}
-
 fn serialize_optional_index_block(block_els: &[u16], out: &mut impl io::Write) -> io::Result<()> {
     let is_sparse = is_sparse(block_els.len() as u32);
     if is_sparse {
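This hunk deletes `OptionalIndexCodec` as dead code, which is also why the `Write` and `InvalidData` imports removed in the first two hunks became unused. The deleted code follows a common pattern: a one-byte discriminant that round-trips through serialization. Below is a minimal standalone sketch of the same idea, with illustrative names rather than tantivy's `BinarySerializable` trait:

```rust
use std::io::{self, Read, Write};

/// A one-byte codec tag, mirroring the deleted `OptionalIndexCodec` pattern.
#[derive(Debug, Copy, Clone, PartialEq)]
enum CodecTag {
    Dense = 0,
    Sparse = 1,
}

impl CodecTag {
    fn write_to<W: Write>(self, writer: &mut W) -> io::Result<()> {
        writer.write_all(&[self as u8])
    }

    fn read_from<R: Read>(reader: &mut R) -> io::Result<Self> {
        let mut buf = [0u8; 1];
        reader.read_exact(&mut buf)?;
        match buf[0] {
            0 => Ok(CodecTag::Dense),
            1 => Ok(CodecTag::Sparse),
            other => Err(io::Error::new(
                io::ErrorKind::InvalidData,
                format!("unknown codec tag: {other}"),
            )),
        }
    }
}

fn main() -> io::Result<()> {
    // Round-trip: one byte out, same tag back in.
    let mut bytes = Vec::new();
    CodecTag::Sparse.write_to(&mut bytes)?;
    assert_eq!(CodecTag::read_from(&mut bytes.as_slice())?, CodecTag::Sparse);
    Ok(())
}
```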
@@ -68,7 +68,7 @@ fn interpret_escape(source: &str) -> String {
 
 /// Consume a word outside of any context.
 // TODO should support escape sequences
-fn word(inp: &str) -> IResult<&str, Cow<str>> {
+fn word(inp: &str) -> IResult<&str, Cow<'_, str>> {
     map_res(
         recognize(tuple((
             alt((
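The `Cow<str>` to `Cow<'_, str>` change here, like the `PreparedCommit<'_, D>`, `FieldSerializer<'_>`, `StreamerBuilder<'_, TSSTable>`, and `Streamer<'_, TSSTable>` changes later in this diff, addresses rustc's `elided_lifetimes_in_paths` lint: the `'_` makes it visible in the signature that the returned type borrows from an input. A minimal sketch of the same signature shape:

```rust
use std::borrow::Cow;

// Without `'_` this also compiles, but hides that the return value may
// borrow from `inp`; `Cow<'_, str>` makes the elided borrow explicit.
fn first_word(inp: &str) -> Cow<'_, str> {
    Cow::Borrowed(inp.split_whitespace().next().unwrap_or(""))
}

fn main() {
    assert_eq!(first_word("hello world"), "hello");
}
```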
@@ -301,7 +301,7 @@ impl SegmentAggregationCollector for SegmentHistogramCollector {
         let bounds = self.bounds;
         let interval = self.interval;
         let offset = self.offset;
-        let get_bucket_pos = |val| (get_bucket_pos_f64(val, interval, offset) as i64);
+        let get_bucket_pos = |val| get_bucket_pos_f64(val, interval, offset) as i64;
 
         bucket_agg_accessor
             .column_block_accessor
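This hunk only drops redundant parentheses around the closure body, but the closure is the core of the histogram collector: it maps a value to a signed bucket position using the configured `interval` and `offset`. A sketch of the standard bucketing formula such a helper computes (assumed semantics for illustration, not a copy of tantivy's `get_bucket_pos_f64`):

```rust
// Standard histogram bucketing: bucket_pos = floor((val - offset) / interval).
fn get_bucket_pos_f64(val: f64, interval: f64, offset: f64) -> f64 {
    ((val - offset) / interval).floor()
}

fn main() {
    let get_bucket_pos = |val| get_bucket_pos_f64(val, 10.0, 0.0) as i64;
    assert_eq!(get_bucket_pos(34.5), 3); // 34.5 falls in bucket [30, 40)
    assert_eq!(get_bucket_pos(-0.5), -1); // floor keeps negatives in the right bucket
}
```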
@@ -484,7 +484,6 @@ impl FacetCounts {
 #[cfg(test)]
 mod tests {
     use std::collections::BTreeSet;
-    use std::iter;
 
     use columnar::Dictionary;
     use rand::distributions::Uniform;
@@ -484,10 +484,8 @@ impl Directory for MmapDirectory {
             .map_err(LockError::wrap_io_error)?;
         if lock.is_blocking {
             file.lock_exclusive().map_err(LockError::wrap_io_error)?;
-        } else {
-            if !file.try_lock_exclusive().map_err(|_| LockError::LockBusy)? {
-                return Err(LockError::LockBusy);
-            }
+        } else if !file.try_lock_exclusive().map_err(|_| LockError::LockBusy)? {
+            return Err(LockError::LockBusy);
         }
         // dropping the file handle will release the lock.
         Ok(DirectoryLock::from(Box::new(ReleaseLockFile {
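This is clippy's `collapsible_else_if`: `else { if cond { … } }` becomes `else if cond { … }` with identical behavior. The surrounding logic is the usual blocking-versus-non-blocking lock split; a self-contained sketch of the same control flow, with closures standing in for the real file-lock calls:

```rust
#[derive(Debug)]
enum LockError {
    LockBusy,
}

// `try_acquire` stands in for `file.try_lock_exclusive()`;
// `acquire_blocking` stands in for `file.lock_exclusive()`.
fn obtain_lock(
    is_blocking: bool,
    acquire_blocking: impl Fn(),
    try_acquire: impl Fn() -> bool,
) -> Result<(), LockError> {
    if is_blocking {
        acquire_blocking();
    } else if !try_acquire() {
        // Collapsed form of `else { if !try_acquire() { return Err(...); } }`.
        return Err(LockError::LockBusy);
    }
    Ok(())
}

fn main() {
    assert!(obtain_lock(false, || (), || false).is_err()); // busy, non-blocking
    assert!(obtain_lock(false, || (), || true).is_ok()); // free, non-blocking
    assert!(obtain_lock(true, || (), || false).is_ok()); // blocking always waits
}
```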
@@ -146,7 +146,7 @@ impl InvertedIndexReader {
             positions_size: ByteCount::default(),
             num_terms: 0u64,
         };
-        field_space.record(&term_info);
+        field_space.record(term_info);
 
         // We include the json type and the json end of path to make sure the prefix check
         // is meaningful.
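Here the borrow is dropped because `record` already accepts the value directly, so `&term_info` was an unnecessary borrow (the shape clippy's `needless_borrow` lint flags). An illustrative reconstruction of that call shape, with hypothetical types that are not tantivy's actual space-usage API:

```rust
use std::borrow::Borrow;

#[derive(Copy, Clone, Default)]
struct TermInfo {
    doc_freq: u32,
}

#[derive(Default)]
struct FieldUsage {
    total_doc_freq: u64,
}

impl FieldUsage {
    // Accepts owned values and references alike; with a signature like this,
    // clippy flags `record(&term_info)` when `record(term_info)` already works.
    fn record(&mut self, term_info: impl Borrow<TermInfo>) {
        self.total_doc_freq += u64::from(term_info.borrow().doc_freq);
    }
}

fn main() {
    let mut field_usage = FieldUsage::default();
    let term_info = TermInfo { doc_freq: 3 };
    field_usage.record(term_info); // no borrow needed
    assert_eq!(field_usage.total_doc_freq, 3);
}
```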
@@ -615,7 +615,7 @@ impl<D: Document> IndexWriter<D> {
     /// It is also possible to add a payload to the `commit`
     /// using this API.
     /// See [`PreparedCommit::set_payload()`].
-    pub fn prepare_commit(&mut self) -> crate::Result<PreparedCommit<D>> {
+    pub fn prepare_commit(&mut self) -> crate::Result<PreparedCommit<'_, D>> {
         // Here, because we join all of the worker threads,
         // all of the segment update for this commit have been
         // sent.
@@ -75,7 +75,7 @@ impl InvertedIndexSerializer {
         field: Field,
         total_num_tokens: u64,
         fieldnorm_reader: Option<FieldNormReader>,
-    ) -> io::Result<FieldSerializer> {
+    ) -> io::Result<FieldSerializer<'_>> {
         let field_entry: &FieldEntry = self.schema.get_field_entry(field);
         let term_dictionary_write = self.terms_write.for_field(field);
         let postings_write = self.postings_write.for_field(field);
@@ -126,7 +126,7 @@ impl<'a> FieldSerializer<'a> {
         let term_dictionary_builder = TermDictionaryBuilder::create(term_dictionary_write)?;
         let average_fieldnorm = fieldnorm_reader
             .as_ref()
-            .map(|ff_reader| (total_num_tokens as Score / ff_reader.num_docs() as Score))
+            .map(|ff_reader| total_num_tokens as Score / ff_reader.num_docs() as Score)
             .unwrap_or(0.0);
         let postings_serializer = PostingsSerializer::new(
             postings_write,
@@ -1,5 +1,3 @@
-use serde::{Deserialize, Serialize};
-
 use crate::fieldnorm::FieldNormReader;
 use crate::query::Explanation;
 use crate::schema::Field;
@@ -68,12 +66,6 @@ fn compute_tf_cache(average_fieldnorm: Score) -> [Score; 256] {
     cache
 }
 
-#[derive(Clone, PartialEq, Debug, Serialize, Deserialize)]
-pub struct Bm25Params {
-    pub idf: Score,
-    pub avg_fieldnorm: Score,
-}
-
 /// A struct used for computing BM25 scores.
 #[derive(Clone)]
 pub struct Bm25Weight {
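This hunk removes the unused `Bm25Params` struct, which in turn is what makes the `serde` imports deleted in the previous hunk unused. For orientation in this file: `Bm25Weight` scores documents with the standard BM25 formula, and `compute_tf_cache` precomputes the length-normalization factor for each of the 256 possible encoded fieldnorm values. A hedged sketch of that math, using the conventional defaults k1 = 1.2 and b = 0.75 (constants and cache layout simplified here, not tantivy's exact code):

```rust
const K1: f32 = 1.2;
const B: f32 = 0.75;

// Standard BM25 term-frequency saturation for a document of length
// `fieldnorm`, given the average field length. A cache like the one in
// the diff would precompute `K1 * (1.0 - B + B * fieldnorm / avg)` once
// per encoded fieldnorm value instead of per scored document.
fn bm25_tf(term_freq: f32, fieldnorm: f32, average_fieldnorm: f32) -> f32 {
    let norm = K1 * (1.0 - B + B * fieldnorm / average_fieldnorm);
    term_freq * (K1 + 1.0) / (term_freq + norm)
}

fn main() {
    // More occurrences help, but with diminishing returns (saturation).
    assert!(bm25_tf(2.0, 100.0, 100.0) > bm25_tf(1.0, 100.0, 100.0));
    // The contribution is bounded above by K1 + 1.
    assert!(bm25_tf(100.0, 100.0, 100.0) < K1 + 1.0);
}
```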
@@ -302,7 +302,6 @@ fn is_sorted<I: Iterator<Item = DocId>>(mut it: I) -> bool {
 mod tests {
     use std::cmp::Ordering;
     use std::collections::BinaryHeap;
-    use std::iter;
 
     use proptest::prelude::*;
 
@@ -258,7 +258,7 @@ fn search_on_json_numerical_field(
 
     let bounds = match typ.numerical_type().unwrap() {
         NumericalType::I64 => {
-            let bounds = bounds.map_bound(|term| (term.as_i64().unwrap()));
+            let bounds = bounds.map_bound(|term| term.as_i64().unwrap());
             match actual_column_type {
                 NumericalType::I64 => bounds.map_bound(|&term| term.to_u64()),
                 NumericalType::U64 => {
@@ -282,7 +282,7 @@ fn search_on_json_numerical_field(
             }
         }
         NumericalType::U64 => {
-            let bounds = bounds.map_bound(|term| (term.as_u64().unwrap()));
+            let bounds = bounds.map_bound(|term| term.as_u64().unwrap());
             match actual_column_type {
                 NumericalType::U64 => bounds.map_bound(|&term| term.to_u64()),
                 NumericalType::I64 => {
@@ -306,7 +306,7 @@ fn search_on_json_numerical_field(
             }
         }
         NumericalType::F64 => {
-            let bounds = bounds.map_bound(|term| (term.as_f64().unwrap()));
+            let bounds = bounds.map_bound(|term| term.as_f64().unwrap());
             match actual_column_type {
                 NumericalType::U64 => transform_from_f64_bounds::<u64>(&bounds),
                 NumericalType::I64 => transform_from_f64_bounds::<i64>(&bounds),
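These three hunks only remove redundant parentheses around the closure bodies, but the code they sit in is worth a note: to run a range query on a JSON numerical field, the requested i64/u64/f64 bounds must be normalized into the order-preserving u64 representation the fast-field column stores. A sketch of that normalization for i64, assuming the standard sign-flip mapping (illustrative names, not tantivy's exact `to_u64` and `map_bound` helpers):

```rust
use std::ops::Bound;

// Order-preserving i64 -> u64 mapping: flipping the sign bit makes
// unsigned comparison agree with signed comparison.
fn i64_to_u64(val: i64) -> u64 {
    (val as u64) ^ (1 << 63)
}

// Apply a conversion to the payload of a range bound, like the
// `map_bound` calls in the diff.
fn map_bound<T, U>(bound: Bound<T>, f: impl Fn(T) -> U) -> Bound<U> {
    match bound {
        Bound::Included(v) => Bound::Included(f(v)),
        Bound::Excluded(v) => Bound::Excluded(f(v)),
        Bound::Unbounded => Bound::Unbounded,
    }
}

fn main() {
    // The mapping preserves order, so range comparisons keep working.
    assert!(i64_to_u64(-5) < i64_to_u64(3));
    let lower = map_bound(Bound::Included(-5i64), i64_to_u64);
    assert!(matches!(lower, Bound::Included(_)));
}
```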
@@ -1561,7 +1561,6 @@ fn to_ascii(text: &str, output: &mut String) {
 
 #[cfg(test)]
 mod tests {
-    use std::iter;
 
     use super::to_ascii;
     use crate::tokenizer::{AsciiFoldingFilter, RawTokenizer, SimpleTokenizer, TextAnalyzer};
@@ -609,12 +609,12 @@ impl<TSSTable: SSTable> Dictionary<TSSTable> {
 
     /// Returns a range builder, to stream all of the terms
     /// within an interval.
-    pub fn range(&self) -> StreamerBuilder<TSSTable> {
+    pub fn range(&self) -> StreamerBuilder<'_, TSSTable> {
         StreamerBuilder::new(self, AlwaysMatch)
     }
 
     /// Returns a range builder filtered with a prefix.
-    pub fn prefix_range<K: AsRef<[u8]>>(&self, prefix: K) -> StreamerBuilder<TSSTable> {
+    pub fn prefix_range<K: AsRef<[u8]>>(&self, prefix: K) -> StreamerBuilder<'_, TSSTable> {
         let lower_bound = prefix.as_ref();
         let mut upper_bound = lower_bound.to_vec();
         for idx in (0..upper_bound.len()).rev() {
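The `prefix_range` body, cut off at the loop above, builds the smallest exclusive upper bound above the prefix: scan the prefix bytes from the end, increment the first byte below 0xFF, and truncate there; if every byte is 0xFF, the range has no finite upper bound. A standalone sketch of that standard trick (illustrative, not tantivy's exact code):

```rust
use std::ops::Bound;

// Smallest key strictly greater than every key starting with `prefix`:
// bump the last bump-able byte and drop everything after it.
fn prefix_end(prefix: &[u8]) -> Bound<Vec<u8>> {
    let mut upper_bound = prefix.to_vec();
    for idx in (0..upper_bound.len()).rev() {
        if upper_bound[idx] != u8::MAX {
            upper_bound[idx] += 1;
            upper_bound.truncate(idx + 1);
            return Bound::Excluded(upper_bound);
        }
    }
    // Prefix was all 0xFF bytes: no finite exclusive upper bound exists.
    Bound::Unbounded
}

fn main() {
    assert_eq!(prefix_end(b"abc"), Bound::Excluded(b"abd".to_vec()));
    assert_eq!(prefix_end(&[0x61, 0xFF]), Bound::Excluded(vec![0x62]));
    assert_eq!(prefix_end(&[0xFF]), Bound::Unbounded);
}
```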
@@ -633,7 +633,7 @@ impl<TSSTable: SSTable> Dictionary<TSSTable> {
     }
 
     /// A stream of all the sorted terms.
-    pub fn stream(&self) -> io::Result<Streamer<TSSTable>> {
+    pub fn stream(&self) -> io::Result<Streamer<'_, TSSTable>> {
         self.range().into_stream()
     }
 