Cargo clippy. Acronym should not be full uppercase apparently.
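
The driving lint here is clippy's `upper_case_acronyms`, which flags identifiers that spell an acronym in full caps. A minimal sketch of the before/after (the type name below is illustrative, not one of tantivy's):

```rust
// Deny the lint so the old spelling fails `cargo clippy`.
#![deny(clippy::upper_case_acronyms)]

// struct QueryAST; // would be rejected: contains a capitalized acronym
struct QueryAst; // accepted: the acronym is camel-cased

fn main() {
    let _ast = QueryAst;
}
```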
@@ -8,6 +8,7 @@ Tantivy 0.15.0
 - Bugfix consistent tie break handling in facet's topk (@hardikpnsp) #357
 - Date field support for range queries (@rihardsk) #516
 - Added lz4-flex as the default compression scheme in tantivy (@PSeitz) #1009
+- Renamed a lot of symbols to avoid all uppercasing on acronyms, as per new clippy recommendation. For instance, RAMDirectory -> RamDirectory. (@pmasurel)
 
 Tantivy 0.14.0
 =========================
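
For downstream code the change is a mechanical rename of public symbols; behavior is untouched. A hedged sketch of the migration, using only APIs that appear in this diff (the schema field is illustrative):

```rust
// Before this commit: use tantivy::directory::RAMDirectory;
use tantivy::directory::RamDirectory;
use tantivy::schema::{Schema, TEXT};
use tantivy::Index;

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    schema_builder.add_text_field("title", TEXT); // illustrative field
    let schema = schema_builder.build();

    // Before this commit: RAMDirectory::create()
    let directory = RamDirectory::create();
    let _index = Index::create(directory, schema)?;
    Ok(())
}
```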
@@ -5,11 +5,11 @@ use combine::parser::Parser;
 
 pub use crate::occur::Occur;
 use crate::query_grammar::parse_to_ast;
-pub use crate::user_input_ast::{UserInputAST, UserInputBound, UserInputLeaf, UserInputLiteral};
+pub use crate::user_input_ast::{UserInputAst, UserInputBound, UserInputLeaf, UserInputLiteral};
 
 pub struct Error;
 
-pub fn parse_query(query: &str) -> Result<UserInputAST, Error> {
+pub fn parse_query(query: &str) -> Result<UserInputAst, Error> {
     let (user_input_ast, _remaining) = parse_to_ast().parse(query).map_err(|_| Error)?;
     Ok(user_input_ast)
 }
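
The rename flows through the query-grammar crate's public surface via the re-exports above. A sketch of calling `parse_query` from outside (the crate path `tantivy_query_grammar` is assumed):

```rust
use tantivy_query_grammar::{parse_query, UserInputAst};

fn main() {
    // `parse_query` now returns the renamed `UserInputAst` (was `UserInputAST`).
    let parsed: Result<UserInputAst, _> = parse_query("+title:hello -body:world");
    if let Ok(ast) = parsed {
        println!("{:?}", ast); // `fmt::Debug` impl is updated later in this diff
    }
}
```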
@@ -1,4 +1,4 @@
-use super::user_input_ast::{UserInputAST, UserInputBound, UserInputLeaf, UserInputLiteral};
+use super::user_input_ast::{UserInputAst, UserInputBound, UserInputLeaf, UserInputLiteral};
 use crate::Occur;
 use combine::parser::char::{char, digit, letter, space, spaces, string};
 use combine::parser::Parser;
@@ -209,21 +209,21 @@ fn range<'a>() -> impl Parser<&'a str, Output = UserInputLeaf> {
     })
 }
 
-fn negate(expr: UserInputAST) -> UserInputAST {
+fn negate(expr: UserInputAst) -> UserInputAst {
     expr.unary(Occur::MustNot)
 }
 
-fn leaf<'a>() -> impl Parser<&'a str, Output = UserInputAST> {
+fn leaf<'a>() -> impl Parser<&'a str, Output = UserInputAst> {
     parser(|input| {
         char('(')
             .with(ast())
             .skip(char(')'))
-            .or(char('*').map(|_| UserInputAST::from(UserInputLeaf::All)))
+            .or(char('*').map(|_| UserInputAst::from(UserInputLeaf::All)))
             .or(attempt(
                 string("NOT").skip(spaces1()).with(leaf()).map(negate),
             ))
-            .or(attempt(range().map(UserInputAST::from)))
-            .or(literal().map(UserInputAST::from))
+            .or(attempt(range().map(UserInputAst::from)))
+            .or(literal().map(UserInputAst::from))
             .parse_stream(input)
             .into_result()
     })
@@ -235,7 +235,7 @@ fn occur_symbol<'a>() -> impl Parser<&'a str, Output = Occur> {
         .or(char('+').map(|_| Occur::Must))
 }
 
-fn occur_leaf<'a>() -> impl Parser<&'a str, Output = (Option<Occur>, UserInputAST)> {
+fn occur_leaf<'a>() -> impl Parser<&'a str, Output = (Option<Occur>, UserInputAst)> {
     (optional(occur_symbol()), boosted_leaf())
 }
 
@@ -256,10 +256,10 @@ fn boost<'a>() -> impl Parser<&'a str, Output = f64> {
     (char('^'), positive_float_number()).map(|(_, boost)| boost)
 }
 
-fn boosted_leaf<'a>() -> impl Parser<&'a str, Output = UserInputAST> {
+fn boosted_leaf<'a>() -> impl Parser<&'a str, Output = UserInputAst> {
     (leaf(), optional(boost())).map(|(leaf, boost_opt)| match boost_opt {
         Some(boost) if (boost - 1.0).abs() > std::f64::EPSILON => {
-            UserInputAST::Boost(Box::new(leaf), boost)
+            UserInputAst::Boost(Box::new(leaf), boost)
         }
         _ => leaf,
     })
@@ -278,10 +278,10 @@ fn binary_operand<'a>() -> impl Parser<&'a str, Output = BinaryOperand> {
 }
 
 fn aggregate_binary_expressions(
-    left: UserInputAST,
-    others: Vec<(BinaryOperand, UserInputAST)>,
-) -> UserInputAST {
-    let mut dnf: Vec<Vec<UserInputAST>> = vec![vec![left]];
+    left: UserInputAst,
+    others: Vec<(BinaryOperand, UserInputAst)>,
+) -> UserInputAst {
+    let mut dnf: Vec<Vec<UserInputAst>> = vec![vec![left]];
     for (operator, operand_ast) in others {
         match operator {
             BinaryOperand::And => {
@@ -295,33 +295,33 @@ fn aggregate_binary_expressions(
         }
     }
     if dnf.len() == 1 {
-        UserInputAST::and(dnf.into_iter().next().unwrap()) //< safe
+        UserInputAst::and(dnf.into_iter().next().unwrap()) //< safe
     } else {
-        let conjunctions = dnf.into_iter().map(UserInputAST::and).collect();
-        UserInputAST::or(conjunctions)
+        let conjunctions = dnf.into_iter().map(UserInputAst::and).collect();
+        UserInputAst::or(conjunctions)
     }
 }
 
-fn operand_leaf<'a>() -> impl Parser<&'a str, Output = (BinaryOperand, UserInputAST)> {
+fn operand_leaf<'a>() -> impl Parser<&'a str, Output = (BinaryOperand, UserInputAst)> {
     (
         binary_operand().skip(spaces()),
         boosted_leaf().skip(spaces()),
     )
 }
 
-pub fn ast<'a>() -> impl Parser<&'a str, Output = UserInputAST> {
+pub fn ast<'a>() -> impl Parser<&'a str, Output = UserInputAst> {
     let boolean_expr = (boosted_leaf().skip(spaces()), many1(operand_leaf()))
         .map(|(left, right)| aggregate_binary_expressions(left, right));
     let whitespace_separated_leaves = many1(occur_leaf().skip(spaces().silent())).map(
-        |subqueries: Vec<(Option<Occur>, UserInputAST)>| {
+        |subqueries: Vec<(Option<Occur>, UserInputAst)>| {
             if subqueries.len() == 1 {
                 let (occur_opt, ast) = subqueries.into_iter().next().unwrap();
                 match occur_opt.unwrap_or(Occur::Should) {
                     Occur::Must | Occur::Should => ast,
-                    Occur::MustNot => UserInputAST::Clause(vec![(Some(Occur::MustNot), ast)]),
+                    Occur::MustNot => UserInputAst::Clause(vec![(Some(Occur::MustNot), ast)]),
                 }
             } else {
-                UserInputAST::Clause(subqueries.into_iter().collect())
+                UserInputAst::Clause(subqueries.into_iter().collect())
             }
         },
     );
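
`aggregate_binary_expressions` accumulates an `AND`/`OR` chain into disjunctive normal form: each inner `Vec` is one conjunction, the outer `Vec` the disjunction. The hunk elides the loop body, so the `And`/`Or` arms below are an assumption consistent with the visible `dnf` setup and teardown:

```rust
#[derive(Debug)]
enum Op {
    And,
    Or,
}

// Same bookkeeping shape as the `dnf` accumulator above:
// `And` extends the current conjunction, `Or` opens a new one.
fn flatten<'a>(left: &'a str, others: Vec<(Op, &'a str)>) -> Vec<Vec<&'a str>> {
    let mut dnf = vec![vec![left]];
    for (op, operand) in others {
        match op {
            Op::And => dnf.last_mut().unwrap().push(operand),
            Op::Or => dnf.push(vec![operand]),
        }
    }
    dnf
}

fn main() {
    // "a AND b OR c" groups as (a AND b) OR c.
    let dnf = flatten("a", vec![(Op::And, "b"), (Op::Or, "c")]);
    println!("{:?}", dnf); // [["a", "b"], ["c"]]
}
```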
@@ -329,10 +329,10 @@ pub fn ast<'a>() -> impl Parser<&'a str, Output = UserInputAST> {
     spaces().with(expr).skip(spaces())
 }
 
-pub fn parse_to_ast<'a>() -> impl Parser<&'a str, Output = UserInputAST> {
+pub fn parse_to_ast<'a>() -> impl Parser<&'a str, Output = UserInputAst> {
     spaces()
         .with(optional(ast()).skip(eof()))
-        .map(|opt_ast| opt_ast.unwrap_or_else(UserInputAST::empty_query))
+        .map(|opt_ast| opt_ast.unwrap_or_else(UserInputAst::empty_query))
 }
 
 #[cfg(test)]
@@ -84,41 +84,41 @@ impl UserInputBound {
     }
 }
 
-pub enum UserInputAST {
-    Clause(Vec<(Option<Occur>, UserInputAST)>),
+pub enum UserInputAst {
+    Clause(Vec<(Option<Occur>, UserInputAst)>),
     Leaf(Box<UserInputLeaf>),
-    Boost(Box<UserInputAST>, f64),
+    Boost(Box<UserInputAst>, f64),
 }
 
-impl UserInputAST {
-    pub fn unary(self, occur: Occur) -> UserInputAST {
-        UserInputAST::Clause(vec![(Some(occur), self)])
+impl UserInputAst {
+    pub fn unary(self, occur: Occur) -> UserInputAst {
+        UserInputAst::Clause(vec![(Some(occur), self)])
     }
 
-    fn compose(occur: Occur, asts: Vec<UserInputAST>) -> UserInputAST {
+    fn compose(occur: Occur, asts: Vec<UserInputAst>) -> UserInputAst {
         assert_ne!(occur, Occur::MustNot);
         assert!(!asts.is_empty());
         if asts.len() == 1 {
             asts.into_iter().next().unwrap() //< safe
         } else {
-            UserInputAST::Clause(
+            UserInputAst::Clause(
                 asts.into_iter()
-                    .map(|ast: UserInputAST| (Some(occur), ast))
+                    .map(|ast: UserInputAst| (Some(occur), ast))
                     .collect::<Vec<_>>(),
             )
         }
     }
 
-    pub fn empty_query() -> UserInputAST {
-        UserInputAST::Clause(Vec::default())
+    pub fn empty_query() -> UserInputAst {
+        UserInputAst::Clause(Vec::default())
     }
 
-    pub fn and(asts: Vec<UserInputAST>) -> UserInputAST {
-        UserInputAST::compose(Occur::Must, asts)
+    pub fn and(asts: Vec<UserInputAst>) -> UserInputAst {
+        UserInputAst::compose(Occur::Must, asts)
     }
 
-    pub fn or(asts: Vec<UserInputAST>) -> UserInputAST {
-        UserInputAST::compose(Occur::Should, asts)
+    pub fn or(asts: Vec<UserInputAst>) -> UserInputAst {
+        UserInputAst::compose(Occur::Should, asts)
     }
 }
 
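
A crate-internal sketch of the renamed constructors in use; the composition itself is illustrative, not code from this commit:

```rust
use crate::user_input_ast::{UserInputAst, UserInputLeaf};
use crate::Occur;

fn negated_boosted_all() -> UserInputAst {
    // `From<UserInputLeaf>` boxes the leaf (impl updated in the next hunk).
    let all = UserInputAst::from(UserInputLeaf::All);
    let boosted = UserInputAst::Boost(Box::new(all), 2.0);
    // `unary` wraps the node in a single-entry clause:
    // Clause([(Some(MustNot), Boost(All, 2.0))]).
    boosted.unary(Occur::MustNot)
}
```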
@@ -128,15 +128,15 @@ impl From<UserInputLiteral> for UserInputLeaf {
     }
 }
 
-impl From<UserInputLeaf> for UserInputAST {
-    fn from(leaf: UserInputLeaf) -> UserInputAST {
-        UserInputAST::Leaf(Box::new(leaf))
+impl From<UserInputLeaf> for UserInputAst {
+    fn from(leaf: UserInputLeaf) -> UserInputAst {
+        UserInputAst::Leaf(Box::new(leaf))
     }
 }
 
 fn print_occur_ast(
     occur_opt: Option<Occur>,
-    ast: &UserInputAST,
+    ast: &UserInputAst,
     formatter: &mut fmt::Formatter,
 ) -> fmt::Result {
     if let Some(occur) = occur_opt {
@@ -147,10 +147,10 @@ fn print_occur_ast(
     Ok(())
 }
 
-impl fmt::Debug for UserInputAST {
+impl fmt::Debug for UserInputAst {
     fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
         match *self {
-            UserInputAST::Clause(ref subqueries) => {
+            UserInputAst::Clause(ref subqueries) => {
                 if subqueries.is_empty() {
                     write!(formatter, "<emptyclause>")?;
                 } else {
@@ -164,8 +164,8 @@ impl fmt::Debug for UserInputAST {
                 }
                 Ok(())
             }
-            UserInputAST::Leaf(ref subquery) => write!(formatter, "{:?}", subquery),
-            UserInputAST::Boost(ref leaf, boost) => write!(formatter, "({:?})^{}", leaf, boost),
+            UserInputAst::Leaf(ref subquery) => write!(formatter, "{:?}", subquery),
+            UserInputAst::Boost(ref leaf, boost) => write!(formatter, "({:?})^{}", leaf, boost),
         }
     }
 }
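
Tying the two files together: a blank query falls back to `UserInputAst::empty_query()`, whose empty clause should Debug-print as `<emptyclause>` per the impl above. A sketch (crate path assumed):

```rust
use tantivy_query_grammar::parse_query;

fn main() {
    // `parse_to_ast` maps an all-whitespace input to `UserInputAst::empty_query()`.
    if let Ok(ast) = parse_query("   ") {
        assert_eq!(format!("{:?}", ast), "<emptyclause>");
    }
}
```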
@@ -190,7 +190,7 @@ mod test {
     use super::{CompositeFile, CompositeWrite};
     use crate::common::BinarySerializable;
     use crate::common::VInt;
-    use crate::directory::{Directory, RAMDirectory};
+    use crate::directory::{Directory, RamDirectory};
     use crate::schema::Field;
     use std::io::Write;
     use std::path::Path;
@@ -198,7 +198,7 @@ mod test {
     #[test]
     fn test_composite_file() -> crate::Result<()> {
         let path = Path::new("test_path");
-        let directory = RAMDirectory::create();
+        let directory = RamDirectory::create();
         {
             let w = directory.open_write(path).unwrap();
             let mut composite_write = CompositeWrite::wrap(w);
@@ -10,7 +10,7 @@ use crate::directory::ManagedDirectory;
 #[cfg(feature = "mmap")]
 use crate::directory::MmapDirectory;
 use crate::directory::INDEX_WRITER_LOCK;
-use crate::directory::{Directory, RAMDirectory};
+use crate::directory::{Directory, RamDirectory};
 use crate::error::DataCorruption;
 use crate::error::TantivyError;
 use crate::indexer::index_writer::HEAP_SIZE_MIN;
@@ -97,13 +97,13 @@ impl Index {
         self.set_multithread_executor(default_num_threads)
     }
 
-    /// Creates a new index using the `RAMDirectory`.
+    /// Creates a new index using the `RamDirectory`.
     ///
     /// The index will be allocated in anonymous memory.
     /// This should only be used for unit tests.
     pub fn create_in_ram(schema: Schema) -> Index {
-        let ram_directory = RAMDirectory::create();
-        Index::create(ram_directory, schema).expect("Creating a RAMDirectory should never fail")
+        let ram_directory = RamDirectory::create();
+        Index::create(ram_directory, schema).expect("Creating a RamDirectory should never fail")
     }
 
     /// Creates a new index in a given filepath.
@@ -144,7 +144,7 @@ impl Index {
     /// is destroyed.
     ///
     /// The temp directory is only used for testing the `MmapDirectory`.
-    /// For other unit tests, prefer the `RAMDirectory`, see: `create_in_ram`.
+    /// For other unit tests, prefer the `RamDirectory`, see: `create_in_ram`.
     #[cfg(feature = "mmap")]
     pub fn create_from_tempdir(schema: Schema) -> crate::Result<Index> {
         let mmap_directory = MmapDirectory::create_from_tempdir()?;
@@ -282,7 +282,7 @@ impl Index {
     /// Each thread will receive a budget of `overall_heap_size_in_bytes / num_threads`.
     ///
     /// # Errors
-    /// If the lockfile already exists, returns `Error::DirectoryLockBusy` or an `Error::IOError`.
+    /// If the lockfile already exists, returns `Error::DirectoryLockBusy` or an `Error::IoError`.
     ///
     /// # Panics
     /// If the heap size per thread is too small, panics.
@@ -411,7 +411,7 @@ impl fmt::Debug for Index {
 
 #[cfg(test)]
 mod tests {
-    use crate::directory::{RAMDirectory, WatchCallback};
+    use crate::directory::{RamDirectory, WatchCallback};
     use crate::schema::Field;
     use crate::schema::{Schema, INDEXED, TEXT};
     use crate::IndexReader;
@@ -434,7 +434,7 @@ mod tests {
 
     #[test]
     fn test_index_exists() {
-        let directory = RAMDirectory::create();
+        let directory = RamDirectory::create();
         assert!(!Index::exists(&directory).unwrap());
         assert!(Index::create(directory.clone(), throw_away_schema()).is_ok());
         assert!(Index::exists(&directory).unwrap());
@@ -442,7 +442,7 @@ mod tests {
 
     #[test]
     fn open_or_create_should_create() {
-        let directory = RAMDirectory::create();
+        let directory = RamDirectory::create();
         assert!(!Index::exists(&directory).unwrap());
         assert!(Index::open_or_create(directory.clone(), throw_away_schema()).is_ok());
         assert!(Index::exists(&directory).unwrap());
@@ -450,7 +450,7 @@ mod tests {
 
     #[test]
     fn open_or_create_should_open() {
-        let directory = RAMDirectory::create();
+        let directory = RamDirectory::create();
         assert!(Index::create(directory.clone(), throw_away_schema()).is_ok());
         assert!(Index::exists(&directory).unwrap());
         assert!(Index::open_or_create(directory, throw_away_schema()).is_ok());
@@ -458,7 +458,7 @@ mod tests {
 
     #[test]
     fn create_should_wipeoff_existing() {
-        let directory = RAMDirectory::create();
+        let directory = RamDirectory::create();
         assert!(Index::create(directory.clone(), throw_away_schema()).is_ok());
         assert!(Index::exists(&directory).unwrap());
         assert!(Index::create(directory.clone(), Schema::builder().build()).is_ok());
@@ -466,7 +466,7 @@ mod tests {
 
     #[test]
     fn open_or_create_exists_but_schema_does_not_match() {
-        let directory = RAMDirectory::create();
+        let directory = RamDirectory::create();
         assert!(Index::create(directory.clone(), throw_away_schema()).is_ok());
         assert!(Index::exists(&directory).unwrap());
         assert!(Index::open_or_create(directory.clone(), throw_away_schema()).is_ok());
@@ -599,7 +599,7 @@ mod tests {
     #[cfg(not(target_os = "windows"))]
     #[test]
    fn garbage_collect_works_as_intended() {
-        let directory = RAMDirectory::create();
+        let directory = RamDirectory::create();
         let schema = throw_away_schema();
         let field = schema.get_field("num_likes").unwrap();
         let index = Index::create(directory.clone(), schema).unwrap();
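
Only the names in the doc comments and the method body change; usage of `create_in_ram` is the same. A sketch (the schema field is illustrative):

```rust
use tantivy::schema::{Schema, TEXT};
use tantivy::Index;

fn main() {
    let mut schema_builder = Schema::builder();
    schema_builder.add_text_field("title", TEXT); // illustrative field
    // Backed by the renamed `RamDirectory`: anonymous memory, unit tests only.
    let _index = Index::create_in_ram(schema_builder.build());
}
```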
@@ -108,14 +108,14 @@ impl SegmentMeta {
     pub fn relative_path(&self, component: SegmentComponent) -> PathBuf {
         let mut path = self.id().uuid_string();
         path.push_str(&*match component {
-            SegmentComponent::POSTINGS => ".idx".to_string(),
-            SegmentComponent::POSITIONS => ".pos".to_string(),
-            SegmentComponent::POSITIONSSKIP => ".posidx".to_string(),
-            SegmentComponent::TERMS => ".term".to_string(),
-            SegmentComponent::STORE => ".store".to_string(),
-            SegmentComponent::FASTFIELDS => ".fast".to_string(),
-            SegmentComponent::FIELDNORMS => ".fieldnorm".to_string(),
-            SegmentComponent::DELETE => format!(".{}.del", self.delete_opstamp().unwrap_or(0)),
+            SegmentComponent::Postings => ".idx".to_string(),
+            SegmentComponent::Positions => ".pos".to_string(),
+            SegmentComponent::PositionsSkip => ".posidx".to_string(),
+            SegmentComponent::Terms => ".term".to_string(),
+            SegmentComponent::Store => ".store".to_string(),
+            SegmentComponent::FastFields => ".fast".to_string(),
+            SegmentComponent::FieldNorms => ".fieldnorm".to_string(),
+            SegmentComponent::Delete => format!(".{}.del", self.delete_opstamp().unwrap_or(0)),
         });
         PathBuf::from(path)
     }
@@ -7,39 +7,39 @@ use std::slice;
 #[derive(Copy, Clone)]
 pub enum SegmentComponent {
     /// Postings (or inverted list). Sorted lists of document ids, associated to terms
-    POSTINGS,
+    Postings,
     /// Positions of terms in each document.
-    POSITIONS,
+    Positions,
     /// Index to seek within the position file
-    POSITIONSSKIP,
+    PositionsSkip,
     /// Column-oriented random-access storage of fields.
-    FASTFIELDS,
+    FastFields,
     /// Stores the sum of the length (in terms) of each field for each document.
     /// Field norms are stored as a special u64 fast field.
-    FIELDNORMS,
+    FieldNorms,
     /// Dictionary associating `Term`s to `TermInfo`s which is
     /// simply an address into the `postings` file and the `positions` file.
-    TERMS,
+    Terms,
     /// Row-oriented, compressed storage of the documents.
     /// Accessing a document from the store is relatively slow, as it
     /// requires to decompress the entire block it belongs to.
-    STORE,
+    Store,
     /// Bitset describing which document of the segment is deleted.
-    DELETE,
+    Delete,
 }
 
 impl SegmentComponent {
     /// Iterates through the components.
     pub fn iterator() -> slice::Iter<'static, SegmentComponent> {
         static SEGMENT_COMPONENTS: [SegmentComponent; 8] = [
-            SegmentComponent::POSTINGS,
-            SegmentComponent::POSITIONS,
-            SegmentComponent::POSITIONSSKIP,
-            SegmentComponent::FASTFIELDS,
-            SegmentComponent::FIELDNORMS,
-            SegmentComponent::TERMS,
-            SegmentComponent::STORE,
-            SegmentComponent::DELETE,
+            SegmentComponent::Postings,
+            SegmentComponent::Positions,
+            SegmentComponent::PositionsSkip,
+            SegmentComponent::FastFields,
+            SegmentComponent::FieldNorms,
+            SegmentComponent::Terms,
+            SegmentComponent::Store,
+            SegmentComponent::Delete,
         ];
         SEGMENT_COMPONENTS.iter()
     }
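
With the variants camel-cased, matches and the `iterator()` helper read as below. This is a sketch; the root re-export path for `SegmentComponent` is an assumption:

```rust
use tantivy::SegmentComponent; // re-export path assumed

fn main() {
    // `iterator()` yields all eight components defined above.
    for component in SegmentComponent::iterator() {
        if let SegmentComponent::Delete = component {
            println!("segment has a delete-bitset component");
        }
    }
}
```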
@@ -151,18 +151,18 @@ impl SegmentReader {
 
     /// Open a new segment for reading.
     pub fn open(segment: &Segment) -> crate::Result<SegmentReader> {
-        let termdict_file = segment.open_read(SegmentComponent::TERMS)?;
+        let termdict_file = segment.open_read(SegmentComponent::Terms)?;
         let termdict_composite = CompositeFile::open(&termdict_file)?;
 
-        let store_file = segment.open_read(SegmentComponent::STORE)?;
+        let store_file = segment.open_read(SegmentComponent::Store)?;
 
         fail_point!("SegmentReader::open#middle");
 
-        let postings_file = segment.open_read(SegmentComponent::POSTINGS)?;
+        let postings_file = segment.open_read(SegmentComponent::Postings)?;
         let postings_composite = CompositeFile::open(&postings_file)?;
 
         let positions_composite = {
-            if let Ok(positions_file) = segment.open_read(SegmentComponent::POSITIONS) {
+            if let Ok(positions_file) = segment.open_read(SegmentComponent::Positions) {
                 CompositeFile::open(&positions_file)?
             } else {
                 CompositeFile::empty()
@@ -170,7 +170,7 @@ impl SegmentReader {
         };
 
         let positions_idx_composite = {
-            if let Ok(positions_skip_file) = segment.open_read(SegmentComponent::POSITIONSSKIP) {
+            if let Ok(positions_skip_file) = segment.open_read(SegmentComponent::PositionsSkip) {
                 CompositeFile::open(&positions_skip_file)?
             } else {
                 CompositeFile::empty()
@@ -179,16 +179,16 @@ impl SegmentReader {
 
         let schema = segment.schema();
 
-        let fast_fields_data = segment.open_read(SegmentComponent::FASTFIELDS)?;
+        let fast_fields_data = segment.open_read(SegmentComponent::FastFields)?;
         let fast_fields_composite = CompositeFile::open(&fast_fields_data)?;
         let fast_field_readers =
             Arc::new(FastFieldReaders::new(schema.clone(), fast_fields_composite));
 
-        let fieldnorm_data = segment.open_read(SegmentComponent::FIELDNORMS)?;
+        let fieldnorm_data = segment.open_read(SegmentComponent::FieldNorms)?;
         let fieldnorm_readers = FieldNormReaders::open(fieldnorm_data)?;
 
         let delete_bitset_opt = if segment.meta().has_deletes() {
-            let delete_data = segment.open_read(SegmentComponent::DELETE)?;
+            let delete_data = segment.open_read(SegmentComponent::Delete)?;
             let delete_bitset = DeleteBitSet::open(delete_data)?;
             Some(delete_bitset)
         } else {
@@ -70,7 +70,7 @@ impl Drop for DirectoryLockGuard {
 
 enum TryAcquireLockError {
     FileExists,
-    IOError(io::Error),
+    IoError(io::Error),
 }
 
 fn try_acquire_lock(
@@ -79,9 +79,9 @@ fn try_acquire_lock(
 ) -> Result<DirectoryLock, TryAcquireLockError> {
     let mut write = directory.open_write(filepath).map_err(|e| match e {
         OpenWriteError::FileAlreadyExists(_) => TryAcquireLockError::FileExists,
-        OpenWriteError::IOError { io_error, .. } => TryAcquireLockError::IOError(io_error),
+        OpenWriteError::IoError { io_error, .. } => TryAcquireLockError::IoError(io_error),
     })?;
-    write.flush().map_err(TryAcquireLockError::IOError)?;
+    write.flush().map_err(TryAcquireLockError::IoError)?;
     Ok(DirectoryLock::from(Box::new(DirectoryLockGuard {
         directory: directory.box_clone(),
         path: filepath.to_owned(),
@@ -106,7 +106,7 @@ fn retry_policy(is_blocking: bool) -> RetryPolicy {
 ///
 /// - The [`MMapDirectory`](struct.MmapDirectory.html), this
 /// should be your default choice.
-/// - The [`RAMDirectory`](struct.RAMDirectory.html), which
+/// - The [`RamDirectory`](struct.RamDirectory.html), which
 /// should be used mostly for tests.
 pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
     /// Opens a file and returns a boxed `FileHandle`.
@@ -154,7 +154,7 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
     /// Flush operation should also be persistent.
     ///
     /// The user shall not rely on `Drop` triggering `flush`.
-    /// Note that `RAMDirectory` will panic! if `flush`
+    /// Note that `RamDirectory` will panic! if `flush`
     /// was not called.
     ///
     /// The file may not previously exist.
@@ -192,8 +192,8 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
                     return Err(LockError::LockBusy);
                 }
             }
-            Err(TryAcquireLockError::IOError(io_error)) => {
-                return Err(LockError::IOError(io_error));
+            Err(TryAcquireLockError::IoError(io_error)) => {
+                return Err(LockError::IoError(io_error));
             }
         }
     }
@@ -12,9 +12,9 @@ pub enum LockError {
     /// - In the context of a non-blocking lock, this means the lock was busy at the moment of the call.
     #[error("Could not acquire lock as it is already held, possibly by a different process.")]
     LockBusy,
-    /// Trying to acquire a lock failed with an `IOError`
+    /// Trying to acquire a lock failed with an `IoError`
     #[error("Failed to acquire the lock due to an io:Error.")]
-    IOError(io::Error),
+    IoError(io::Error),
 }
 
 /// Error that may occur when opening a directory
@@ -30,7 +30,7 @@ pub enum OpenDirectoryError {
     #[error("Failed to create a temporary directory: '{0}'.")]
     FailedToCreateTempDir(io::Error),
     /// IoError
-    #[error("IOError '{io_error:?}' while create directory in: '{directory_path:?}'.")]
+    #[error("IoError '{io_error:?}' while create directory in: '{directory_path:?}'.")]
     IoError {
         /// underlying io Error.
         io_error: io::Error,
@@ -48,8 +48,8 @@ pub enum OpenWriteError {
     FileAlreadyExists(PathBuf),
     /// Any kind of IO error that happens when
     /// writing in the underlying IO device.
-    #[error("IOError '{io_error:?}' while opening file for write: '{filepath}'.")]
-    IOError {
+    #[error("IoError '{io_error:?}' while opening file for write: '{filepath}'.")]
+    IoError {
         /// The underlying `io::Error`.
         io_error: io::Error,
         /// File path of the file that tantivy failed to open for write.
@@ -60,7 +60,7 @@ pub enum OpenWriteError {
 impl OpenWriteError {
     /// Wraps an io error.
     pub fn wrap_io_error(io_error: io::Error, filepath: PathBuf) -> Self {
-        Self::IOError { io_error, filepath }
+        Self::IoError { io_error, filepath }
     }
 }
 /// Type of index incompatibility between the library and the index found on disk
@@ -130,9 +130,9 @@ pub enum OpenReadError {
     FileDoesNotExist(PathBuf),
     /// Any kind of io::Error.
     #[error(
-        "IOError: '{io_error:?}' happened while opening the following file for Read: {filepath}."
+        "IoError: '{io_error:?}' happened while opening the following file for Read: {filepath}."
     )]
-    IOError {
+    IoError {
         /// The underlying `io::Error`.
         io_error: io::Error,
         /// File path of the file that tantivy failed to open for read.
@@ -146,7 +146,7 @@ pub enum OpenReadError {
 impl OpenReadError {
     /// Wraps an io error.
     pub fn wrap_io_error(io_error: io::Error, filepath: PathBuf) -> Self {
-        Self::IOError { io_error, filepath }
+        Self::IoError { io_error, filepath }
     }
 }
 /// Error that may occur when trying to delete a file
@@ -158,7 +158,7 @@ pub enum DeleteError {
     /// Any kind of IO error that happens when
     /// interacting with the underlying IO device.
     #[error("The following IO error happened while deleting file '{filepath}': '{io_error:?}'.")]
-    IOError {
+    IoError {
         /// The underlying `io::Error`.
         io_error: io::Error,
         /// File path of the file that tantivy failed to delete.
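
Downstream construction and matching of these errors only swaps the variant name. A sketch built on the `wrap_io_error` helper above (module path per tantivy's `directory::error`):

```rust
use std::io;
use std::path::PathBuf;
use tantivy::directory::error::OpenReadError;

fn not_found(filepath: PathBuf) -> OpenReadError {
    let io_error = io::Error::new(io::ErrorKind::NotFound, "file missing");
    // Produces the renamed `OpenReadError::IoError { io_error, filepath }`.
    OpenReadError::wrap_io_error(io_error, filepath)
}

fn main() {
    // thiserror-derived `Display` prints the message shown in the enum above.
    println!("{}", not_found(PathBuf::from("meta.json")));
}
```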
@@ -86,7 +86,7 @@ impl ManagedDirectory {
                 directory: Box::new(directory),
                 meta_informations: Arc::default(),
             }),
-            io_err @ Err(OpenReadError::IOError { .. }) => Err(io_err.err().unwrap().into()),
+            io_err @ Err(OpenReadError::IoError { .. }) => Err(io_err.err().unwrap().into()),
             Err(OpenReadError::IncompatibleIndex(incompatibility)) => {
                 // For the moment, this should never happen `meta.json`
                 // do not have any footer and cannot detect incompatibility.
@@ -168,7 +168,7 @@ impl ManagedDirectory {
                     DeleteError::FileDoesNotExist(_) => {
                         deleted_files.push(file_to_delete.clone());
                     }
-                    DeleteError::IOError { .. } => {
+                    DeleteError::IoError { .. } => {
                         failed_to_delete_files.push(file_to_delete.clone());
                         if !cfg!(target_os = "windows") {
                             // On windows, delete is expected to fail if the file
@@ -232,13 +232,13 @@ impl ManagedDirectory {
     pub fn validate_checksum(&self, path: &Path) -> result::Result<bool, OpenReadError> {
         let reader = self.directory.open_read(path)?;
         let (footer, data) =
-            Footer::extract_footer(reader).map_err(|io_error| OpenReadError::IOError {
+            Footer::extract_footer(reader).map_err(|io_error| OpenReadError::IoError {
                 io_error,
                 filepath: path.to_path_buf(),
             })?;
         let bytes = data
             .read_bytes()
-            .map_err(|io_error| OpenReadError::IOError {
+            .map_err(|io_error| OpenReadError::IoError {
                 filepath: path.to_path_buf(),
                 io_error,
             })?;
@@ -185,7 +185,7 @@ impl MmapDirectory {
     /// Creates a new MmapDirectory in a temporary directory.
     ///
     /// This is mostly useful to test the MmapDirectory itself.
-    /// For your unit tests, prefer the RAMDirectory.
+    /// For your unit tests, prefer the RamDirectory.
     pub fn create_from_tempdir() -> Result<MmapDirectory, OpenDirectoryError> {
         let tempdir = TempDir::new().map_err(OpenDirectoryError::FailedToCreateTempDir)?;
         Ok(MmapDirectory::new(
@@ -374,7 +374,7 @@ impl Directory for MmapDirectory {
     fn delete(&self, path: &Path) -> result::Result<(), DeleteError> {
         let full_path = self.resolve_path(path);
         match fs::remove_file(&full_path) {
-            Ok(_) => self.sync_directory().map_err(|e| DeleteError::IOError {
+            Ok(_) => self.sync_directory().map_err(|e| DeleteError::IoError {
                 io_error: e,
                 filepath: path.to_path_buf(),
             }),
@@ -382,7 +382,7 @@ impl Directory for MmapDirectory {
                 if e.kind() == io::ErrorKind::NotFound {
                     Err(DeleteError::FileDoesNotExist(path.to_owned()))
                 } else {
-                    Err(DeleteError::IOError {
+                    Err(DeleteError::IoError {
                         io_error: e,
                         filepath: path.to_path_buf(),
                     })
@@ -460,9 +460,9 @@ impl Directory for MmapDirectory {
             .write(true)
             .create(true) //< if the file does not exist yet, create it.
             .open(&full_path)
-            .map_err(LockError::IOError)?;
+            .map_err(LockError::IoError)?;
         if lock.is_blocking {
-            file.lock_exclusive().map_err(LockError::IOError)?;
+            file.lock_exclusive().map_err(LockError::IoError)?;
         } else {
             file.try_lock_exclusive().map_err(|_| LockError::LockBusy)?
         }
@@ -26,7 +26,7 @@ pub use self::directory_lock::{Lock, INDEX_WRITER_LOCK, META_LOCK};
 pub(crate) use self::file_slice::{ArcBytes, WeakArcBytes};
 pub use self::file_slice::{FileHandle, FileSlice};
 pub use self::owned_bytes::OwnedBytes;
-pub use self::ram_directory::RAMDirectory;
+pub use self::ram_directory::RamDirectory;
 pub use self::watch_event_router::{WatchCallback, WatchCallbackList, WatchHandle};
 use std::io::{self, BufWriter, Write};
 use std::path::PathBuf;
@@ -14,7 +14,7 @@ use std::sync::{Arc, RwLock};
 
 use super::FileHandle;
 
-/// Writer associated with the `RAMDirectory`
+/// Writer associated with the `RamDirectory`
 ///
 /// The Writer just writes a buffer.
 ///
@@ -26,13 +26,13 @@ use super::FileHandle;
 ///
 struct VecWriter {
     path: PathBuf,
-    shared_directory: RAMDirectory,
+    shared_directory: RamDirectory,
     data: Cursor<Vec<u8>>,
     is_flushed: bool,
 }
 
 impl VecWriter {
-    fn new(path_buf: PathBuf, shared_directory: RAMDirectory) -> VecWriter {
+    fn new(path_buf: PathBuf, shared_directory: RamDirectory) -> VecWriter {
         VecWriter {
             path: path_buf,
             data: Cursor::new(Vec::new()),
@@ -119,9 +119,9 @@ impl InnerDirectory {
     }
 }
 
-impl fmt::Debug for RAMDirectory {
+impl fmt::Debug for RamDirectory {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "RAMDirectory")
+        write!(f, "RamDirectory")
     }
 }
 
@@ -131,23 +131,23 @@ impl fmt::Debug for RAMDirectory {
 /// Writes are only made visible upon flushing.
 ///
 #[derive(Clone, Default)]
-pub struct RAMDirectory {
+pub struct RamDirectory {
     fs: Arc<RwLock<InnerDirectory>>,
 }
 
-impl RAMDirectory {
+impl RamDirectory {
     /// Constructor
-    pub fn create() -> RAMDirectory {
+    pub fn create() -> RamDirectory {
         Self::default()
     }
 
     /// Returns the sum of the size of the different files
-    /// in the RAMDirectory.
+    /// in the RamDirectory.
     pub fn total_mem_usage(&self) -> usize {
         self.fs.read().unwrap().total_mem_usage()
    }
 
-    /// Write a copy of all of the files saved in the RAMDirectory in the target `Directory`.
+    /// Write a copy of all of the files saved in the RamDirectory in the target `Directory`.
     ///
     /// Files are all written using the `Directory::write` meaning, even if they were
     /// written using the `atomic_write` api.
@@ -164,7 +164,7 @@ impl RAMDirectory {
     }
 }
 
-impl Directory for RAMDirectory {
+impl Directory for RamDirectory {
     fn get_file_handle(&self, path: &Path) -> Result<Box<dyn FileHandle>, OpenReadError> {
         let file_slice = self.open_read(path)?;
         Ok(Box::new(file_slice))
@@ -175,8 +175,8 @@ impl Directory for RAMDirectory {
     }
 
     fn delete(&self, path: &Path) -> result::Result<(), DeleteError> {
-        fail_point!("RAMDirectory::delete", |_| {
-            Err(DeleteError::IOError {
+        fail_point!("RamDirectory::delete", |_| {
+            Err(DeleteError::IoError {
                 io_error: io::Error::from(io::ErrorKind::Other),
                 filepath: path.to_path_buf(),
             })
@@ -188,7 +188,7 @@ impl Directory for RAMDirectory {
         Ok(self
             .fs
             .read()
-            .map_err(|e| OpenReadError::IOError {
+            .map_err(|e| OpenReadError::IoError {
                 io_error: io::Error::new(io::ErrorKind::Other, e.to_string()),
                 filepath: path.to_path_buf(),
             })?
@@ -212,7 +212,7 @@ impl Directory for RAMDirectory {
         let bytes =
             self.open_read(path)?
                 .read_bytes()
-                .map_err(|io_error| OpenReadError::IOError {
+                .map_err(|io_error| OpenReadError::IoError {
                     io_error,
                     filepath: path.to_path_buf(),
                 })?;
@@ -220,7 +220,7 @@ impl Directory for RAMDirectory {
     }
 
     fn atomic_write(&self, path: &Path, data: &[u8]) -> io::Result<()> {
-        fail_point!("RAMDirectory::atomic_write", |msg| Err(io::Error::new(
+        fail_point!("RamDirectory::atomic_write", |msg| Err(io::Error::new(
             io::ErrorKind::Other,
             msg.unwrap_or_else(|| "Undefined".to_string())
         )));
@@ -241,7 +241,7 @@ impl Directory for RAMDirectory {
 
 #[cfg(test)]
 mod tests {
-    use super::RAMDirectory;
+    use super::RamDirectory;
     use crate::Directory;
     use std::io::Write;
     use std::path::Path;
@@ -252,12 +252,12 @@ mod tests {
         let msg_seq: &'static [u8] = b"sequential is the way";
         let path_atomic: &'static Path = Path::new("atomic");
         let path_seq: &'static Path = Path::new("seq");
-        let directory = RAMDirectory::create();
+        let directory = RamDirectory::create();
         assert!(directory.atomic_write(path_atomic, msg_atomic).is_ok());
         let mut wrt = directory.open_write(path_seq).unwrap();
         assert!(wrt.write_all(msg_seq).is_ok());
         assert!(wrt.flush().is_ok());
-        let directory_copy = RAMDirectory::create();
+        let directory_copy = RamDirectory::create();
         assert!(directory.persist(&directory_copy).is_ok());
         assert_eq!(directory_copy.atomic_read(path_atomic).unwrap(), msg_atomic);
         assert_eq!(directory_copy.atomic_read(path_seq).unwrap(), msg_seq);
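
A sketch of the `RamDirectory` round trip the test above exercises, written from a downstream crate:

```rust
use std::path::Path;
use tantivy::directory::{Directory, RamDirectory};

fn main() -> std::io::Result<()> {
    let directory = RamDirectory::create();
    directory.atomic_write(Path::new("greeting"), b"hello")?;

    // `persist` copies every file into the target via `Directory::write`.
    let copy = RamDirectory::create();
    directory.persist(&copy).expect("persist into a RamDirectory");
    assert_eq!(copy.atomic_read(Path::new("greeting")).unwrap(), b"hello");
    Ok(())
}
```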
@@ -65,12 +65,12 @@ mod mmap_directory_tests {
 }
 
 mod ram_directory_tests {
-    use crate::directory::RAMDirectory;
+    use crate::directory::RamDirectory;
 
-    type DirectoryImpl = RAMDirectory;
+    type DirectoryImpl = RamDirectory;
 
     fn make_directory() -> DirectoryImpl {
-        RAMDirectory::default()
+        RamDirectory::default()
     }
 
     #[test]
@@ -122,7 +122,7 @@ mod ram_directory_tests {
     #[should_panic]
     fn ram_directory_panics_if_flush_forgotten() {
        let test_path: &'static Path = Path::new("some_path_for_test");
-        let ram_directory = RAMDirectory::create();
+        let ram_directory = RamDirectory::create();
         let mut write_file = ram_directory.open_write(test_path).unwrap();
         assert!(write_file.write_all(&[4]).is_ok());
     }
@@ -70,7 +70,7 @@ pub enum TantivyError {
     LockFailure(LockError, Option<String>),
     /// IO Error.
     #[error("An IO error occurred: '{0}'")]
-    IOError(#[from] io::Error),
+    IoError(#[from] io::Error),
     /// Data corruption.
     #[error("Data corrupted: '{0:?}'")]
     DataCorruption(DataCorruption),
@@ -136,7 +136,7 @@ impl From<schema::DocParsingError> for TantivyError {
 
 impl From<serde_json::Error> for TantivyError {
     fn from(error: serde_json::Error) -> TantivyError {
-        TantivyError::IOError(error.into())
+        TantivyError::IoError(error.into())
     }
 }
 
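
For callers, only the variant name changes here too. A sketch of matching the renamed variant (the `From<io::Error>` conversion comes from the `#[from]` attribute above):

```rust
use tantivy::TantivyError;

fn describe(err: &TantivyError) -> String {
    match err {
        // Was `TantivyError::IOError(..)` before this commit.
        TantivyError::IoError(io_error) => format!("io error: {}", io_error),
        other => format!("other error: {}", other),
    }
}

fn main() {
    let err = TantivyError::from(std::io::Error::from(std::io::ErrorKind::Other));
    println!("{}", describe(&err));
}
```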
@@ -47,14 +47,14 @@ pub struct DeleteBitSet {
|
|||||||
impl DeleteBitSet {
|
impl DeleteBitSet {
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
pub(crate) fn for_test(docs: &[DocId], max_doc: u32) -> DeleteBitSet {
|
pub(crate) fn for_test(docs: &[DocId], max_doc: u32) -> DeleteBitSet {
|
||||||
use crate::directory::{Directory, RAMDirectory, TerminatingWrite};
|
use crate::directory::{Directory, RamDirectory, TerminatingWrite};
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
assert!(docs.iter().all(|&doc| doc < max_doc));
|
assert!(docs.iter().all(|&doc| doc < max_doc));
|
||||||
let mut bitset = BitSet::with_max_value(max_doc);
|
let mut bitset = BitSet::with_max_value(max_doc);
|
||||||
for &doc in docs {
|
for &doc in docs {
|
||||||
bitset.insert(doc);
|
bitset.insert(doc);
|
||||||
}
|
}
|
||||||
let directory = RAMDirectory::create();
|
let directory = RamDirectory::create();
|
||||||
let path = Path::new("dummydeletebitset");
|
let path = Path::new("dummydeletebitset");
|
||||||
let mut wrt = directory.open_write(path).unwrap();
|
let mut wrt = directory.open_write(path).unwrap();
|
||||||
write_delete_bitset(&bitset, max_doc, &mut wrt).unwrap();
|
write_delete_bitset(&bitset, max_doc, &mut wrt).unwrap();
|
||||||
|
@@ -201,7 +201,7 @@ mod tests {

     use super::*;
     use crate::common::CompositeFile;
-    use crate::directory::{Directory, RAMDirectory, WritePtr};
+    use crate::directory::{Directory, RamDirectory, WritePtr};
     use crate::fastfield::FastFieldReader;
     use crate::merge_policy::NoMergePolicy;
     use crate::schema::Field;
@@ -242,7 +242,7 @@ mod tests {
     #[test]
     fn test_intfastfield_small() -> crate::Result<()> {
         let path = Path::new("test");
-        let directory: RAMDirectory = RAMDirectory::create();
+        let directory: RamDirectory = RamDirectory::create();
         {
             let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
             let mut serializer = FastFieldSerializer::from_write(write).unwrap();
@@ -269,7 +269,7 @@ mod tests {
     #[test]
     fn test_intfastfield_large() -> crate::Result<()> {
         let path = Path::new("test");
-        let directory: RAMDirectory = RAMDirectory::create();
+        let directory: RamDirectory = RamDirectory::create();
         {
             let write: WritePtr = directory.open_write(Path::new("test"))?;
             let mut serializer = FastFieldSerializer::from_write(write)?;
@@ -308,7 +308,7 @@ mod tests {
     #[test]
     fn test_intfastfield_null_amplitude() -> crate::Result<()> {
         let path = Path::new("test");
-        let directory: RAMDirectory = RAMDirectory::create();
+        let directory: RamDirectory = RamDirectory::create();

         {
             let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
@@ -338,7 +338,7 @@ mod tests {
     #[test]
     fn test_intfastfield_large_numbers() -> crate::Result<()> {
         let path = Path::new("test");
-        let directory: RAMDirectory = RAMDirectory::create();
+        let directory: RamDirectory = RamDirectory::create();

         {
             let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
@@ -374,7 +374,7 @@ mod tests {
     #[test]
     fn test_signed_intfastfield() -> crate::Result<()> {
         let path = Path::new("test");
-        let directory: RAMDirectory = RAMDirectory::create();
+        let directory: RamDirectory = RamDirectory::create();
         let mut schema_builder = Schema::builder();

         let i64_field = schema_builder.add_i64_field("field", FAST);
@@ -417,7 +417,7 @@ mod tests {
     #[test]
     fn test_signed_intfastfield_default_val() -> crate::Result<()> {
         let path = Path::new("test");
-        let directory: RAMDirectory = RAMDirectory::create();
+        let directory: RamDirectory = RamDirectory::create();
         let mut schema_builder = Schema::builder();
         let i64_field = schema_builder.add_i64_field("field", FAST);
         let schema = schema_builder.build();
@@ -456,7 +456,7 @@ mod tests {
         let path = Path::new("test");
         let permutation = generate_permutation();
         let n = permutation.len();
-        let directory = RAMDirectory::create();
+        let directory = RamDirectory::create();
         {
             let write: WritePtr = directory.open_write(Path::new("test"))?;
             let mut serializer = FastFieldSerializer::from_write(write)?;
@@ -576,7 +576,7 @@ mod bench {
     use super::tests::{generate_permutation, SCHEMA};
     use super::*;
     use crate::common::CompositeFile;
-    use crate::directory::{Directory, RAMDirectory, WritePtr};
+    use crate::directory::{Directory, RamDirectory, WritePtr};
     use crate::fastfield::FastFieldReader;
     use std::collections::HashMap;
     use std::path::Path;
@@ -612,7 +612,7 @@ mod bench {
     fn bench_intfastfield_linear_fflookup(b: &mut Bencher) {
         let path = Path::new("test");
         let permutation = generate_permutation();
-        let directory: RAMDirectory = RAMDirectory::create();
+        let directory: RamDirectory = RamDirectory::create();
         {
             let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
             let mut serializer = FastFieldSerializer::from_write(write).unwrap();
@@ -646,7 +646,7 @@ mod bench {
     fn bench_intfastfield_fflookup(b: &mut Bencher) {
         let path = Path::new("test");
         let permutation = generate_permutation();
-        let directory: RAMDirectory = RAMDirectory::create();
+        let directory: RamDirectory = RamDirectory::create();
         {
             let write: WritePtr = directory.open_write(Path::new("test")).unwrap();
             let mut serializer = FastFieldSerializer::from_write(write).unwrap();
@@ -4,7 +4,7 @@ use crate::common::compute_num_bits;
 use crate::common::BinarySerializable;
 use crate::common::CompositeFile;
 use crate::directory::FileSlice;
-use crate::directory::{Directory, RAMDirectory, WritePtr};
+use crate::directory::{Directory, RamDirectory, WritePtr};
 use crate::fastfield::{FastFieldSerializer, FastFieldsWriter};
 use crate::schema::Schema;
 use crate::schema::FAST;
@@ -118,18 +118,18 @@ impl<Item: FastValue> From<Vec<Item>> for FastFieldReader<Item> {
         let field = schema_builder.add_u64_field("field", FAST);
         let schema = schema_builder.build();
         let path = Path::new("__dummy__");
-        let directory: RAMDirectory = RAMDirectory::create();
+        let directory: RamDirectory = RamDirectory::create();
         {
             let write: WritePtr = directory
                 .open_write(path)
-                .expect("With a RAMDirectory, this should never fail.");
+                .expect("With a RamDirectory, this should never fail.");
             let mut serializer = FastFieldSerializer::from_write(write)
-                .expect("With a RAMDirectory, this should never fail.");
+                .expect("With a RamDirectory, this should never fail.");
             let mut fast_field_writers = FastFieldsWriter::from_schema(&schema);
             {
                 let fast_field_writer = fast_field_writers
                     .get_field_writer(field)
-                    .expect("With a RAMDirectory, this should never fail.");
+                    .expect("With a RamDirectory, this should never fail.");
                 for val in vals {
                     fast_field_writer.add_val(val.to_u64());
                 }
@@ -15,7 +15,7 @@
 //! precompute computationally expensive functions of the fieldnorm
 //! in a very short array.
 //!
-//! This trick is used by the BM25 similarity.
+//! This trick is used by the Bm25 similarity.
 mod code;
 mod reader;
 mod serializer;
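The module doc above alludes to caching expensive per-fieldnorm factors in a very short array. A minimal sketch of that trick, assuming fieldnorms are stored as one-byte ids (hence 256 slots); the BM25 constants and the id-to-length decoding below are illustrative stand-ins, not tantivy's exact code.

const K1: f32 = 1.2;
const B: f32 = 0.75;

/// Precompute, for each of the 256 possible fieldnorm ids, the BM25
/// length-normalization denominator. Scoring a hit then costs one array
/// lookup instead of recomputing the float expression per document.
fn compute_norm_cache(avg_fieldnorm: f32) -> [f32; 256] {
    let mut cache = [0.0f32; 256];
    for (id, slot) in cache.iter_mut().enumerate() {
        let field_len = fieldnorm_id_to_len(id as u8) as f32;
        *slot = K1 * (1.0 - B + B * field_len / avg_fieldnorm);
    }
    cache
}

/// Illustrative decoding only: treat the id itself as the field length.
fn fieldnorm_id_to_len(fieldnorm_id: u8) -> u32 {
    fieldnorm_id as u32
}

fn main() {
    let cache = compute_norm_cache(10.0);
    // tf component of BM25 for term frequency 3 in a doc with fieldnorm id 20:
    let tf = 3.0f32;
    let tf_norm = tf * (K1 + 1.0) / (tf + cache[20]);
    println!("tf component: {}", tf_norm);
}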
@@ -180,7 +180,7 @@ pub(crate) fn advance_deletes(
     if num_deleted_docs > num_deleted_docs_before {
         // There are new deletes. We need to write a new delete file.
         segment = segment.with_delete_meta(num_deleted_docs as u32, target_opstamp);
-        let mut delete_file = segment.open_write(SegmentComponent::DELETE)?;
+        let mut delete_file = segment.open_write(SegmentComponent::Delete)?;
         write_delete_bitset(&delete_bitset, max_doc, &mut delete_file)?;
         delete_file.terminate()?;
     }
@@ -687,7 +687,7 @@ impl SerializableSegment for IndexMerger {
         }
         let fieldnorm_data = serializer
             .segment()
-            .open_read(SegmentComponent::FIELDNORMS)?;
+            .open_read(SegmentComponent::FieldNorms)?;
         let fieldnorm_readers = FieldNormReaders::open(fieldnorm_data)?;
         let term_ord_mappings =
             self.write_postings(serializer.get_postings_serializer(), fieldnorm_readers)?;
@@ -18,12 +18,12 @@ pub struct SegmentSerializer {
 impl SegmentSerializer {
     /// Creates a new `SegmentSerializer`.
     pub fn for_segment(mut segment: Segment) -> crate::Result<SegmentSerializer> {
-        let store_write = segment.open_write(SegmentComponent::STORE)?;
+        let store_write = segment.open_write(SegmentComponent::Store)?;

-        let fast_field_write = segment.open_write(SegmentComponent::FASTFIELDS)?;
+        let fast_field_write = segment.open_write(SegmentComponent::FastFields)?;
         let fast_field_serializer = FastFieldSerializer::from_write(fast_field_write)?;

-        let fieldnorms_write = segment.open_write(SegmentComponent::FIELDNORMS)?;
+        let fieldnorms_write = segment.open_write(SegmentComponent::FieldNorms)?;
         let fieldnorms_serializer = FieldNormsSerializer::from_write(fieldnorms_write)?;

         let postings_serializer = InvertedIndexSerializer::open(&mut segment)?;
@@ -616,7 +616,7 @@ impl SegmentUpdater {
 #[cfg(test)]
 mod tests {
     use super::merge_segments;
-    use crate::directory::RAMDirectory;
+    use crate::directory::RamDirectory;
     use crate::indexer::merge_policy::tests::MergeWheneverPossible;
     use crate::schema::*;
     use crate::Index;
@@ -765,7 +765,7 @@ mod tests {
         }

         assert_eq!(indices.len(), 3);
-        let output_directory = RAMDirectory::default();
+        let output_directory = RamDirectory::default();
         let index = merge_segments(&indices, output_directory)?;
         assert_eq!(index.schema(), schema);

@@ -780,7 +780,7 @@ mod tests {

     #[test]
     fn test_merge_empty_indices_array() {
-        let merge_result = merge_segments(&[], RAMDirectory::default());
+        let merge_result = merge_segments(&[], RamDirectory::default());
         assert!(merge_result.is_err());
     }

@@ -807,7 +807,7 @@ mod tests {
         };

         // mismatched schema index list
-        let result = merge_segments(&[first_index, second_index], RAMDirectory::default());
+        let result = merge_segments(&[first_index, second_index], RamDirectory::default());
         assert!(result.is_err());

         Ok(())
@@ -311,7 +311,7 @@ fn write(
     }
     let fieldnorm_data = serializer
         .segment()
-        .open_read(SegmentComponent::FIELDNORMS)?;
+        .open_read(SegmentComponent::FieldNorms)?;
     let fieldnorm_readers = FieldNormReaders::open(fieldnorm_data)?;
     let term_ord_map =
         multifield_postings.serialize(serializer.get_postings_serializer(), fieldnorm_readers)?;
@@ -1,6 +1,6 @@
 /// Positions are stored in three parts and over two files.
 //
-/// The `SegmentComponent::POSITIONS` file contains all of the bitpacked positions delta,
+/// The `SegmentComponent::Positions` file contains all of the bitpacked positions delta,
 /// for all terms of a given field, one term after the other.
 ///
 /// If the last block is incomplete, it is simply padded with zeros.
@@ -11,7 +11,7 @@
 /// If the last block is incomplete, it is simply padded with zeros.
 ///
 ///
-/// The `SegmentComponent::POSITIONSSKIP` file contains the number of bits used in each block in `u8`
+/// The `SegmentComponent::PositionsSkip` file contains the number of bits used in each block in `u8`
 /// stream.
 ///
 /// This makes it possible to rapidly skip over `n positions`.
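A minimal sketch of the layout the doc comment describes: position deltas are grouped into fixed-size blocks, each block bitpacked with just enough bits for its largest value, and a parallel `u8` stream records each block's bit width so a reader can skip whole blocks without decoding them. The block size and packer below are simplified stand-ins for tantivy's actual encoder.

const BLOCK_LEN: usize = 128; // tantivy uses 128; any fixed size works for the sketch.

/// Bitpack one block of position deltas and report the bit width used.
/// Returns (num_bits, packed_bytes); the skip stream stores `num_bits`,
/// so `n` positions can be skipped as whole blocks of known byte length.
fn pack_block(deltas: &[u32]) -> (u8, Vec<u8>) {
    assert!(deltas.len() <= BLOCK_LEN);
    let max = deltas.iter().copied().max().unwrap_or(0);
    let num_bits = (32 - max.leading_zeros()) as u8; // bits needed for the largest delta
    let mut packed = Vec::new();
    let mut acc: u64 = 0;
    let mut acc_bits = 0u8;
    for &delta in deltas {
        acc |= (delta as u64) << acc_bits;
        acc_bits += num_bits;
        while acc_bits >= 8 {
            packed.push((acc & 0xFF) as u8);
            acc >>= 8;
            acc_bits -= 8;
        }
    }
    if acc_bits > 0 {
        packed.push((acc & 0xFF) as u8); // pad the last byte with zeros
    }
    (num_bits, packed)
}

fn main() {
    // Positions 3, 7, 9, 30 become deltas 3, 4, 2, 21 -> 5 bits each.
    let (num_bits, packed) = pack_block(&[3, 4, 2, 21]);
    assert_eq!(num_bits, 5);
    println!("{} bits/value, {} packed bytes", num_bits, packed.len());
}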
@@ -100,7 +100,7 @@ fn galloping(block_docs: &[u32], target: u32) -> usize {
 #[derive(Clone, Copy, PartialEq)]
 pub enum BlockSearcher {
     #[cfg(target_arch = "x86_64")]
-    SSE2,
+    Sse2,
     Scalar,
 }

@@ -139,7 +139,7 @@ impl BlockSearcher {
     pub(crate) fn search_in_block(self, block_docs: &AlignedBuffer, target: u32) -> usize {
         #[cfg(target_arch = "x86_64")]
         {
-            if self == BlockSearcher::SSE2 {
+            if self == BlockSearcher::Sse2 {
                 return sse2::linear_search_sse2_128(block_docs, target);
             }
         }
@@ -152,7 +152,7 @@ impl Default for BlockSearcher {
         #[cfg(target_arch = "x86_64")]
         {
             if is_x86_feature_detected!("sse2") {
-                return BlockSearcher::SSE2;
+                return BlockSearcher::Sse2;
             }
         }
         BlockSearcher::Scalar
@@ -236,6 +236,6 @@ mod tests {
     #[cfg(target_arch = "x86_64")]
     #[test]
     fn test_search_in_block_sse2() {
-        test_search_in_block_util(BlockSearcher::SSE2);
+        test_search_in_block_util(BlockSearcher::Sse2);
     }
 }
@@ -8,7 +8,7 @@ use crate::postings::compression::{
     AlignedBuffer, BlockDecoder, VIntDecoder, COMPRESSION_BLOCK_SIZE,
 };
 use crate::postings::{BlockInfo, FreqReadingOption, SkipReader};
-use crate::query::BM25Weight;
+use crate::query::Bm25Weight;
 use crate::schema::IndexRecordOption;
 use crate::{DocId, Score, TERMINATED};

@@ -127,7 +127,7 @@ impl BlockSegmentPostings {
     pub fn block_max_score(
         &mut self,
         fieldnorm_reader: &FieldNormReader,
-        bm25_weight: &BM25Weight,
+        bm25_weight: &Bm25Weight,
     ) -> Score {
         if let Some(score) = self.block_max_score_cache {
             return score;
@@ -73,7 +73,7 @@ pub mod tests {
         field_serializer.close_term()?;
         mem::drop(field_serializer);
         posting_serializer.close()?;
-        let read = segment.open_read(SegmentComponent::POSITIONS)?;
+        let read = segment.open_read(SegmentComponent::Positions)?;
         assert!(read.len() <= 140);
         Ok(())
     }
@@ -2,7 +2,7 @@ use super::stacker::{Addr, MemoryArena, TermHashMap};

 use crate::fieldnorm::FieldNormReaders;
 use crate::postings::recorder::{
-    BufferLender, NothingRecorder, Recorder, TFAndPositionRecorder, TermFrequencyRecorder,
+    BufferLender, NothingRecorder, Recorder, TfAndPositionRecorder, TermFrequencyRecorder,
 };
 use crate::postings::UnorderedTermId;
 use crate::postings::{FieldSerializer, InvertedIndexSerializer};
@@ -30,7 +30,7 @@ fn posting_from_field_entry(field_entry: &FieldEntry) -> Box<dyn PostingsWriter>
                 SpecializedPostingsWriter::<TermFrequencyRecorder>::new_boxed()
             }
             IndexRecordOption::WithFreqsAndPositions => {
-                SpecializedPostingsWriter::<TFAndPositionRecorder>::new_boxed()
+                SpecializedPostingsWriter::<TfAndPositionRecorder>::new_boxed()
             }
         })
         .unwrap_or_else(|| SpecializedPostingsWriter::<NothingRecorder>::new_boxed()),
@@ -192,14 +192,14 @@ impl Recorder for TermFrequencyRecorder {

 /// Recorder encoding term frequencies as well as positions.
 #[derive(Clone, Copy)]
-pub struct TFAndPositionRecorder {
+pub struct TfAndPositionRecorder {
     stack: ExpUnrolledLinkedList,
     current_doc: DocId,
     term_doc_freq: u32,
 }
-impl Recorder for TFAndPositionRecorder {
+impl Recorder for TfAndPositionRecorder {
     fn new() -> Self {
-        TFAndPositionRecorder {
+        TfAndPositionRecorder {
             stack: ExpUnrolledLinkedList::new(),
             current_doc: u32::max_value(),
             term_doc_freq: 0u32,
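The recorder being renamed here buffers, per term, the stream of documents, term frequencies and positions seen during indexing. A simplified stand-in for the idea, assuming positions are appended per document and the term frequency is patched in place as positions arrive; the flat `Vec` replaces tantivy's `ExpUnrolledLinkedList`.

/// Illustrative tf-and-positions recorder: one flat buffer of
/// (doc, num_positions, positions...) runs. Not tantivy's actual layout.
struct SimpleTfAndPositionRecorder {
    buffer: Vec<u32>,
    current_doc: Option<u32>,
    tf_index: usize, // where the current doc's position count lives
}

impl SimpleTfAndPositionRecorder {
    fn new() -> Self {
        SimpleTfAndPositionRecorder {
            buffer: Vec::new(),
            current_doc: None,
            tf_index: 0,
        }
    }

    fn new_doc(&mut self, doc: u32) {
        self.buffer.push(doc);
        self.tf_index = self.buffer.len();
        self.buffer.push(0); // position count, patched as positions arrive
        self.current_doc = Some(doc);
    }

    fn record_position(&mut self, position: u32) {
        assert!(self.current_doc.is_some(), "record_position before new_doc");
        self.buffer.push(position);
        self.buffer[self.tf_index] += 1;
    }
}

fn main() {
    let mut recorder = SimpleTfAndPositionRecorder::new();
    recorder.new_doc(0);
    recorder.record_position(4);
    recorder.record_position(9);
    recorder.new_doc(2);
    recorder.record_position(1);
    // doc 0 with tf=2 at positions [4, 9], then doc 2 with tf=1 at [1]
    assert_eq!(recorder.buffer, vec![0, 2, 4, 9, 2, 1, 1]);
}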
@@ -7,7 +7,7 @@ use crate::fieldnorm::FieldNormReader;
 use crate::positions::PositionSerializer;
 use crate::postings::compression::{BlockEncoder, VIntEncoder, COMPRESSION_BLOCK_SIZE};
 use crate::postings::skip::SkipSerializer;
-use crate::query::BM25Weight;
+use crate::query::Bm25Weight;
 use crate::schema::{Field, FieldEntry, FieldType};
 use crate::schema::{IndexRecordOption, Schema};
 use crate::termdict::{TermDictionaryBuilder, TermOrdinal};
@@ -57,12 +57,12 @@ pub struct InvertedIndexSerializer {
 impl InvertedIndexSerializer {
     /// Open a new `PostingsSerializer` for the given segment
     pub fn open(segment: &mut Segment) -> crate::Result<InvertedIndexSerializer> {
-        use crate::SegmentComponent::{POSITIONS, POSITIONSSKIP, POSTINGS, TERMS};
+        use crate::SegmentComponent::{Positions, PositionsSkip, Postings, Terms};
         let inv_index_serializer = InvertedIndexSerializer {
-            terms_write: CompositeWrite::wrap(segment.open_write(TERMS)?),
-            postings_write: CompositeWrite::wrap(segment.open_write(POSTINGS)?),
-            positions_write: CompositeWrite::wrap(segment.open_write(POSITIONS)?),
-            positionsidx_write: CompositeWrite::wrap(segment.open_write(POSITIONSSKIP)?),
+            terms_write: CompositeWrite::wrap(segment.open_write(Terms)?),
+            postings_write: CompositeWrite::wrap(segment.open_write(Postings)?),
+            positions_write: CompositeWrite::wrap(segment.open_write(Positions)?),
+            positionsidx_write: CompositeWrite::wrap(segment.open_write(PositionsSkip)?),
             schema: segment.schema(),
         };
         Ok(inv_index_serializer)
@@ -307,7 +307,7 @@ pub struct PostingsSerializer<W: Write> {
     mode: IndexRecordOption,
     fieldnorm_reader: Option<FieldNormReader>,

-    bm25_weight: Option<BM25Weight>,
+    bm25_weight: Option<Bm25Weight>,

     num_docs: u32, // Number of docs in the segment
     avg_fieldnorm: Score, // Average number of terms in the field for that segment.
@@ -347,7 +347,7 @@ impl<W: Write> PostingsSerializer<W> {

     pub fn new_term(&mut self, term_doc_freq: u32) {
         if self.mode.has_freq() && self.num_docs > 0 {
-            let bm25_weight = BM25Weight::for_one_term(
+            let bm25_weight = Bm25Weight::for_one_term(
                 term_doc_freq as u64,
                 self.num_docs as u64,
                 self.avg_fieldnorm,
@@ -2,7 +2,7 @@ use std::convert::TryInto;

 use crate::directory::OwnedBytes;
 use crate::postings::compression::{compressed_block_size, COMPRESSION_BLOCK_SIZE};
-use crate::query::BM25Weight;
+use crate::query::Bm25Weight;
 use crate::schema::IndexRecordOption;
 use crate::{DocId, Score, TERMINATED};

@@ -144,7 +144,7 @@ impl SkipReader {
    //
    // The block max score is available for all full bitpacked blocks,
    // but not available for the last VInt encoded incomplete block.
-    pub fn block_max_score(&self, bm25_weight: &BM25Weight) -> Option<Score> {
+    pub fn block_max_score(&self, bm25_weight: &Bm25Weight) -> Option<Score> {
        match self.block_info {
            BlockInfo::BitPacked {
                block_wand_fieldnorm_id,
@@ -29,22 +29,22 @@ fn compute_tf_cache(average_fieldnorm: Score) -> [Score; 256] {
 }

 #[derive(Clone, PartialEq, Debug, Serialize, Deserialize)]
-pub struct BM25Params {
+pub struct Bm25Params {
     pub idf: Score,
     pub avg_fieldnorm: Score,
 }

 #[derive(Clone)]
-pub struct BM25Weight {
+pub struct Bm25Weight {
     idf_explain: Explanation,
     weight: Score,
     cache: [Score; 256],
     average_fieldnorm: Score,
 }

-impl BM25Weight {
-    pub fn boost_by(&self, boost: Score) -> BM25Weight {
-        BM25Weight {
+impl Bm25Weight {
+    pub fn boost_by(&self, boost: Score) -> Bm25Weight {
+        Bm25Weight {
             idf_explain: self.idf_explain.clone(),
             weight: self.weight * boost,
             cache: self.cache,
@@ -52,8 +52,8 @@ impl BM25Weight {
         }
     }

-    pub fn for_terms(searcher: &Searcher, terms: &[Term]) -> crate::Result<BM25Weight> {
-        assert!(!terms.is_empty(), "BM25 requires at least one term");
+    pub fn for_terms(searcher: &Searcher, terms: &[Term]) -> crate::Result<Bm25Weight> {
+        assert!(!terms.is_empty(), "Bm25 requires at least one term");
         let field = terms[0].field();
         for term in &terms[1..] {
             assert_eq!(
@@ -74,7 +74,7 @@ impl BM25Weight {

         if terms.len() == 1 {
             let term_doc_freq = searcher.doc_freq(&terms[0])?;
-            Ok(BM25Weight::for_one_term(
+            Ok(Bm25Weight::for_one_term(
                 term_doc_freq,
                 total_num_docs,
                 average_fieldnorm,
@@ -86,7 +86,7 @@ impl BM25Weight {
                 idf_sum += idf(term_doc_freq, total_num_docs);
             }
             let idf_explain = Explanation::new("idf", idf_sum);
-            Ok(BM25Weight::new(idf_explain, average_fieldnorm))
+            Ok(Bm25Weight::new(idf_explain, average_fieldnorm))
         }
     }

@@ -94,7 +94,7 @@ impl BM25Weight {
         term_doc_freq: u64,
         total_num_docs: u64,
         avg_fieldnorm: Score,
-    ) -> BM25Weight {
+    ) -> Bm25Weight {
         let idf = idf(term_doc_freq, total_num_docs);
         let mut idf_explain =
             Explanation::new("idf, computed as log(1 + (N - n + 0.5) / (n + 0.5))", idf);
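As a worked check of the formula named in the `Explanation` string above, a standalone version of that idf, assuming `Score` is an `f32` alias as elsewhere in the crate:

type Score = f32;

/// idf as spelled out in the Explanation above:
/// log(1 + (N - n + 0.5) / (n + 0.5)),
/// where n is the term's doc frequency and N the total number of docs.
fn idf(term_doc_freq: u64, total_num_docs: u64) -> Score {
    let n = term_doc_freq as Score;
    let big_n = total_num_docs as Score;
    (1.0 + (big_n - n + 0.5) / (n + 0.5)).ln()
}

fn main() {
    // A term appearing in 3 of 6 documents:
    let value = idf(3, 6);
    // (1 + 3.5 / 3.5).ln() = ln(2) ≈ 0.693
    assert!((value - 0.6931472).abs() < 1e-6);
    println!("idf(3, 6) = {}", value);
}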
@@ -103,12 +103,12 @@ impl BM25Weight {
             term_doc_freq as Score,
         );
         idf_explain.add_const("N, total number of docs", total_num_docs as Score);
-        BM25Weight::new(idf_explain, avg_fieldnorm)
+        Bm25Weight::new(idf_explain, avg_fieldnorm)
     }

-    pub(crate) fn new(idf_explain: Explanation, average_fieldnorm: Score) -> BM25Weight {
+    pub(crate) fn new(idf_explain: Explanation, average_fieldnorm: Score) -> Bm25Weight {
         let weight = idf_explain.value() * (1.0 + K1);
-        BM25Weight {
+        Bm25Weight {
             idf_explain,
             weight,
             cache: compute_tf_cache(average_fieldnorm),
@@ -238,7 +238,7 @@ mod tests {
     use crate::query::score_combiner::SumCombiner;
     use crate::query::term_query::TermScorer;
     use crate::query::Union;
-    use crate::query::{BM25Weight, Scorer};
+    use crate::query::{Bm25Weight, Scorer};
     use crate::{DocId, DocSet, Score, TERMINATED};
     use proptest::prelude::*;
     use std::cmp::Ordering;
@@ -393,7 +393,7 @@ mod tests {
         let term_scorers: Vec<TermScorer> = postings_lists_expanded
             .iter()
             .map(|postings| {
-                let bm25_weight = BM25Weight::for_one_term(
+                let bm25_weight = Bm25Weight::for_one_term(
                     postings.len() as u64,
                     max_doc as u64,
                     average_fieldnorm,
@@ -8,9 +8,9 @@ use std::collections::HashMap;
 use std::ops::Range;
 use tantivy_fst::Automaton;

-pub(crate) struct DFAWrapper(pub DFA);
+pub(crate) struct DfaWrapper(pub DFA);

-impl Automaton for DFAWrapper {
+impl Automaton for DfaWrapper {
     type State = u32;

     fn start(&self) -> Self::State {
@@ -127,7 +127,7 @@ impl FuzzyTermQuery {
         }
     }

-    fn specialized_weight(&self) -> crate::Result<AutomatonWeight<DFAWrapper>> {
+    fn specialized_weight(&self) -> crate::Result<AutomatonWeight<DfaWrapper>> {
         // LEV_BUILDER is a HashMap, whose `get` method returns an Option
         match LEV_BUILDER.get(&(self.distance, false)) {
             // Unwrap the option and build the Ok(AutomatonWeight)
@@ -139,7 +139,7 @@ impl FuzzyTermQuery {
                 };
                 Ok(AutomatonWeight::new(
                     self.term.field(),
-                    DFAWrapper(automaton),
+                    DfaWrapper(automaton),
                 ))
             }
             None => Err(InvalidArgument(format!(
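The `DfaWrapper` above adapts a Levenshtein DFA to the `Automaton` trait so the term dictionary can be intersected with it. A small usage sketch through the public `FuzzyTermQuery` API, assuming the 0.15-era signature `FuzzyTermQuery::new(term, distance, transposition_cost_one)`; the schema and text below are made up for illustration.

use tantivy::collector::TopDocs;
use tantivy::query::FuzzyTermQuery;
use tantivy::schema::{Schema, TEXT};
use tantivy::{doc, Index, Term};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let title = schema_builder.add_text_field("title", TEXT);
    let schema = schema_builder.build();

    let index = Index::create_in_ram(schema);
    let mut writer = index.writer(50_000_000)?;
    writer.add_document(doc!(title => "Diary of a Wimpy Kid"));
    writer.commit()?;

    let reader = index.reader()?;
    let searcher = reader.searcher();

    // "diry" is within Levenshtein distance 1 of the indexed token "diary";
    // the final flag counts a transposition as cost 1.
    let term = Term::from_field_text(title, "diry");
    let query = FuzzyTermQuery::new(term, 1, true);
    let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?;
    assert_eq!(top_docs.len(), 1); // expected under the assumptions above
    Ok(())
}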
@@ -26,7 +26,7 @@ mod weight;
 mod vec_docset;

 pub(crate) mod score_combiner;
-pub(crate) use self::bm25::BM25Weight;
+pub(crate) use self::bm25::Bm25Weight;
 pub use self::intersection::Intersection;
 pub use self::union::Union;

@@ -42,7 +42,7 @@ pub use self::empty_query::{EmptyQuery, EmptyScorer, EmptyWeight};
 pub use self::exclude::Exclude;
 pub use self::explanation::Explanation;
 #[cfg(test)]
-pub(crate) use self::fuzzy_query::DFAWrapper;
+pub(crate) use self::fuzzy_query::DfaWrapper;
 pub use self::fuzzy_query::FuzzyTermQuery;
 pub use self::intersection::intersect_scorers;
 pub use self::phrase_query::PhraseQuery;
@@ -1,6 +1,6 @@
 use super::PhraseWeight;
 use crate::core::searcher::Searcher;
-use crate::query::bm25::BM25Weight;
+use crate::query::bm25::Bm25Weight;
 use crate::query::Query;
 use crate::query::Weight;
 use crate::schema::IndexRecordOption;
@@ -95,7 +95,7 @@ impl PhraseQuery {
             )));
         }
         let terms = self.phrase_terms();
-        let bm25_weight = BM25Weight::for_terms(searcher, &terms)?;
+        let bm25_weight = Bm25Weight::for_terms(searcher, &terms)?;
         Ok(PhraseWeight::new(
             self.phrase_terms.clone(),
             bm25_weight,
@@ -1,7 +1,7 @@
 use crate::docset::{DocSet, TERMINATED};
 use crate::fieldnorm::FieldNormReader;
 use crate::postings::Postings;
-use crate::query::bm25::BM25Weight;
+use crate::query::bm25::Bm25Weight;
 use crate::query::{Intersection, Scorer};
 use crate::{DocId, Score};
 use std::cmp::Ordering;
@@ -49,7 +49,7 @@ pub struct PhraseScorer<TPostings: Postings> {
     right: Vec<u32>,
     phrase_count: u32,
     fieldnorm_reader: FieldNormReader,
-    similarity_weight: BM25Weight,
+    similarity_weight: Bm25Weight,
     score_needed: bool,
 }

@@ -133,7 +133,7 @@ fn intersection(left: &mut [u32], right: &[u32]) -> usize {
 impl<TPostings: Postings> PhraseScorer<TPostings> {
     pub fn new(
         term_postings: Vec<(usize, TPostings)>,
-        similarity_weight: BM25Weight,
+        similarity_weight: Bm25Weight,
         fieldnorm_reader: FieldNormReader,
         score_needed: bool,
     ) -> PhraseScorer<TPostings> {
@@ -2,7 +2,7 @@ use super::PhraseScorer;
 use crate::core::SegmentReader;
 use crate::fieldnorm::FieldNormReader;
 use crate::postings::SegmentPostings;
-use crate::query::bm25::BM25Weight;
+use crate::query::bm25::Bm25Weight;
 use crate::query::explanation::does_not_match;
 use crate::query::Scorer;
 use crate::query::Weight;
@@ -14,7 +14,7 @@ use crate::{DocId, DocSet};

 pub struct PhraseWeight {
     phrase_terms: Vec<(usize, Term)>,
-    similarity_weight: BM25Weight,
+    similarity_weight: Bm25Weight,
     score_needed: bool,
 }

@@ -22,7 +22,7 @@ impl PhraseWeight {
     /// Creates a new phrase weight.
     pub fn new(
         phrase_terms: Vec<(usize, Term)>,
-        similarity_weight: BM25Weight,
+        similarity_weight: Bm25Weight,
         score_needed: bool,
     ) -> PhraseWeight {
         PhraseWeight {
@@ -19,18 +19,18 @@ pub enum LogicalLiteral {
     All,
 }

-pub enum LogicalAST {
-    Clause(Vec<(Occur, LogicalAST)>),
+pub enum LogicalAst {
+    Clause(Vec<(Occur, LogicalAst)>),
     Leaf(Box<LogicalLiteral>),
-    Boost(Box<LogicalAST>, Score),
+    Boost(Box<LogicalAst>, Score),
 }

-impl LogicalAST {
-    pub fn boost(self, boost: Score) -> LogicalAST {
+impl LogicalAst {
+    pub fn boost(self, boost: Score) -> LogicalAst {
         if (boost - 1.0).abs() < Score::EPSILON {
             self
         } else {
-            LogicalAST::Boost(Box::new(self), boost)
+            LogicalAst::Boost(Box::new(self), boost)
         }
     }
 }
@@ -43,10 +43,10 @@ fn occur_letter(occur: Occur) -> &'static str {
     }
 }

-impl fmt::Debug for LogicalAST {
+impl fmt::Debug for LogicalAst {
     fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
         match *self {
-            LogicalAST::Clause(ref clause) => {
+            LogicalAst::Clause(ref clause) => {
                 if clause.is_empty() {
                     write!(formatter, "<emptyclause>")?;
                 } else {
@@ -59,15 +59,15 @@ impl fmt::Debug for LogicalAST {
                 }
                 Ok(())
             }
-            LogicalAST::Boost(ref ast, boost) => write!(formatter, "{:?}^{}", ast, boost),
-            LogicalAST::Leaf(ref literal) => write!(formatter, "{:?}", literal),
+            LogicalAst::Boost(ref ast, boost) => write!(formatter, "{:?}^{}", ast, boost),
+            LogicalAst::Leaf(ref literal) => write!(formatter, "{:?}", literal),
         }
     }
 }

-impl From<LogicalLiteral> for LogicalAST {
-    fn from(literal: LogicalLiteral) -> LogicalAST {
-        LogicalAST::Leaf(Box::new(literal))
+impl From<LogicalLiteral> for LogicalAst {
+    fn from(literal: LogicalLiteral) -> LogicalAst {
+        LogicalAst::Leaf(Box::new(literal))
     }
 }

@@ -18,7 +18,7 @@ use std::collections::HashMap;
 use std::num::{ParseFloatError, ParseIntError};
 use std::ops::Bound;
 use std::str::FromStr;
-use tantivy_query_grammar::{UserInputAST, UserInputBound, UserInputLeaf};
+use tantivy_query_grammar::{UserInputAst, UserInputBound, UserInputLeaf};

 /// Possible error that may happen when parsing a query.
 #[derive(Debug, PartialEq, Eq, Error)]
@@ -91,9 +91,9 @@ impl From<chrono::ParseError> for QueryParserError {
 /// Recursively remove empty clauses from the AST
 ///
 /// Returns `None` iff the `logical_ast` ended up being empty.
-fn trim_ast(logical_ast: LogicalAST) -> Option<LogicalAST> {
+fn trim_ast(logical_ast: LogicalAst) -> Option<LogicalAst> {
     match logical_ast {
-        LogicalAST::Clause(children) => {
+        LogicalAst::Clause(children) => {
             let trimmed_children = children
                 .into_iter()
                 .flat_map(|(occur, child)| {
@@ -103,7 +103,7 @@ fn trim_ast(logical_ast: LogicalAST) -> Option<LogicalAST> {
             if trimmed_children.is_empty() {
                 None
             } else {
-                Some(LogicalAST::Clause(trimmed_children))
+                Some(LogicalAst::Clause(trimmed_children))
             }
         }
         _ => Some(logical_ast),
@@ -178,11 +178,11 @@ pub struct QueryParser {
     boost: HashMap<Field, Score>,
 }

-fn all_negative(ast: &LogicalAST) -> bool {
+fn all_negative(ast: &LogicalAst) -> bool {
     match ast {
-        LogicalAST::Leaf(_) => false,
-        LogicalAST::Boost(ref child_ast, _) => all_negative(&*child_ast),
-        LogicalAST::Clause(children) => children
+        LogicalAst::Leaf(_) => false,
+        LogicalAst::Boost(ref child_ast, _) => all_negative(&*child_ast),
+        LogicalAst::Clause(children) => children
             .iter()
             .all(|(ref occur, child)| (*occur == Occur::MustNot) || all_negative(child)),
     }
@@ -251,7 +251,7 @@ impl QueryParser {
     }

     /// Parse the user query into an AST.
-    fn parse_query_to_logical_ast(&self, query: &str) -> Result<LogicalAST, QueryParserError> {
+    fn parse_query_to_logical_ast(&self, query: &str) -> Result<LogicalAst, QueryParserError> {
         let user_input_ast =
             tantivy_query_grammar::parse_query(query).map_err(|_| QueryParserError::SyntaxError)?;
         self.compute_logical_ast(user_input_ast)
@@ -265,10 +265,10 @@ impl QueryParser {

     fn compute_logical_ast(
         &self,
-        user_input_ast: UserInputAST,
-    ) -> Result<LogicalAST, QueryParserError> {
+        user_input_ast: UserInputAst,
+    ) -> Result<LogicalAst, QueryParserError> {
         let ast = self.compute_logical_ast_with_occur(user_input_ast)?;
-        if let LogicalAST::Clause(children) = &ast {
+        if let LogicalAst::Clause(children) = &ast {
             if children.is_empty() {
                 return Ok(ast);
             }
@@ -429,24 +429,24 @@ impl QueryParser {

     fn compute_logical_ast_with_occur(
         &self,
-        user_input_ast: UserInputAST,
-    ) -> Result<LogicalAST, QueryParserError> {
+        user_input_ast: UserInputAst,
+    ) -> Result<LogicalAst, QueryParserError> {
         match user_input_ast {
-            UserInputAST::Clause(sub_queries) => {
+            UserInputAst::Clause(sub_queries) => {
                 let default_occur = self.default_occur();
-                let mut logical_sub_queries: Vec<(Occur, LogicalAST)> = Vec::new();
+                let mut logical_sub_queries: Vec<(Occur, LogicalAst)> = Vec::new();
                 for (occur_opt, sub_ast) in sub_queries {
                     let sub_ast = self.compute_logical_ast_with_occur(sub_ast)?;
                     let occur = occur_opt.unwrap_or(default_occur);
                     logical_sub_queries.push((occur, sub_ast));
                 }
-                Ok(LogicalAST::Clause(logical_sub_queries))
+                Ok(LogicalAst::Clause(logical_sub_queries))
             }
-            UserInputAST::Boost(ast, boost) => {
+            UserInputAst::Boost(ast, boost) => {
                 let ast = self.compute_logical_ast_with_occur(*ast)?;
                 Ok(ast.boost(boost as Score))
             }
-            UserInputAST::Leaf(leaf) => self.compute_logical_ast_from_leaf(*leaf),
+            UserInputAst::Leaf(leaf) => self.compute_logical_ast_from_leaf(*leaf),
         }
     }

@@ -457,7 +457,7 @@ impl QueryParser {
     fn compute_logical_ast_from_leaf(
         &self,
         leaf: UserInputLeaf,
-    ) -> Result<LogicalAST, QueryParserError> {
+    ) -> Result<LogicalAst, QueryParserError> {
         match leaf {
             UserInputLeaf::Literal(literal) => {
                 let term_phrases: Vec<(Field, String)> = match literal.field_name {
@@ -476,22 +476,22 @@ impl QueryParser {
                     }
                 }
                 };
-                let mut asts: Vec<LogicalAST> = Vec::new();
+                let mut asts: Vec<LogicalAst> = Vec::new();
                 for (field, phrase) in term_phrases {
                     if let Some(ast) = self.compute_logical_ast_for_leaf(field, &phrase)? {
                         // Apply some field specific boost defined at the query parser level.
                         let boost = self.field_boost(field);
-                        asts.push(LogicalAST::Leaf(Box::new(ast)).boost(boost));
+                        asts.push(LogicalAst::Leaf(Box::new(ast)).boost(boost));
                     }
                 }
-                let result_ast: LogicalAST = if asts.len() == 1 {
+                let result_ast: LogicalAst = if asts.len() == 1 {
                     asts.into_iter().next().unwrap()
                 } else {
-                    LogicalAST::Clause(asts.into_iter().map(|ast| (Occur::Should, ast)).collect())
+                    LogicalAst::Clause(asts.into_iter().map(|ast| (Occur::Should, ast)).collect())
                 };
                 Ok(result_ast)
             }
-            UserInputLeaf::All => Ok(LogicalAST::Leaf(Box::new(LogicalLiteral::All))),
+            UserInputLeaf::All => Ok(LogicalAst::Leaf(Box::new(LogicalLiteral::All))),
             UserInputLeaf::Range {
                 field,
                 lower,
@@ -504,7 +504,7 @@ impl QueryParser {
             let boost = self.field_boost(field);
             let field_entry = self.schema.get_field_entry(field);
             let value_type = field_entry.field_type().value_type();
-            let logical_ast = LogicalAST::Leaf(Box::new(LogicalLiteral::Range {
+            let logical_ast = LogicalAst::Leaf(Box::new(LogicalLiteral::Range {
                 field,
                 value_type,
                 lower: self.resolve_bound(field, &lower)?,
@@ -516,7 +516,7 @@ impl QueryParser {
         let result_ast = if clauses.len() == 1 {
             clauses.pop().unwrap()
         } else {
-            LogicalAST::Clause(
+            LogicalAst::Clause(
                 clauses
                     .into_iter()
                     .map(|clause| (Occur::Should, clause))
@@ -547,9 +547,9 @@ fn convert_literal_to_query(logical_literal: LogicalLiteral) -> Box<dyn Query> {
     }
 }

-fn convert_to_query(logical_ast: LogicalAST) -> Box<dyn Query> {
+fn convert_to_query(logical_ast: LogicalAst) -> Box<dyn Query> {
     match trim_ast(logical_ast) {
-        Some(LogicalAST::Clause(trimmed_clause)) => {
+        Some(LogicalAst::Clause(trimmed_clause)) => {
             let occur_subqueries = trimmed_clause
                 .into_iter()
                 .map(|(occur, subquery)| (occur, convert_to_query(subquery)))
@@ -560,10 +560,10 @@ fn convert_to_query(logical_ast: LogicalAST) -> Box<dyn Query> {
             );
             Box::new(BooleanQuery::new(occur_subqueries))
         }
-        Some(LogicalAST::Leaf(trimmed_logical_literal)) => {
+        Some(LogicalAst::Leaf(trimmed_logical_literal)) => {
             convert_literal_to_query(*trimmed_logical_literal)
         }
-        Some(LogicalAST::Boost(ast, boost)) => {
+        Some(LogicalAst::Boost(ast, boost)) => {
             let query = convert_to_query(*ast);
             let boosted_query = BoostQuery::new(query, boost);
             Box::new(boosted_query)
@@ -632,7 +632,7 @@ mod test {
     fn parse_query_to_logical_ast(
         query: &str,
         default_conjunction: bool,
-    ) -> Result<LogicalAST, QueryParserError> {
+    ) -> Result<LogicalAst, QueryParserError> {
         let mut query_parser = make_query_parser();
         if default_conjunction {
             query_parser.set_conjunction_by_default();
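The test helper above drives the same pipeline end to end. A short sketch of that flow from the public API side, assuming the usual tantivy-0.15-era surface (`QueryParser::for_index`, `parse_query`, `set_conjunction_by_default`); the field names are made up for illustration.

use tantivy::query::QueryParser;
use tantivy::schema::{Schema, TEXT};
use tantivy::Index;

fn main() {
    let mut schema_builder = Schema::builder();
    let title = schema_builder.add_text_field("title", TEXT);
    let body = schema_builder.add_text_field("body", TEXT);
    let schema = schema_builder.build();
    let index = Index::create_in_ram(schema);

    // Under the hood this goes query string -> UserInputAst (grammar crate)
    // -> LogicalAst (this module) -> Box<dyn Query>.
    let mut query_parser = QueryParser::for_index(&index, vec![title, body]);

    // "+" and "-" map to Occur::Must / Occur::MustNot in the clause.
    let query = query_parser.parse_query("+diary -girl").expect("syntax error");
    println!("{:?}", query);

    // With conjunction by default, bare terms get Occur::Must instead of Should.
    query_parser.set_conjunction_by_default();
    let query = query_parser.parse_query("diary girl").expect("syntax error");
    println!("{:?}", query);
}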
|||||||
@@ -1,5 +1,5 @@
|
|||||||
use super::term_weight::TermWeight;
|
use super::term_weight::TermWeight;
|
||||||
use crate::query::bm25::BM25Weight;
|
use crate::query::bm25::Bm25Weight;
|
||||||
use crate::query::Weight;
|
use crate::query::Weight;
|
||||||
use crate::query::{Explanation, Query};
|
use crate::query::{Explanation, Query};
|
||||||
use crate::schema::IndexRecordOption;
|
use crate::schema::IndexRecordOption;
|
||||||
@@ -102,10 +102,10 @@ impl TermQuery {
|
|||||||
}
|
}
|
||||||
let bm25_weight;
|
let bm25_weight;
|
||||||
if scoring_enabled {
|
if scoring_enabled {
|
||||||
bm25_weight = BM25Weight::for_terms(searcher, &[term])?;
|
bm25_weight = Bm25Weight::for_terms(searcher, &[term])?;
|
||||||
} else {
|
} else {
|
||||||
bm25_weight =
|
bm25_weight =
|
||||||
BM25Weight::new(Explanation::new("<no score>".to_string(), 1.0f32), 1.0f32);
|
Bm25Weight::new(Explanation::new("<no score>".to_string(), 1.0f32), 1.0f32);
|
||||||
}
|
}
|
||||||
let index_record_option = if scoring_enabled {
|
let index_record_option = if scoring_enabled {
|
||||||
self.index_record_option
|
self.index_record_option
|
||||||
|
|||||||
@@ -6,20 +6,20 @@ use crate::Score;
|
|||||||
use crate::fieldnorm::FieldNormReader;
|
use crate::fieldnorm::FieldNormReader;
|
||||||
use crate::postings::SegmentPostings;
|
use crate::postings::SegmentPostings;
|
||||||
use crate::postings::{FreqReadingOption, Postings};
|
use crate::postings::{FreqReadingOption, Postings};
|
||||||
use crate::query::bm25::BM25Weight;
|
use crate::query::bm25::Bm25Weight;
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct TermScorer {
|
pub struct TermScorer {
|
||||||
postings: SegmentPostings,
|
postings: SegmentPostings,
|
||||||
fieldnorm_reader: FieldNormReader,
|
fieldnorm_reader: FieldNormReader,
|
||||||
similarity_weight: BM25Weight,
|
similarity_weight: Bm25Weight,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TermScorer {
|
impl TermScorer {
|
||||||
pub fn new(
|
pub fn new(
|
||||||
postings: SegmentPostings,
|
postings: SegmentPostings,
|
||||||
fieldnorm_reader: FieldNormReader,
|
fieldnorm_reader: FieldNormReader,
|
||||||
similarity_weight: BM25Weight,
|
similarity_weight: Bm25Weight,
|
||||||
) -> TermScorer {
|
) -> TermScorer {
|
||||||
TermScorer {
|
TermScorer {
|
||||||
postings,
|
postings,
|
||||||
@@ -36,7 +36,7 @@ impl TermScorer {
|
|||||||
pub fn create_for_test(
|
pub fn create_for_test(
|
||||||
doc_and_tfs: &[(DocId, u32)],
|
doc_and_tfs: &[(DocId, u32)],
|
||||||
fieldnorms: &[u32],
|
fieldnorms: &[u32],
|
||||||
similarity_weight: BM25Weight,
|
similarity_weight: Bm25Weight,
|
||||||
) -> TermScorer {
|
) -> TermScorer {
|
||||||
assert!(!doc_and_tfs.is_empty());
|
assert!(!doc_and_tfs.is_empty());
|
||||||
assert!(
|
assert!(
|
||||||
@@ -131,7 +131,7 @@ mod tests {
|
|||||||
use crate::merge_policy::NoMergePolicy;
|
use crate::merge_policy::NoMergePolicy;
|
||||||
use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
|
use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
|
||||||
use crate::query::term_query::TermScorer;
|
use crate::query::term_query::TermScorer;
|
||||||
use crate::query::{BM25Weight, Scorer, TermQuery};
|
use crate::query::{Bm25Weight, Scorer, TermQuery};
|
||||||
use crate::schema::{IndexRecordOption, Schema, TEXT};
|
use crate::schema::{IndexRecordOption, Schema, TEXT};
|
||||||
use crate::Score;
|
use crate::Score;
|
||||||
use crate::{assert_nearly_equals, Index, Searcher, SegmentId, Term};
|
use crate::{assert_nearly_equals, Index, Searcher, SegmentId, Term};
|
||||||
@@ -141,7 +141,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_term_scorer_max_score() -> crate::Result<()> {
|
fn test_term_scorer_max_score() -> crate::Result<()> {
|
||||||
let bm25_weight = BM25Weight::for_one_term(3, 6, 10.0);
|
let bm25_weight = Bm25Weight::for_one_term(3, 6, 10.0);
|
||||||
let mut term_scorer = TermScorer::create_for_test(
|
let mut term_scorer = TermScorer::create_for_test(
|
||||||
&[(2, 3), (3, 12), (7, 8)],
|
&[(2, 3), (3, 12), (7, 8)],
|
||||||
&[0, 0, 10, 12, 0, 0, 0, 100],
|
&[0, 0, 10, 12, 0, 0, 0, 100],
|
||||||
@@ -167,7 +167,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_term_scorer_shallow_advance() -> crate::Result<()> {
|
fn test_term_scorer_shallow_advance() -> crate::Result<()> {
|
||||||
let bm25_weight = BM25Weight::for_one_term(300, 1024, 10.0);
|
let bm25_weight = Bm25Weight::for_one_term(300, 1024, 10.0);
|
||||||
let mut doc_and_tfs = vec![];
|
let mut doc_and_tfs = vec![];
|
||||||
for i in 0u32..300u32 {
|
for i in 0u32..300u32 {
|
||||||
let doc = i * 10;
|
let doc = i * 10;
|
||||||
@@ -205,7 +205,7 @@ mod tests {
|
|||||||
// Average fieldnorm is over the entire index,
|
// Average fieldnorm is over the entire index,
|
||||||
// not necessarily the docs that are in the posting list.
|
// not necessarily the docs that are in the posting list.
|
||||||
// For this reason we multiply by 1.1 to make a realistic value.
|
// For this reason we multiply by 1.1 to make a realistic value.
|
||||||
let bm25_weight = BM25Weight::for_one_term(term_doc_freq as u64,
|
let bm25_weight = Bm25Weight::for_one_term(term_doc_freq as u64,
|
||||||
term_doc_freq as u64 * 10u64,
|
term_doc_freq as u64 * 10u64,
|
||||||
average_fieldnorm);
|
average_fieldnorm);
|
||||||
|
|
||||||
@@ -240,7 +240,7 @@ mod tests {
|
|||||||
doc_tfs.push((258, 1u32));
|
doc_tfs.push((258, 1u32));
|
||||||
|
|
||||||
let fieldnorms: Vec<u32> = std::iter::repeat(20u32).take(300).collect();
|
let fieldnorms: Vec<u32> = std::iter::repeat(20u32).take(300).collect();
|
||||||
let bm25_weight = BM25Weight::for_one_term(10, 129, 20.0);
|
let bm25_weight = Bm25Weight::for_one_term(10, 129, 20.0);
|
||||||
let mut docs = TermScorer::create_for_test(&doc_tfs[..], &fieldnorms[..], bm25_weight);
|
let mut docs = TermScorer::create_for_test(&doc_tfs[..], &fieldnorms[..], bm25_weight);
|
||||||
assert_nearly_equals!(docs.block_max_score(), 2.5161593);
|
assert_nearly_equals!(docs.block_max_score(), 2.5161593);
|
||||||
docs.shallow_seek(135);
|
docs.shallow_seek(135);
|
||||||

@@ -3,7 +3,7 @@ use crate::core::SegmentReader;
 use crate::docset::DocSet;
 use crate::fieldnorm::FieldNormReader;
 use crate::postings::SegmentPostings;
-use crate::query::bm25::BM25Weight;
+use crate::query::bm25::Bm25Weight;
 use crate::query::explanation::does_not_match;
 use crate::query::weight::for_each_scorer;
 use crate::query::Weight;
@@ -15,7 +15,7 @@ use crate::{DocId, Score};
 pub struct TermWeight {
     term: Term,
     index_record_option: IndexRecordOption,
-    similarity_weight: BM25Weight,
+    similarity_weight: Bm25Weight,
     scoring_enabled: bool,
 }

@@ -88,7 +88,7 @@ impl TermWeight {
     pub fn new(
         term: Term,
         index_record_option: IndexRecordOption,
-        similarity_weight: BM25Weight,
+        similarity_weight: Bm25Weight,
         scoring_enabled: bool,
     ) -> TermWeight {
         TermWeight {

@@ -309,7 +309,7 @@ impl Schema {
                 } else {
                     format!("{:?}...", &doc_json[0..20])
                 };
-                DocParsingError::NotJSON(doc_json_sample)
+                DocParsingError::NotJson(doc_json_sample)
             })?;

         let mut doc = Document::default();
@@ -394,7 +394,7 @@ impl<'de> Deserialize<'de> for Schema {
 pub enum DocParsingError {
     /// The payload given is not valid JSON.
     #[error("The provided string is not valid JSON")]
-    NotJSON(String),
+    NotJson(String),
     /// One of the value node could not be parsed.
     #[error("The field '{0:?}' could not be parsed: {1:?}")]
     ValueError(String, ValueParsingError),
@@ -408,7 +408,7 @@ mod tests {

     use crate::schema::field_type::ValueParsingError;
     use crate::schema::int_options::Cardinality::SingleValue;
-    use crate::schema::schema::DocParsingError::NotJSON;
+    use crate::schema::schema::DocParsingError::NotJson;
     use crate::schema::*;
     use matches::{assert_matches, matches};
     use serde_json;
@@ -737,7 +737,7 @@ mod tests {
                 "count": 50,
             }"#,
         );
-        assert_matches!(json_err, Err(NotJSON(_)));
+        assert_matches!(json_err, Err(NotJson(_)));
     }
 }


@@ -122,14 +122,14 @@ impl SegmentSpaceUsage {
         use self::ComponentSpaceUsage::*;
         use crate::SegmentComponent::*;
         match component {
-            POSTINGS => PerField(self.postings().clone()),
-            POSITIONS => PerField(self.positions().clone()),
-            POSITIONSSKIP => PerField(self.positions_skip_idx().clone()),
-            FASTFIELDS => PerField(self.fast_fields().clone()),
-            FIELDNORMS => PerField(self.fieldnorms().clone()),
-            TERMS => PerField(self.termdict().clone()),
-            STORE => Store(self.store().clone()),
-            DELETE => Basic(self.deletes()),
+            Postings => PerField(self.postings().clone()),
+            Positions => PerField(self.positions().clone()),
+            PositionsSkip => PerField(self.positions_skip_idx().clone()),
+            FastFields => PerField(self.fast_fields().clone()),
+            FieldNorms => PerField(self.fieldnorms().clone()),
+            Terms => PerField(self.termdict().clone()),
+            SegmentComponent::Store => ComponentSpaceUsage::Store(self.store().clone()),
+            Delete => Basic(self.deletes()),
         }
     }


@@ -98,7 +98,7 @@ use self::compression_snap::{compress, decompress};
 pub mod tests {

     use super::*;
-    use crate::directory::{Directory, RAMDirectory, WritePtr};
+    use crate::directory::{Directory, RamDirectory, WritePtr};
     use crate::schema::Document;
     use crate::schema::FieldValue;
     use crate::schema::Schema;
@@ -146,7 +146,7 @@ pub mod tests {
     #[test]
     fn test_store() -> crate::Result<()> {
         let path = Path::new("store");
-        let directory = RAMDirectory::create();
+        let directory = RamDirectory::create();
         let store_wrt = directory.open_write(path)?;
         let schema = write_lorem_ipsum_store(store_wrt, 1_000);
         let field_title = schema.get_field("title").unwrap();
@@ -172,7 +172,7 @@ mod bench {

     use super::tests::write_lorem_ipsum_store;
     use crate::directory::Directory;
-    use crate::directory::RAMDirectory;
+    use crate::directory::RamDirectory;
     use crate::store::StoreReader;
     use std::path::Path;
     use test::Bencher;
@@ -180,7 +180,7 @@ mod bench {
     #[bench]
     #[cfg(feature = "mmap")]
     fn bench_store_encode(b: &mut Bencher) {
-        let directory = RAMDirectory::create();
+        let directory = RamDirectory::create();
         let path = Path::new("store");
         b.iter(|| {
             write_lorem_ipsum_store(directory.open_write(path).unwrap(), 1_000);
@@ -190,7 +190,7 @@ mod bench {

     #[bench]
     fn bench_store_decode(b: &mut Bencher) {
-        let directory = RAMDirectory::create();
+        let directory = RamDirectory::create();
         let path = Path::new("store");
         write_lorem_ipsum_store(directory.open_write(path).unwrap(), 1_000);
         let store_file = directory.open_read(path).unwrap();

@@ -124,7 +124,7 @@ mod tests {
     use super::*;
     use crate::schema::Document;
     use crate::schema::Field;
-    use crate::{directory::RAMDirectory, store::tests::write_lorem_ipsum_store, Directory};
+    use crate::{directory::RamDirectory, store::tests::write_lorem_ipsum_store, Directory};
     use std::path::Path;

     fn get_text_field<'a>(doc: &'a Document, field: &'a Field) -> Option<&'a str> {
@@ -133,7 +133,7 @@ mod tests {

     #[test]
     fn test_store_lru_cache() -> crate::Result<()> {
-        let directory = RAMDirectory::create();
+        let directory = RamDirectory::create();
         let path = Path::new("store");
         let writer = directory.open_write(path)?;
         let schema = write_lorem_ipsum_store(writer, 500);

@@ -1,6 +1,6 @@
 use super::{TermDictionary, TermDictionaryBuilder, TermStreamer};

-use crate::directory::{Directory, FileSlice, RAMDirectory, TerminatingWrite};
+use crate::directory::{Directory, FileSlice, RamDirectory, TerminatingWrite};
 use crate::postings::TermInfo;

 use std::path::PathBuf;
@@ -34,7 +34,7 @@ fn test_term_ordinals() -> crate::Result<()> {
         "Sweden",
         "Switzerland",
     ];
-    let directory = RAMDirectory::create();
+    let directory = RamDirectory::create();
     let path = PathBuf::from("TermDictionary");
     {
         let write = directory.open_write(&path)?;
@@ -57,7 +57,7 @@ fn test_term_ordinals() -> crate::Result<()> {

 #[test]
 fn test_term_dictionary_simple() -> crate::Result<()> {
-    let directory = RAMDirectory::create();
+    let directory = RamDirectory::create();
     let path = PathBuf::from("TermDictionary");
     {
         let write = directory.open_write(&path)?;
@@ -380,7 +380,7 @@ fn test_stream_term_ord() -> crate::Result<()> {
     let termdict = stream_range_test_dict()?;
     let mut stream = termdict.stream()?;
     for b in 0u8..10u8 {
-        assert!(stream.advance(), true);
+        assert!(stream.advance());
         assert_eq!(stream.term_ord(), b as u64);
         assert_eq!(stream.key(), &[b]);
     }
@@ -390,7 +390,7 @@ fn test_stream_term_ord() -> crate::Result<()> {

 #[test]
 fn test_automaton_search() -> crate::Result<()> {
-    use crate::query::DFAWrapper;
+    use crate::query::DfaWrapper;
     use levenshtein_automata::LevenshteinAutomatonBuilder;

     const COUNTRIES: [&'static str; 7] = [
@@ -403,7 +403,7 @@ fn test_automaton_search() -> crate::Result<()> {
         "Switzerland",
     ];

-    let directory = RAMDirectory::create();
+    let directory = RamDirectory::create();
     let path = PathBuf::from("TermDictionary");
     {
         let write = directory.open_write(&path)?;
@@ -418,7 +418,7 @@ fn test_automaton_search() -> crate::Result<()> {

     // We can now build an entire dfa.
     let lev_automaton_builder = LevenshteinAutomatonBuilder::new(2, true);
-    let automaton = DFAWrapper(lev_automaton_builder.build_dfa("Spaen"));
+    let automaton = DfaWrapper(lev_automaton_builder.build_dfa("Spaen"));

     let mut range = term_dict.search(automaton).into_stream()?;


@@ -1,6 +1,6 @@
 use fail;
 use std::path::Path;
-use tantivy::directory::{Directory, ManagedDirectory, RAMDirectory, TerminatingWrite};
+use tantivy::directory::{Directory, ManagedDirectory, RamDirectory, TerminatingWrite};
 use tantivy::doc;
 use tantivy::schema::{Schema, TEXT};
 use tantivy::{Index, Term};
@@ -11,7 +11,7 @@ fn test_failpoints_managed_directory_gc_if_delete_fails() {

     let test_path: &'static Path = Path::new("some_path_for_test");

-    let ram_directory = RAMDirectory::create();
+    let ram_directory = RamDirectory::create();
     let mut managed_directory = ManagedDirectory::wrap(ram_directory).unwrap();
     managed_directory
         .open_write(test_path)
@@ -27,7 +27,7 @@ fn test_failpoints_managed_directory_gc_if_delete_fails() {
     //
     // The initial 1*off is there to allow for the removal of the
     // lock file.
-    fail::cfg("RAMDirectory::delete", "1*off->1*return").unwrap();
+    fail::cfg("RamDirectory::delete", "1*off->1*return").unwrap();
     assert!(managed_directory.garbage_collect(Default::default).is_ok());
     assert!(managed_directory.exists(test_path).unwrap());

@@ -51,7 +51,7 @@ fn test_write_commit_fails() -> tantivy::Result<()> {
         index_writer.add_document(doc!(text_field => "a"));
     }
     index_writer.commit()?;
-    fail::cfg("RAMDirectory::atomic_write", "return(error_write_failed)").unwrap();
+    fail::cfg("RamDirectory::atomic_write", "return(error_write_failed)").unwrap();
     for _ in 0..100 {
         index_writer.add_document(doc!(text_field => "b"));
     }