mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-09 10:32:55 +00:00
Using tantivy-fst version 0.3.
This commit is contained in:
@@ -18,7 +18,7 @@ byteorder = "1.0"
|
||||
crc32fast = "1.2.0"
|
||||
once_cell = "1.0"
|
||||
regex ={version = "1.3.0", default-features = false, features = ["std"]}
|
||||
tantivy-fst = "0.2.1"
|
||||
tantivy-fst = {path="../tantivy-fst", version="0.3"}
|
||||
memmap = {version = "0.7", optional=true}
|
||||
lz4 = {version="1.20", optional=true}
|
||||
snap = "1"
|
||||
@@ -29,7 +29,7 @@ serde = {version="1.0", features=["derive"]}
|
||||
serde_json = "1.0"
|
||||
num_cpus = "1.2"
|
||||
fs2={version="0.4", optional=true}
|
||||
levenshtein_automata = "0.1"
|
||||
levenshtein_automata = "0.2"
|
||||
notify = {version="4", optional=true}
|
||||
uuid = { version = "0.8", features = ["v4", "serde"] }
|
||||
crossbeam = "0.7"
|
||||
|
||||
@@ -2,10 +2,36 @@ use crate::query::{AutomatonWeight, Query, Weight};
|
||||
use crate::schema::Term;
|
||||
use crate::Searcher;
|
||||
use crate::TantivyError::InvalidArgument;
|
||||
use levenshtein_automata::{LevenshteinAutomatonBuilder, DFA};
|
||||
use levenshtein_automata::{Distance, LevenshteinAutomatonBuilder, DFA};
|
||||
use once_cell::sync::Lazy;
|
||||
use std::collections::HashMap;
|
||||
use std::ops::Range;
|
||||
use tantivy_fst::Automaton;
|
||||
|
||||
/// Newtype around the Levenshtein `DFA` so that the `tantivy_fst::Automaton`
/// trait can be implemented for it in this module.
struct DFAWrapper(pub DFA);
|
||||
|
||||
impl Automaton for DFAWrapper {
|
||||
type State = u32;
|
||||
|
||||
fn start(&self) -> Self::State {
|
||||
self.0.initial_state()
|
||||
}
|
||||
|
||||
fn is_match(&self, state: &Self::State) -> bool {
|
||||
match self.0.distance(*state) {
|
||||
Distance::Exact(_) => true,
|
||||
Distance::AtLeast(_) => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn can_match(&self, state: &u32) -> bool {
|
||||
state != levenshtein_automata::SINK_STATE
|
||||
}
|
||||
|
||||
fn accept(&self, state: &Self::State, byte: u8) -> Self::State {
|
||||
self.0.transition(*state, byte)
|
||||
}
|
||||
}
|
||||
|
||||
/// A range of Levenshtein distances that we will build DFAs for our terms
|
||||
/// The computation is exponential, so best keep it to low single digits
|
||||
@@ -101,7 +127,7 @@ impl FuzzyTermQuery {
|
||||
}
|
||||
}
|
||||
|
||||
fn specialized_weight(&self) -> crate::Result<AutomatonWeight<DFA>> {
|
||||
fn specialized_weight(&self) -> crate::Result<AutomatonWeight<DFAWrapper>> {
|
||||
// LEV_BUILDER is a HashMap, whose `get` method returns an Option
|
||||
match LEV_BUILDER.get(&(self.distance, false)) {
|
||||
// Unwrap the option and build the Ok(AutomatonWeight)
|
||||
@@ -111,7 +137,10 @@ impl FuzzyTermQuery {
|
||||
} else {
|
||||
automaton_builder.build_dfa(self.term.text())
|
||||
};
|
||||
Ok(AutomatonWeight::new(self.term.field(), automaton))
|
||||
Ok(AutomatonWeight::new(
|
||||
self.term.field(),
|
||||
DFAWrapper(automaton),
|
||||
))
|
||||
}
|
||||
None => Err(InvalidArgument(format!(
|
||||
"Levenshtein distance of {} is not allowed. Choose a value in the {:?} range",
|
||||
|
||||
@@ -55,8 +55,8 @@ pub enum QueryParserError {
|
||||
/// The tokenizer for the given field is unknown
|
||||
/// The two argument strings are the name of the field, the name of the tokenizer
|
||||
#[fail(
|
||||
display = "The tokenizer '{:?}' for the field '{:?}' is unknown",
|
||||
_0, _1
|
||||
display = "The tokenizer '{:?}' for the field '{:?}' is unknown",
|
||||
_0, _1
|
||||
)]
|
||||
UnknownTokenizer(String, String),
|
||||
/// The query contains a range query with a phrase as one of the bounds.
|
||||
@@ -1049,7 +1049,7 @@ mod test {
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"title:a AND title:b",
|
||||
"(+Term(field=0,bytes=[97]) +Term(field=0,bytes=[98]))",
|
||||
default_conjunction
|
||||
default_conjunction,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1060,9 +1060,8 @@ mod test {
|
||||
test_parse_query_to_logical_ast_helper(
|
||||
"title:a OR title:b",
|
||||
"(Term(field=0,bytes=[97]) Term(field=0,bytes=[98]))",
|
||||
default_conjunction
|
||||
default_conjunction,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user