mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-01-03 20:02:54 +00:00
refactor: try upgrade regex-automata (#3575)
* refactor: try upgrade regex-automata
  Signed-off-by: tison <wander4096@gmail.com>
* try fix
  Signed-off-by: tison <wander4096@gmail.com>
* always check match with next_eoi_state
  Signed-off-by: tison <wander4096@gmail.com>
* add a guard to prevent over moving the state
  Signed-off-by: tison <wander4096@gmail.com>
* tidy
  Signed-off-by: tison <wander4096@gmail.com>
---------
Signed-off-by: tison <wander4096@gmail.com>
This commit is contained in:
13
Cargo.lock
generated
13
Cargo.lock
generated
@@ -4378,7 +4378,7 @@ dependencies = [
|
||||
"prost 0.12.3",
|
||||
"rand",
|
||||
"regex",
|
||||
"regex-automata 0.2.0",
|
||||
"regex-automata 0.4.3",
|
||||
"snafu",
|
||||
"tempfile",
|
||||
"tokio",
|
||||
@@ -7801,17 +7801,6 @@ dependencies = [
|
||||
"regex-syntax 0.6.29",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e9368763f5a9b804326f3af749e16f9abf378d227bcdee7634b13d8f17793782"
|
||||
dependencies = [
|
||||
"fst",
|
||||
"memchr",
|
||||
"regex-syntax 0.6.29",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.4.3"
|
||||
|
||||
@@ -125,7 +125,7 @@ prost = "0.12"
|
||||
raft-engine = { version = "0.4.1", default-features = false }
|
||||
rand = "0.8"
|
||||
regex = "1.8"
|
||||
regex-automata = { version = "0.2", features = ["transducer"] }
|
||||
regex-automata = { version = "0.4" }
|
||||
reqwest = { version = "0.11", default-features = false, features = [
|
||||
"json",
|
||||
"rustls-tls-native-roots",
|
||||
|
||||
@@ -113,7 +113,7 @@ pub enum Error {
|
||||
#[snafu(display("Failed to parse regex DFA"))]
|
||||
ParseDFA {
|
||||
#[snafu(source)]
|
||||
error: Box<regex_automata::dfa::Error>,
|
||||
error: Box<regex_automata::dfa::dense::BuildError>,
|
||||
location: Location,
|
||||
},
|
||||
|
||||
|
||||
@@ -17,6 +17,10 @@ use std::mem::size_of;
|
||||
use fst::map::OpBuilder;
|
||||
use fst::{IntoStreamer, Streamer};
|
||||
use regex_automata::dfa::dense::DFA;
|
||||
use regex_automata::dfa::Automaton;
|
||||
use regex_automata::util::primitives::StateID;
|
||||
use regex_automata::util::start::Config;
|
||||
use regex_automata::Anchored;
|
||||
use snafu::{ensure, ResultExt};
|
||||
|
||||
use crate::inverted_index::error::{
|
||||
@@ -32,7 +36,53 @@ pub struct IntersectionFstApplier {
|
||||
ranges: Vec<Range>,
|
||||
|
||||
/// A list of `Dfa` compiled from regular expression patterns.
|
||||
dfas: Vec<DFA<Vec<u32>>>,
|
||||
dfas: Vec<DfaFstAutomaton>,
|
||||
}
|
||||
|
||||
/// Newtype around a dense `DFA<Vec<u32>>` compiled from a regular expression.
///
/// The wrapper exists so that `fst::Automaton` can be implemented for the DFA
/// in this file; the inner DFA is reachable as `.0`.
#[derive(Debug)]
|
||||
struct DfaFstAutomaton(DFA<Vec<u32>>);
|
||||
|
||||
// Adapter that lets the wrapped regex DFA drive an FST search: every
// `fst::Automaton` callback is forwarded to the matching DFA state query or
// transition on `self.0`.
impl fst::Automaton for DfaFstAutomaton {
|
||||
// The automaton state is the DFA's own state identifier.
type State = StateID;
|
||||
|
||||
/// Returns the DFA start state for an unanchored (`Anchored::No`) search.
///
/// # Panics
///
/// Panics if the DFA reports an error for this start configuration
/// (the `Result` from `start_state` is unwrapped).
#[inline]
|
||||
fn start(&self) -> Self::State {
|
||||
let config = Config::new().anchored(Anchored::No);
|
||||
self.0.start_state(&config).unwrap()
|
||||
}
|
||||
|
||||
/// Returns `true` when `state` is a match state of the DFA.
#[inline]
|
||||
fn is_match(&self, state: &Self::State) -> bool {
|
||||
self.0.is_match_state(*state)
|
||||
}
|
||||
|
||||
/// Returns `false` only when `state` is the DFA's dead state; any other
/// state is reported as still able to match.
#[inline]
|
||||
fn can_match(&self, state: &Self::State) -> bool {
|
||||
!self.0.is_dead_state(*state)
|
||||
}
|
||||
|
||||
/// Computes the state to check once the input key is exhausted.
///
/// Always returns `Some`: the current state if it is already a match
/// state, otherwise the state reached through the DFA's end-of-input
/// transition.
#[inline]
|
||||
fn accept_eof(&self, state: &StateID) -> Option<StateID> {
|
||||
// Already matching: keep the state as is instead of stepping past it.
if self.0.is_match_state(*state) {
|
||||
return Some(*state);
|
||||
}
|
||||
// NOTE(review): presumably the end-of-input step is what lets the DFA
// report a match that is only confirmed at the end of the key — confirm
// against the regex-automata documentation.
Some(self.0.next_eoi_state(*state))
|
||||
}
|
||||
|
||||
/// Advances `state` by one input `byte`.
///
/// A match state is returned unchanged (no further transition is taken);
/// otherwise the DFA's `next_state` transition for `byte` is returned.
#[inline]
|
||||
fn accept(&self, state: &Self::State, byte: u8) -> Self::State {
|
||||
// Guard: once in a match state, do not move the state any further.
if self.0.is_match_state(*state) {
|
||||
return *state;
|
||||
}
|
||||
self.0.next_state(*state, byte)
|
||||
}
|
||||
}
|
||||
|
||||
impl IntersectionFstApplier {
|
||||
/// Builds an applier from key `ranges` and compiled regex `dfas`.
///
/// Each DFA is wrapped in `DfaFstAutomaton` before being stored, so
/// callers keep passing plain `DFA<Vec<u32>>` values.
fn new(ranges: Vec<Range>, dfas: Vec<DFA<Vec<u32>>>) -> Self {
|
||||
// The tuple-struct constructor is used directly as the mapping function.
let dfas = dfas.into_iter().map(DfaFstAutomaton).collect();
|
||||
Self { ranges, dfas }
|
||||
}
|
||||
}
|
||||
|
||||
impl FstApplier for IntersectionFstApplier {
|
||||
@@ -86,7 +136,7 @@ impl FstApplier for IntersectionFstApplier {
|
||||
|
||||
size += self.dfas.capacity() * size_of::<DFA<Vec<u32>>>();
|
||||
for dfa in &self.dfas {
|
||||
size += dfa.memory_usage();
|
||||
size += dfa.0.memory_usage();
|
||||
}
|
||||
size
|
||||
}
|
||||
@@ -119,7 +169,7 @@ impl IntersectionFstApplier {
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Self { dfas, ranges })
|
||||
Ok(Self::new(ranges, dfas))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -365,18 +415,15 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_intersection_fst_applier_memory_usage() {
|
||||
let applier = IntersectionFstApplier {
|
||||
ranges: vec![],
|
||||
dfas: vec![],
|
||||
};
|
||||
let applier = IntersectionFstApplier::new(vec![], vec![]);
|
||||
|
||||
assert_eq!(applier.memory_usage(), 0);
|
||||
|
||||
let dfa = DFA::new("^abc$").unwrap();
|
||||
assert_eq!(dfa.memory_usage(), 320);
|
||||
|
||||
let applier = IntersectionFstApplier {
|
||||
ranges: vec![Range {
|
||||
let applier = IntersectionFstApplier::new(
|
||||
vec![Range {
|
||||
lower: Some(Bound {
|
||||
value: b"aa".to_vec(),
|
||||
inclusive: true,
|
||||
@@ -386,9 +433,8 @@ mod tests {
|
||||
inclusive: true,
|
||||
}),
|
||||
}],
|
||||
dfas: vec![dfa],
|
||||
};
|
||||
|
||||
vec![dfa],
|
||||
);
|
||||
assert_eq!(
|
||||
applier.memory_usage(),
|
||||
size_of::<Range>() + 4 + size_of::<DFA<Vec<u32>>>() + 320
|
||||
|
||||
Reference in New Issue
Block a user