mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-01-06 21:32:58 +00:00
feat(inverted_index): Add applier builder to convert Expr to Predicates (Part 1) (#3034)
* feat(inverted_index.integration): Add applier builder to convert Expr to Predicates (Part 1) Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> * chore: add docs Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> * fix: typos Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> * fix: address comments Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> * Update src/mito2/src/sst/index/applier/builder.rs Co-authored-by: Yingwen <realevenyag@gmail.com> * fix: remove unwrap Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> * chore: error source Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> --------- Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> Co-authored-by: Yingwen <realevenyag@gmail.com>
This commit is contained in:
@@ -113,7 +113,7 @@ pub enum Error {
|
||||
#[snafu(display("Failed to parse regex DFA"))]
|
||||
ParseDFA {
|
||||
#[snafu(source)]
|
||||
error: regex_automata::Error,
|
||||
error: Box<regex_automata::dfa::Error>,
|
||||
location: Location,
|
||||
},
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
use fst::map::OpBuilder;
|
||||
use fst::{IntoStreamer, Streamer};
|
||||
use regex_automata::DenseDFA;
|
||||
use regex_automata::dfa::dense::DFA;
|
||||
use snafu::{ensure, ResultExt};
|
||||
|
||||
use crate::inverted_index::error::{
|
||||
@@ -24,15 +24,13 @@ use crate::inverted_index::search::fst_apply::FstApplier;
|
||||
use crate::inverted_index::search::predicate::{Predicate, Range};
|
||||
use crate::inverted_index::FstMap;
|
||||
|
||||
type Dfa = DenseDFA<Vec<usize>, usize>;
|
||||
|
||||
/// `IntersectionFstApplier` applies intersection operations on an FstMap using specified ranges and regex patterns.
|
||||
pub struct IntersectionFstApplier {
|
||||
/// A list of `Range` which define inclusive or exclusive ranges for keys to be queried in the FstMap.
|
||||
ranges: Vec<Range>,
|
||||
|
||||
/// A list of `Dfa` compiled from regular expression patterns.
|
||||
dfas: Vec<Dfa>,
|
||||
dfas: Vec<DFA<Vec<u32>>>,
|
||||
}
|
||||
|
||||
impl FstApplier for IntersectionFstApplier {
|
||||
@@ -88,8 +86,8 @@ impl IntersectionFstApplier {
|
||||
match predicate {
|
||||
Predicate::Range(range) => ranges.push(range.range),
|
||||
Predicate::RegexMatch(regex) => {
|
||||
let dfa = DenseDFA::new(&regex.pattern);
|
||||
let dfa = dfa.context(ParseDFASnafu)?;
|
||||
let dfa = DFA::new(&regex.pattern);
|
||||
let dfa = dfa.map_err(Box::new).context(ParseDFASnafu)?;
|
||||
dfas.push(dfa);
|
||||
}
|
||||
// Rejection of `InList` predicates is enforced here.
|
||||
@@ -210,47 +208,67 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_intersection_fst_applier_with_valid_pattern() {
|
||||
let test_fst = FstMap::from_iter([("aa", 1), ("bb", 2), ("cc", 3)]).unwrap();
|
||||
let test_fst = FstMap::from_iter([("123", 1), ("abc", 2)]).unwrap();
|
||||
|
||||
let applier = create_applier_from_pattern("a.?").unwrap();
|
||||
let results = applier.apply(&test_fst);
|
||||
assert_eq!(results, vec![1]);
|
||||
let cases = vec![
|
||||
("1", vec![1]),
|
||||
("2", vec![1]),
|
||||
("3", vec![1]),
|
||||
("^1", vec![1]),
|
||||
("^2", vec![]),
|
||||
("^3", vec![]),
|
||||
("^1.*", vec![1]),
|
||||
("^.*2", vec![1]),
|
||||
("^.*3", vec![1]),
|
||||
("1$", vec![]),
|
||||
("2$", vec![]),
|
||||
("3$", vec![1]),
|
||||
("1.*$", vec![1]),
|
||||
("2.*$", vec![1]),
|
||||
("3.*$", vec![1]),
|
||||
("^1..$", vec![1]),
|
||||
("^.2.$", vec![1]),
|
||||
("^..3$", vec![1]),
|
||||
("^[0-9]", vec![1]),
|
||||
("^[0-9]+$", vec![1]),
|
||||
("^[0-9][0-9]$", vec![]),
|
||||
("^[0-9][0-9][0-9]$", vec![1]),
|
||||
("^123$", vec![1]),
|
||||
("a", vec![2]),
|
||||
("b", vec![2]),
|
||||
("c", vec![2]),
|
||||
("^a", vec![2]),
|
||||
("^b", vec![]),
|
||||
("^c", vec![]),
|
||||
("^a.*", vec![2]),
|
||||
("^.*b", vec![2]),
|
||||
("^.*c", vec![2]),
|
||||
("a$", vec![]),
|
||||
("b$", vec![]),
|
||||
("c$", vec![2]),
|
||||
("a.*$", vec![2]),
|
||||
("b.*$", vec![2]),
|
||||
("c.*$", vec![2]),
|
||||
("^.[a-z]", vec![2]),
|
||||
("^abc$", vec![2]),
|
||||
("^ab$", vec![]),
|
||||
("abc$", vec![2]),
|
||||
("^a.c$", vec![2]),
|
||||
("^..c$", vec![2]),
|
||||
("ab", vec![2]),
|
||||
(".*", vec![1, 2]),
|
||||
("", vec![1, 2]),
|
||||
("^$", vec![]),
|
||||
("1|a", vec![1, 2]),
|
||||
("^123$|^abc$", vec![1, 2]),
|
||||
("^123$|d", vec![1]),
|
||||
];
|
||||
|
||||
let applier = create_applier_from_pattern("b.?").unwrap();
|
||||
let results = applier.apply(&test_fst);
|
||||
assert_eq!(results, vec![2]);
|
||||
|
||||
let applier = create_applier_from_pattern("c.?").unwrap();
|
||||
let results = applier.apply(&test_fst);
|
||||
assert_eq!(results, vec![3]);
|
||||
|
||||
let applier = create_applier_from_pattern("a.*").unwrap();
|
||||
let results = applier.apply(&test_fst);
|
||||
assert_eq!(results, vec![1]);
|
||||
|
||||
let applier = create_applier_from_pattern("b.*").unwrap();
|
||||
let results = applier.apply(&test_fst);
|
||||
assert_eq!(results, vec![2]);
|
||||
|
||||
let applier = create_applier_from_pattern("c.*").unwrap();
|
||||
let results = applier.apply(&test_fst);
|
||||
assert_eq!(results, vec![3]);
|
||||
|
||||
let applier = create_applier_from_pattern("d.?").unwrap();
|
||||
let results = applier.apply(&test_fst);
|
||||
assert!(results.is_empty());
|
||||
|
||||
let applier = create_applier_from_pattern("a.?|b.?").unwrap();
|
||||
let results = applier.apply(&test_fst);
|
||||
assert_eq!(results, vec![1, 2]);
|
||||
|
||||
let applier = create_applier_from_pattern("d.?|a.?").unwrap();
|
||||
let results = applier.apply(&test_fst);
|
||||
assert_eq!(results, vec![1]);
|
||||
|
||||
let applier = create_applier_from_pattern(".*").unwrap();
|
||||
let results = applier.apply(&test_fst);
|
||||
assert_eq!(results, vec![1, 2, 3]);
|
||||
for (pattern, expected) in cases {
|
||||
let applier = create_applier_from_pattern(pattern).unwrap();
|
||||
let results = applier.apply(&test_fst);
|
||||
assert_eq!(results, expected);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
Reference in New Issue
Block a user