feat(inverted_index): Add applier builder to convert Expr to Predicates (Part 1) (#3034)

* feat(inverted_index.integration): Add applier builder to convert Expr to Predicates (Part 1)

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* chore: add docs

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: typos

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: address comments

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* Update src/mito2/src/sst/index/applier/builder.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

* fix: remove unwrap

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* chore: error source

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
This commit is contained in:
Zhenchi
2023-12-30 15:32:32 +08:00
committed by GitHub
parent 1c94d4c506
commit 69a53130c2
14 changed files with 668 additions and 50 deletions

View File

@@ -113,7 +113,7 @@ pub enum Error {
#[snafu(display("Failed to parse regex DFA"))]
ParseDFA {
#[snafu(source)]
error: regex_automata::Error,
error: Box<regex_automata::dfa::Error>,
location: Location,
},

View File

@@ -14,7 +14,7 @@
use fst::map::OpBuilder;
use fst::{IntoStreamer, Streamer};
use regex_automata::DenseDFA;
use regex_automata::dfa::dense::DFA;
use snafu::{ensure, ResultExt};
use crate::inverted_index::error::{
@@ -24,15 +24,13 @@ use crate::inverted_index::search::fst_apply::FstApplier;
use crate::inverted_index::search::predicate::{Predicate, Range};
use crate::inverted_index::FstMap;
type Dfa = DenseDFA<Vec<usize>, usize>;
/// `IntersectionFstApplier` applies intersection operations on an FstMap using specified ranges and regex patterns.
///
/// It implements [`FstApplier`] and holds the key ranges together with the
/// automata compiled from regex patterns.
pub struct IntersectionFstApplier {
/// A list of `Range` which define inclusive or exclusive ranges for keys to be queried in the FstMap.
ranges: Vec<Range>,
/// Dense DFAs compiled from regular expression patterns.
dfas: Vec<Dfa>,
// NOTE(review): `dfas` is declared twice in this view (once with the `Dfa` alias, once with
// `DFA<Vec<u32>>`); presumably only one declaration belongs in the file — confirm.
dfas: Vec<DFA<Vec<u32>>>,
}
impl FstApplier for IntersectionFstApplier {
@@ -88,8 +86,8 @@ impl IntersectionFstApplier {
match predicate {
Predicate::Range(range) => ranges.push(range.range),
Predicate::RegexMatch(regex) => {
let dfa = DenseDFA::new(&regex.pattern);
let dfa = dfa.context(ParseDFASnafu)?;
let dfa = DFA::new(&regex.pattern);
let dfa = dfa.map_err(Box::new).context(ParseDFASnafu)?;
dfas.push(dfa);
}
// Rejection of `InList` predicates is enforced here.
@@ -210,47 +208,67 @@ mod tests {
// Checks that appliers built from valid regex patterns yield the expected values
// when applied to a small FST.
#[test]
fn test_intersection_fst_applier_with_valid_pattern() {
// NOTE(review): two `test_fst` bindings follow; the second shadows the first, so every
// `apply` call below sees the map with keys "123" and "abc" — confirm which is intended.
let test_fst = FstMap::from_iter([("aa", 1), ("bb", 2), ("cc", 3)]).unwrap();
let test_fst = FstMap::from_iter([("123", 1), ("abc", 2)]).unwrap();
let applier = create_applier_from_pattern("a.?").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![1]);
// Each case pairs a regex pattern with the values `apply` is expected to return for it.
let cases = vec![
("1", vec![1]),
("2", vec![1]),
("3", vec![1]),
("^1", vec![1]),
("^2", vec![]),
("^3", vec![]),
("^1.*", vec![1]),
("^.*2", vec![1]),
("^.*3", vec![1]),
("1$", vec![]),
("2$", vec![]),
("3$", vec![1]),
("1.*$", vec![1]),
("2.*$", vec![1]),
("3.*$", vec![1]),
("^1..$", vec![1]),
("^.2.$", vec![1]),
("^..3$", vec![1]),
("^[0-9]", vec![1]),
("^[0-9]+$", vec![1]),
("^[0-9][0-9]$", vec![]),
("^[0-9][0-9][0-9]$", vec![1]),
("^123$", vec![1]),
("a", vec![2]),
("b", vec![2]),
("c", vec![2]),
("^a", vec![2]),
("^b", vec![]),
("^c", vec![]),
("^a.*", vec![2]),
("^.*b", vec![2]),
("^.*c", vec![2]),
("a$", vec![]),
("b$", vec![]),
("c$", vec![2]),
("a.*$", vec![2]),
("b.*$", vec![2]),
("c.*$", vec![2]),
("^.[a-z]", vec![2]),
("^abc$", vec![2]),
("^ab$", vec![]),
("abc$", vec![2]),
("^a.c$", vec![2]),
("^..c$", vec![2]),
("ab", vec![2]),
(".*", vec![1, 2]),
("", vec![1, 2]),
("^$", vec![]),
("1|a", vec![1, 2]),
("^123$|^abc$", vec![1, 2]),
("^123$|d", vec![1]),
];
// NOTE(review): the standalone assertions below run against the shadowing `test_fst`
// above; presumably they are superseded by the table-driven loop at the end — confirm.
let applier = create_applier_from_pattern("b.?").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![2]);
let applier = create_applier_from_pattern("c.?").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![3]);
let applier = create_applier_from_pattern("a.*").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![1]);
let applier = create_applier_from_pattern("b.*").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![2]);
let applier = create_applier_from_pattern("c.*").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![3]);
let applier = create_applier_from_pattern("d.?").unwrap();
let results = applier.apply(&test_fst);
assert!(results.is_empty());
let applier = create_applier_from_pattern("a.?|b.?").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![1, 2]);
let applier = create_applier_from_pattern("d.?|a.?").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![1]);
let applier = create_applier_from_pattern(".*").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![1, 2, 3]);
// Build an applier for every pattern in the table and compare its output with the
// expected values for that pattern.
for (pattern, expected) in cases {
let applier = create_applier_from_pattern(pattern).unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, expected);
}
}
#[test]