refactor: replace chumsky with nom

This commit is contained in:
Paolo Barbolini
2025-07-05 17:24:42 +02:00
parent b073df7666
commit b752170bd8
6 changed files with 192 additions and 183 deletions

85
Cargo.lock generated
View File

@@ -17,18 +17,6 @@ version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"
[[package]]
name = "ahash"
version = "0.8.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
dependencies = [
"cfg-if",
"once_cell",
"version_check",
"zerocopy 0.7.35",
]
[[package]]
name = "aho-corasick"
version = "1.1.3"
@@ -38,12 +26,6 @@ dependencies = [
"memchr",
]
[[package]]
name = "allocator-api2"
version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
[[package]]
name = "anes"
version = "0.1.6"
@@ -458,16 +440,6 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chumsky"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9"
dependencies = [
"hashbrown",
"stacker",
]
[[package]]
name = "ciborium"
version = "0.2.2"
@@ -1044,16 +1016,6 @@ dependencies = [
"crunchy",
]
[[package]]
name = "hashbrown"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
dependencies = [
"ahash",
"allocator-api2",
]
[[package]]
name = "hermit-abi"
version = "0.4.0"
@@ -1331,7 +1293,6 @@ dependencies = [
"async-trait",
"base64",
"boring",
"chumsky",
"criterion",
"ed25519-dalek",
"email-encoding",
@@ -1765,7 +1726,7 @@ version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
dependencies = [
"zerocopy 0.8.24",
"zerocopy",
]
[[package]]
@@ -1820,15 +1781,6 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "psm"
version = "0.1.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f58e5423e24c18cc840e1c98370b3993c6649cd1678b4d24318bcf0a083cbe88"
dependencies = [
"cc",
]
[[package]]
name = "quote"
version = "1.0.40"
@@ -2288,19 +2240,6 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "stacker"
version = "0.1.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "601f9201feb9b09c00266478bf459952b9ef9a6b94edb2f21eba14ab681a60a9"
dependencies = [
"cc",
"cfg-if",
"libc",
"psm",
"windows-sys 0.59.0",
]
[[package]]
name = "subtle"
version = "2.6.1"
@@ -2977,33 +2916,13 @@ dependencies = [
"synstructure",
]
[[package]]
name = "zerocopy"
version = "0.7.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
dependencies = [
"zerocopy-derive 0.7.35",
]
[[package]]
name = "zerocopy"
version = "0.8.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2586fea28e186957ef732a5f8b3be2da217d65c5969d4b1e17f973ebbe876879"
dependencies = [
"zerocopy-derive 0.8.24",
]
[[package]]
name = "zerocopy-derive"
version = "0.7.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.100",
"zerocopy-derive",
]
[[package]]

View File

@@ -20,7 +20,7 @@ maintenance = { status = "actively-developed" }
[dependencies]
email_address = { version = "0.2.1", default-features = false }
chumsky = "0.9"
nom = "8"
idna = "1"
## tracing support
@@ -40,7 +40,6 @@ serde = { version = "1.0.110", features = ["derive"], optional = true }
serde_json = { version = "1", optional = true }
# smtp-transport
nom = { version = "8", optional = true }
hostname = { version = "0.4", optional = true } # feature
socket2 = { version = "0.6", optional = true }
url = { version = "2.4", optional = true }
@@ -107,7 +106,7 @@ mime03 = ["dep:mime"]
file-transport = ["dep:uuid", "tokio1_crate?/fs", "tokio1_crate?/io-util"]
file-transport-envelope = ["serde", "dep:serde_json", "file-transport"]
sendmail-transport = ["tokio1_crate?/process", "tokio1_crate?/io-util", "async-std?/unstable"]
smtp-transport = ["dep:base64", "dep:nom", "dep:socket2", "dep:url", "dep:percent-encoding", "tokio1_crate?/rt", "tokio1_crate?/time", "tokio1_crate?/net"]
smtp-transport = ["dep:base64", "dep:socket2", "dep:url", "dep:percent-encoding", "tokio1_crate?/rt", "tokio1_crate?/time", "tokio1_crate?/net"]
pool = ["dep:futures-util"]

View File

@@ -3,30 +3,34 @@
//!
//! [RFC2234]: https://datatracker.ietf.org/doc/html/rfc2234
use chumsky::{error::Cheap, prelude::*};
use nom::{
branch::alt,
character::complete::{char, satisfy},
IResult, Parser,
};
// 6.1 Core Rules
// https://datatracker.ietf.org/doc/html/rfc2234#section-6.1
// ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
pub(super) fn alpha() -> impl Parser<char, char, Error = Cheap<char>> {
filter(|c: &char| c.is_ascii_alphabetic())
/// Parses one ASCII letter (`A-Z` or `a-z`) from the front of `input`.
pub(super) fn alpha(input: &str) -> IResult<&str, char> {
    let is_alpha = |c: char| c.is_ascii_alphabetic();
    satisfy(is_alpha).parse(input)
}
// DIGIT = %x30-39
// ; 0-9
pub(super) fn digit() -> impl Parser<char, char, Error = Cheap<char>> {
filter(|c: &char| c.is_ascii_digit())
/// Parses one ASCII decimal digit (`0-9`) from the front of `input`.
pub(super) fn digit(input: &str) -> IResult<&str, char> {
    let is_digit = |c: char| c.is_ascii_digit();
    satisfy(is_digit).parse(input)
}
// DQUOTE = %x22
// ; " (Double Quote)
pub(super) fn dquote() -> impl Parser<char, char, Error = Cheap<char>> {
just('"')
/// Parses a single double-quote character (`"`).
pub(super) fn dquote(input: &str) -> IResult<&str, char> {
    const DQUOTE: char = '"';
    char(DQUOTE).parse(input)
}
// WSP = SP / HTAB
// ; white space
pub(super) fn wsp() -> impl Parser<char, char, Error = Cheap<char>> {
choice((just(' '), just('\t')))
/// Parses one WSP character: a space or a horizontal tab.
pub(super) fn wsp(input: &str) -> IResult<&str, char> {
    let space = char(' ');
    let htab = char('\t');
    alt((space, htab)).parse(input)
}

View File

@@ -3,7 +3,14 @@
//!
//! [RFC2822]: https://datatracker.ietf.org/doc/html/rfc2822
use chumsky::{error::Cheap, prelude::*};
use nom::{
branch::alt,
character::complete::{char, satisfy},
combinator::{eof, map, opt},
multi::{fold_many0, fold_many1, many0, many1, separated_list0},
sequence::{delimited, pair, preceded, terminated},
IResult, Parser,
};
use super::{rfc2234, rfc5336};
@@ -15,8 +22,8 @@ use super::{rfc2234, rfc5336};
// %d12 / ; carriage return, line feed,
// %d14-31 / ; and white space characters
// %d127
fn no_ws_ctl() -> impl Parser<char, char, Error = Cheap<char>> {
filter(|c| matches!(u32::from(*c), 1..=8 | 11 | 12 | 14..=31 | 127))
/// Parses one NO-WS-CTL character: an ASCII control character other than
/// NUL, HTAB, LF and CR (code points 1-8, 11, 12, 14-31 and 127).
fn no_ws_ctl(input: &str) -> IResult<&str, char> {
    let is_no_ws_ctl = |c: char| matches!(u32::from(c), 1..=8 | 11 | 12 | 14..=31 | 127);
    satisfy(is_no_ws_ctl).parse(input)
}
// text = %d1-9 / ; Characters excluding CR and LF
@@ -24,16 +31,16 @@ fn no_ws_ctl() -> impl Parser<char, char, Error = Cheap<char>> {
// %d12 /
// %d14-127 /
// obs-text
fn text() -> impl Parser<char, char, Error = Cheap<char>> {
filter(|c| matches!(u32::from(*c), 1..=9 | 11 | 12 | 14..=127))
/// Parses one `text` character: any code point in 1-127 except LF (10) and
/// CR (13). `obs-text` is not handled.
fn text(input: &str) -> IResult<&str, char> {
    let is_text = |c: char| matches!(u32::from(c), 1..=9 | 11 | 12 | 14..=127);
    satisfy(is_text).parse(input)
}
// 3.2.2. Quoted characters
// https://datatracker.ietf.org/doc/html/rfc2822#section-3.2.2
// quoted-pair = ("\" text) / obs-qp
fn quoted_pair() -> impl Parser<char, char, Error = Cheap<char>> {
just('\\').ignore_then(text())
/// Parses a backslash followed by one `text` character and returns the
/// escaped character without the backslash.
fn quoted_pair(input: &str) -> IResult<&str, char> {
    let backslash = char('\\');
    preceded(backslash, text).parse(input)
}
// 3.2.3. Folding white space and comments
@@ -41,17 +48,19 @@ fn quoted_pair() -> impl Parser<char, char, Error = Cheap<char>> {
// FWS = ([*WSP CRLF] 1*WSP) / ; Folding white space
// obs-FWS
pub(super) fn fws() -> impl Parser<char, Option<char>, Error = Cheap<char>> {
rfc2234::wsp()
.or_not()
.then_ignore(rfc2234::wsp().ignored().repeated())
pub(super) fn fws(input: &str) -> IResult<&str, Option<char>> {
map(
pair(opt(rfc2234::wsp), many0(rfc2234::wsp)),
|(first, _rest)| first,
)
.parse(input)
}
// CFWS = *([FWS] comment) (([FWS] comment) / FWS)
pub(super) fn cfws() -> impl Parser<char, Option<char>, Error = Cheap<char>> {
pub(super) fn cfws(input: &str) -> IResult<&str, Option<char>> {
// TODO: comments are not currently supported, so for now a cfws is
// the same as a fws.
fws()
fws(input)
}
// 3.2.4. Atom
@@ -68,13 +77,13 @@ pub(super) fn cfws() -> impl Parser<char, Option<char>, Error = Cheap<char>> {
// "`" / "{" /
// "|" / "}" /
// "~"
pub(super) fn atext() -> impl Parser<char, char, Error = Cheap<char>> {
choice((
rfc2234::alpha(),
rfc2234::digit(),
filter(|c| {
pub(super) fn atext(input: &str) -> IResult<&str, char> {
alt((
rfc2234::alpha,
rfc2234::digit,
satisfy(|c| {
matches!(
*c,
c,
'!' | '#'
| '$'
| '%'
@@ -96,29 +105,59 @@ pub(super) fn atext() -> impl Parser<char, char, Error = Cheap<char>> {
)
}),
// also allow non ASCII UTF8 chars
rfc5336::utf8_non_ascii(),
rfc5336::utf8_non_ascii,
))
.parse(input)
}
// atom = [CFWS] 1*atext [CFWS]
pub(super) fn atom() -> impl Parser<char, Vec<char>, Error = Cheap<char>> {
cfws().chain(atext().repeated().at_least(1))
pub(super) fn atom(input: &str) -> IResult<&str, String> {
map(
pair(
cfws,
fold_many1(atext, String::new, |mut acc, c| {
acc.push(c);
acc
}),
),
|(_cfws, chars)| chars,
)
.parse(input)
}
// dot-atom = [CFWS] dot-atom-text [CFWS]
pub(super) fn dot_atom() -> impl Parser<char, Vec<char>, Error = Cheap<char>> {
cfws().chain(dot_atom_text())
pub(super) fn dot_atom(input: &str) -> IResult<&str, String> {
map(pair(cfws, dot_atom_text), |(_cfws, text)| text).parse(input)
}
// dot-atom-text = 1*atext *("." 1*atext)
pub(super) fn dot_atom_text() -> impl Parser<char, Vec<char>, Error = Cheap<char>> {
atext().repeated().at_least(1).chain(
just('.')
.chain(atext().repeated().at_least(1))
.repeated()
.at_least(1)
.flatten(),
pub(super) fn dot_atom_text(input: &str) -> IResult<&str, String> {
map(
pair(
fold_many1(atext, String::new, |mut acc, c| {
acc.push(c);
acc
}),
many0(map(
pair(
char('.'),
fold_many1(atext, String::new, |mut acc, c| {
acc.push(c);
acc
}),
),
|(dot, chars)| format!("{}{}", dot, chars),
)),
),
|(first, rest)| {
let mut result = first;
for part in rest {
result.push_str(&part);
}
result
},
)
.parse(input)
}
// 3.2.5. Quoted strings
@@ -129,122 +168,171 @@ pub(super) fn dot_atom_text() -> impl Parser<char, Vec<char>, Error = Cheap<char
// %d33 / ; The rest of the US-ASCII
// %d35-91 / ; characters not including "\"
// %d93-126 ; or the quote character
fn qtext() -> impl Parser<char, char, Error = Cheap<char>> {
choice((
filter(|c| matches!(u32::from(*c), 33 | 35..=91 | 93..=126)),
no_ws_ctl(),
/// Parses one `qtext` character: printable US-ASCII other than `"` and `\`
/// (33, 35-91, 93-126), or a NO-WS-CTL character.
fn qtext(input: &str) -> IResult<&str, char> {
    let printable = satisfy(|c| matches!(u32::from(c), 33 | 35..=91 | 93..=126));
    alt((printable, no_ws_ctl)).parse(input)
}
// qcontent = qtext / quoted-pair
pub(super) fn qcontent() -> impl Parser<char, char, Error = Cheap<char>> {
choice((qtext(), quoted_pair(), rfc5336::utf8_non_ascii()))
/// Parses one character of quoted-string content: `qtext`, then a
/// `quoted-pair`, then a non-ASCII UTF-8 character, tried in that order.
pub(super) fn qcontent(input: &str) -> IResult<&str, char> {
    let mut content = alt((qtext, quoted_pair, rfc5336::utf8_non_ascii));
    content.parse(input)
}
// quoted-string = [CFWS]
// DQUOTE *([FWS] qcontent) [FWS] DQUOTE
// [CFWS]
fn quoted_string() -> impl Parser<char, Vec<char>, Error = Cheap<char>> {
rfc2234::dquote()
.ignore_then(fws().chain(qcontent()).repeated().flatten())
.then_ignore(text::whitespace())
.then_ignore(rfc2234::dquote())
/// Parses a double-quoted string and returns its content without the
/// surrounding quotes.
///
/// Whitespace matched by `fws` before each content character is consumed but
/// not kept, and any whitespace right before the closing quote is skipped.
/// Surrounding CFWS is not handled here.
fn quoted_string(input: &str) -> IResult<&str, String> {
    delimited(
        rfc2234::dquote,
        // *([FWS] qcontent): keep only the content characters.
        fold_many0(
            preceded(fws, qcontent),
            String::new,
            |mut acc: String, c: char| {
                acc.push(c);
                acc
            },
        ),
        preceded(many0(satisfy(|c| c.is_whitespace())), rfc2234::dquote),
    )
    .parse(input)
}
// 3.2.6. Miscellaneous tokens
// https://datatracker.ietf.org/doc/html/rfc2822#section-3.2.6
// word = atom / quoted-string
fn word() -> impl Parser<char, Vec<char>, Error = Cheap<char>> {
choice((quoted_string(), atom()))
/// Parses a `word`: a quoted string if one is present, otherwise an atom.
fn word(input: &str) -> IResult<&str, String> {
    let mut either = alt((quoted_string, atom));
    either.parse(input)
}
// phrase = 1*word / obs-phrase
fn phrase() -> impl Parser<char, Vec<char>, Error = Cheap<char>> {
choice((obs_phrase(), word().repeated().at_least(1).flatten()))
/// Parses a `phrase`, trying `obs_phrase` first and falling back to one or
/// more words joined with a single space.
///
/// NOTE(review): `obs_phrase` succeeds whenever a first `word` parses, so the
/// space-joining fallback looks unreachable; confirm whether multi-word
/// display names are expected to keep their spaces.
fn phrase(input: &str) -> IResult<&str, String> {
    // 1*word, joined with a single space.
    fn spaced_words(input: &str) -> IResult<&str, String> {
        map(many1(word), |words| words.join(" ")).parse(input)
    }

    alt((obs_phrase, spaced_words)).parse(input)
}
// 3.4. Address Specification
// https://datatracker.ietf.org/doc/html/rfc2822#section-3.4
// mailbox = name-addr / addr-spec
pub(crate) fn mailbox() -> impl Parser<char, (Option<String>, (String, String)), Error = Cheap<char>>
{
choice((name_addr(), addr_spec().map(|addr| (None, addr))))
.padded()
.then_ignore(end())
/// Parses a complete mailbox and requires that nothing is left afterwards.
///
/// Returns `(display name, (local part, domain))`. The display name is
/// `None` when the input is a bare `addr-spec`.
pub(crate) fn mailbox(input: &str) -> IResult<&str, (Option<String>, (String, String))> {
    // addr-spec without a display name.
    fn bare_addr(input: &str) -> IResult<&str, (Option<String>, (String, String))> {
        map(addr_spec, |addr| (None, addr)).parse(input)
    }

    terminated(alt((name_addr, bare_addr)), eof).parse(input)
}
// name-addr = [display-name] angle-addr
fn name_addr() -> impl Parser<char, (Option<String>, (String, String)), Error = Cheap<char>> {
display_name().collect().or_not().then(angle_addr())
fn name_addr(input: &str) -> IResult<&str, (Option<String>, (String, String))> {
pair(opt(display_name), angle_addr).parse(input)
}
// angle-addr = [CFWS] "<" addr-spec ">" [CFWS] / obs-angle-addr
fn angle_addr() -> impl Parser<char, (String, String), Error = Cheap<char>> {
addr_spec()
.delimited_by(just('<').ignored(), just('>').ignored())
.padded()
fn angle_addr(input: &str) -> IResult<&str, (String, String)> {
delimited((cfws, char('<')), addr_spec, (char('>'), cfws)).parse(input)
}
// display-name = phrase
fn display_name() -> impl Parser<char, Vec<char>, Error = Cheap<char>> {
phrase()
/// Parses a display name, which is simply a `phrase`.
fn display_name(input: &str) -> IResult<&str, String> {
    let (rest, name) = phrase(input)?;
    Ok((rest, name))
}
// mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list
pub(crate) fn mailbox_list(
) -> impl Parser<char, Vec<(Option<String>, (String, String))>, Error = Cheap<char>> {
choice((name_addr(), addr_spec().map(|addr| (None, addr))))
.separated_by(just(',').padded())
.then_ignore(end())
/// Parses zero or more comma-separated mailboxes and requires that nothing
/// is left afterwards.
///
/// Each entry is `(display name, (local part, domain))`. Whitespace on
/// either side of a comma is skipped.
pub(crate) fn mailbox_list(input: &str) -> IResult<&str, Vec<(Option<String>, (String, String))>> {
    // Any run of Unicode whitespace, possibly empty.
    fn spaces(input: &str) -> IResult<&str, Vec<char>> {
        many0(satisfy(|c| c.is_whitespace())).parse(input)
    }

    // name-addr / addr-spec
    fn entry(input: &str) -> IResult<&str, (Option<String>, (String, String))> {
        alt((name_addr, map(addr_spec, |addr| (None, addr)))).parse(input)
    }

    let separator = delimited(spaces, char(','), spaces);
    terminated(separated_list0(separator, entry), eof).parse(input)
}
// 3.4.1. Addr-spec specification
// https://datatracker.ietf.org/doc/html/rfc2822#section-3.4.1
// addr-spec = local-part "@" domain
pub(super) fn addr_spec() -> impl Parser<char, (String, String), Error = Cheap<char>> {
local_part()
.collect()
.then_ignore(just('@'))
.then(domain().collect())
pub(super) fn addr_spec(input: &str) -> IResult<&str, (String, String)> {
pair(terminated(local_part, char('@')), domain).parse(input)
}
// local-part = dot-atom / quoted-string / obs-local-part
pub(super) fn local_part() -> impl Parser<char, Vec<char>, Error = Cheap<char>> {
choice((dot_atom(), quoted_string(), obs_local_part()))
/// Parses the local part of an address, trying `dot_atom`, then
/// `quoted_string`, then `obs_local_part`.
pub(super) fn local_part(input: &str) -> IResult<&str, String> {
    let mut candidates = alt((dot_atom, quoted_string, obs_local_part));
    candidates.parse(input)
}
// domain = dot-atom / domain-literal / obs-domain
pub(super) fn domain() -> impl Parser<char, Vec<char>, Error = Cheap<char>> {
pub(super) fn domain(input: &str) -> IResult<&str, String> {
// NOTE: omitting domain-literal since it may never be used
choice((dot_atom(), obs_domain()))
alt((dot_atom, obs_domain)).parse(input)
}
// 4.1. Miscellaneous obsolete tokens
// https://datatracker.ietf.org/doc/html/rfc2822#section-4.1
// obs-phrase = word *(word / "." / CFWS)
fn obs_phrase() -> impl Parser<char, Vec<char>, Error = Cheap<char>> {
fn obs_phrase(input: &str) -> IResult<&str, String> {
// NOTE: the CFWS is already captured by the word, no need to add
// it there.
word().chain(
choice((word(), just('.').repeated().exactly(1)))
.repeated()
.flatten(),
map(
pair(word, many0(alt((word, map(char('.'), |c| c.to_string()))))),
|(first, rest)| {
let mut result = first;
for part in rest {
result.push_str(&part);
}
result
},
)
.parse(input)
}
// 4.4. Obsolete Addressing
// https://datatracker.ietf.org/doc/html/rfc2822#section-4.4
// obs-local-part = word *("." word)
pub(super) fn obs_local_part() -> impl Parser<char, Vec<char>, Error = Cheap<char>> {
word().chain(just('.').chain(word()).repeated().flatten())
pub(super) fn obs_local_part(input: &str) -> IResult<&str, String> {
map(
pair(
word,
many0(map(pair(char('.'), word), |(dot, w)| {
format!("{}{}", dot, w)
})),
),
|(first, rest)| {
let mut result = first;
for part in rest {
result.push_str(&part);
}
result
},
)
.parse(input)
}
// obs-domain = atom *("." atom)
pub(super) fn obs_domain() -> impl Parser<char, Vec<char>, Error = Cheap<char>> {
atom().chain(just('.').chain(atom()).repeated().flatten())
pub(super) fn obs_domain(input: &str) -> IResult<&str, String> {
map(
pair(
atom,
many0(map(pair(char('.'), atom), |(dot, a)| {
format!("{}{}", dot, a)
})),
),
|(first, rest)| {
let mut result = first;
for part in rest {
result.push_str(&part);
}
result
},
)
.parse(input)
}

View File

@@ -3,7 +3,7 @@
//!
//! [RFC5336]: https://datatracker.ietf.org/doc/html/rfc5336
use chumsky::{error::Cheap, prelude::*};
use nom::{character::complete::satisfy, IResult, Parser};
// 3.3. Extended Mailbox Address Syntax
// https://datatracker.ietf.org/doc/html/rfc5336#section-3.3
@@ -12,6 +12,6 @@ use chumsky::{error::Cheap, prelude::*};
// UTF8-2 = <See Section 4 of RFC 3629>
// UTF8-3 = <See Section 4 of RFC 3629>
// UTF8-4 = <See Section 4 of RFC 3629>
pub(super) fn utf8_non_ascii() -> impl Parser<char, char, Error = Cheap<char>> {
filter(|c: &char| c.len_utf8() > 1)
/// Parses one character whose UTF-8 encoding takes more than one byte,
/// i.e. any non-ASCII character.
pub(super) fn utf8_non_ascii(input: &str) -> IResult<&str, char> {
    let is_multibyte = |c: char| !c.is_ascii();
    satisfy(is_multibyte).parse(input)
}

View File

@@ -5,7 +5,6 @@ use std::{
str::FromStr,
};
use chumsky::prelude::*;
use email_encoding::headers::writer::EmailWriter;
use super::parsers;
@@ -114,7 +113,7 @@ impl FromStr for Mailbox {
type Err = AddressError;
fn from_str(src: &str) -> Result<Mailbox, Self::Err> {
let (name, (user, domain)) = parsers::mailbox().parse(src).map_err(|_errs| {
let (_rest, (name, (user, domain))) = parsers::mailbox(src).map_err(|_errs| {
// TODO: improve error management
AddressError::InvalidInput
})?;
@@ -345,7 +344,7 @@ impl FromStr for Mailboxes {
fn from_str(src: &str) -> Result<Self, Self::Err> {
let mut mailboxes = Vec::new();
let parsed_mailboxes = parsers::mailbox_list().parse(src).map_err(|_errs| {
let (_rest, parsed_mailboxes) = parsers::mailbox_list(src).map_err(|_errs| {
// TODO: improve error management
AddressError::InvalidInput
})?;