From b752170bd87d0d33fa6b002d0ea1794a09147746 Mon Sep 17 00:00:00 2001 From: Paolo Barbolini Date: Sat, 5 Jul 2025 17:24:42 +0200 Subject: [PATCH] refactor: replace `chumsky` with `nom` --- Cargo.lock | 85 +-------- Cargo.toml | 5 +- src/message/mailbox/parsers/rfc2234.rs | 22 ++- src/message/mailbox/parsers/rfc2822.rs | 252 +++++++++++++++++-------- src/message/mailbox/parsers/rfc5336.rs | 6 +- src/message/mailbox/types.rs | 5 +- 6 files changed, 192 insertions(+), 183 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fadb2b0..5d529f2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,18 +17,6 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" -[[package]] -name = "ahash" -version = "0.8.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" -dependencies = [ - "cfg-if", - "once_cell", - "version_check", - "zerocopy 0.7.35", -] - [[package]] name = "aho-corasick" version = "1.1.3" @@ -38,12 +26,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "allocator-api2" -version = "0.2.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" - [[package]] name = "anes" version = "0.1.6" @@ -458,16 +440,6 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" -[[package]] -name = "chumsky" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9" -dependencies = [ - "hashbrown", - "stacker", -] - [[package]] name = "ciborium" version = "0.2.2" @@ -1044,16 +1016,6 @@ dependencies = [ "crunchy", ] -[[package]] -name = "hashbrown" -version = "0.14.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" -dependencies = [ - "ahash", - "allocator-api2", -] - [[package]] name = "hermit-abi" version = "0.4.0" @@ -1331,7 +1293,6 @@ dependencies = [ "async-trait", "base64", "boring", - "chumsky", "criterion", "ed25519-dalek", "email-encoding", @@ -1765,7 +1726,7 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" dependencies = [ - "zerocopy 0.8.24", + "zerocopy", ] [[package]] @@ -1820,15 +1781,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "psm" -version = "0.1.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f58e5423e24c18cc840e1c98370b3993c6649cd1678b4d24318bcf0a083cbe88" -dependencies = [ - "cc", -] - [[package]] name = "quote" version = "1.0.40" @@ -2288,19 +2240,6 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" -[[package]] -name = "stacker" -version = "0.1.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "601f9201feb9b09c00266478bf459952b9ef9a6b94edb2f21eba14ab681a60a9" -dependencies = [ - "cc", - "cfg-if", - "libc", - "psm", - "windows-sys 0.59.0", -] - [[package]] name = "subtle" version = "2.6.1" @@ -2977,33 +2916,13 @@ dependencies = [ "synstructure", ] -[[package]] -name = "zerocopy" -version = "0.7.35" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" -dependencies = [ - "zerocopy-derive 0.7.35", -] - [[package]] name = "zerocopy" version = "0.8.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2586fea28e186957ef732a5f8b3be2da217d65c5969d4b1e17f973ebbe876879" dependencies = [ - "zerocopy-derive 0.8.24", -] - -[[package]] -name = "zerocopy-derive" -version = "0.7.35" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.100", + "zerocopy-derive", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 313134d..07bbbfa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,7 +20,7 @@ maintenance = { status = "actively-developed" } [dependencies] email_address = { version = "0.2.1", default-features = false } -chumsky = "0.9" +nom = "8" idna = "1" ## tracing support @@ -40,7 +40,6 @@ serde = { version = "1.0.110", features = ["derive"], optional = true } serde_json = { version = "1", optional = true } # smtp-transport -nom = { version = "8", optional = true } hostname = { version = "0.4", optional = true } # feature socket2 = { version = "0.6", optional = true } url = { version = "2.4", optional = true } @@ -107,7 +106,7 @@ mime03 = ["dep:mime"] file-transport = ["dep:uuid", "tokio1_crate?/fs", "tokio1_crate?/io-util"] file-transport-envelope = ["serde", "dep:serde_json", "file-transport"] sendmail-transport = ["tokio1_crate?/process", "tokio1_crate?/io-util", "async-std?/unstable"] -smtp-transport = ["dep:base64", "dep:nom", "dep:socket2", "dep:url", "dep:percent-encoding", "tokio1_crate?/rt", "tokio1_crate?/time", "tokio1_crate?/net"] +smtp-transport = ["dep:base64", "dep:socket2", "dep:url", "dep:percent-encoding", "tokio1_crate?/rt", "tokio1_crate?/time", "tokio1_crate?/net"] pool = ["dep:futures-util"] diff --git a/src/message/mailbox/parsers/rfc2234.rs b/src/message/mailbox/parsers/rfc2234.rs index 0ac88ce..d18ffe4 100644 --- a/src/message/mailbox/parsers/rfc2234.rs +++ b/src/message/mailbox/parsers/rfc2234.rs @@ -3,30 +3,34 @@ //! //! [RFC2234]: https://datatracker.ietf.org/doc/html/rfc2234 -use chumsky::{error::Cheap, prelude::*}; +use nom::{ + branch::alt, + character::complete::{char, satisfy}, + IResult, Parser, +}; // 6.1 Core Rules // https://datatracker.ietf.org/doc/html/rfc2234#section-6.1 // ALPHA = %x41-5A / %x61-7A ; A-Z / a-z -pub(super) fn alpha() -> impl Parser> { - filter(|c: &char| c.is_ascii_alphabetic()) +pub(super) fn alpha(input: &str) -> IResult<&str, char> { + satisfy(|c| c.is_ascii_alphabetic()).parse(input) } // DIGIT = %x30-39 // ; 0-9 -pub(super) fn digit() -> impl Parser> { - filter(|c: &char| c.is_ascii_digit()) +pub(super) fn digit(input: &str) -> IResult<&str, char> { + satisfy(|c| c.is_ascii_digit()).parse(input) } // DQUOTE = %x22 // ; " (Double Quote) -pub(super) fn dquote() -> impl Parser> { - just('"') +pub(super) fn dquote(input: &str) -> IResult<&str, char> { + char('"').parse(input) } // WSP = SP / HTAB // ; white space -pub(super) fn wsp() -> impl Parser> { - choice((just(' '), just('\t'))) +pub(super) fn wsp(input: &str) -> IResult<&str, char> { + alt((char(' '), char('\t'))).parse(input) } diff --git a/src/message/mailbox/parsers/rfc2822.rs b/src/message/mailbox/parsers/rfc2822.rs index c826b06..ce14e8e 100644 --- a/src/message/mailbox/parsers/rfc2822.rs +++ b/src/message/mailbox/parsers/rfc2822.rs @@ -3,7 +3,14 @@ //! //! [RFC2822]: https://datatracker.ietf.org/doc/html/rfc2822 -use chumsky::{error::Cheap, prelude::*}; +use nom::{ + branch::alt, + character::complete::{char, satisfy}, + combinator::{eof, map, opt}, + multi::{fold_many0, fold_many1, many0, many1, separated_list0}, + sequence::{delimited, pair, preceded, terminated}, + IResult, Parser, +}; use super::{rfc2234, rfc5336}; @@ -15,8 +22,8 @@ use super::{rfc2234, rfc5336}; // %d12 / ; carriage return, line feed, // %d14-31 / ; and white space characters // %d127 -fn no_ws_ctl() -> impl Parser> { - filter(|c| matches!(u32::from(*c), 1..=8 | 11 | 12 | 14..=31 | 127)) +fn no_ws_ctl(input: &str) -> IResult<&str, char> { + satisfy(|c| matches!(u32::from(c), 1..=8 | 11 | 12 | 14..=31 | 127)).parse(input) } // text = %d1-9 / ; Characters excluding CR and LF @@ -24,16 +31,16 @@ fn no_ws_ctl() -> impl Parser> { // %d12 / // %d14-127 / // obs-text -fn text() -> impl Parser> { - filter(|c| matches!(u32::from(*c), 1..=9 | 11 | 12 | 14..=127)) +fn text(input: &str) -> IResult<&str, char> { + satisfy(|c| matches!(u32::from(c), 1..=9 | 11 | 12 | 14..=127)).parse(input) } // 3.2.2. Quoted characters // https://datatracker.ietf.org/doc/html/rfc2822#section-3.2.2 // quoted-pair = ("\" text) / obs-qp -fn quoted_pair() -> impl Parser> { - just('\\').ignore_then(text()) +fn quoted_pair(input: &str) -> IResult<&str, char> { + preceded(char('\\'), text).parse(input) } // 3.2.3. Folding white space and comments @@ -41,17 +48,19 @@ fn quoted_pair() -> impl Parser> { // FWS = ([*WSP CRLF] 1*WSP) / ; Folding white space // obs-FWS -pub(super) fn fws() -> impl Parser, Error = Cheap> { - rfc2234::wsp() - .or_not() - .then_ignore(rfc2234::wsp().ignored().repeated()) +pub(super) fn fws(input: &str) -> IResult<&str, Option> { + map( + pair(opt(rfc2234::wsp), many0(rfc2234::wsp)), + |(first, _rest)| first, + ) + .parse(input) } // CFWS = *([FWS] comment) (([FWS] comment) / FWS) -pub(super) fn cfws() -> impl Parser, Error = Cheap> { +pub(super) fn cfws(input: &str) -> IResult<&str, Option> { // TODO: comment are not currently supported, so for now a cfws is // the same as a fws. - fws() + fws(input) } // 3.2.4. Atom @@ -68,13 +77,13 @@ pub(super) fn cfws() -> impl Parser, Error = Cheap> { // "`" / "{" / // "|" / "}" / // "~" -pub(super) fn atext() -> impl Parser> { - choice(( - rfc2234::alpha(), - rfc2234::digit(), - filter(|c| { +pub(super) fn atext(input: &str) -> IResult<&str, char> { + alt(( + rfc2234::alpha, + rfc2234::digit, + satisfy(|c| { matches!( - *c, + c, '!' | '#' | '$' | '%' @@ -96,29 +105,59 @@ pub(super) fn atext() -> impl Parser> { ) }), // also allow non ASCII UTF8 chars - rfc5336::utf8_non_ascii(), + rfc5336::utf8_non_ascii, )) + .parse(input) } // atom = [CFWS] 1*atext [CFWS] -pub(super) fn atom() -> impl Parser, Error = Cheap> { - cfws().chain(atext().repeated().at_least(1)) +pub(super) fn atom(input: &str) -> IResult<&str, String> { + map( + pair( + cfws, + fold_many1(atext, String::new, |mut acc, c| { + acc.push(c); + acc + }), + ), + |(_cfws, chars)| chars, + ) + .parse(input) } // dot-atom = [CFWS] dot-atom-text [CFWS] -pub(super) fn dot_atom() -> impl Parser, Error = Cheap> { - cfws().chain(dot_atom_text()) +pub(super) fn dot_atom(input: &str) -> IResult<&str, String> { + map(pair(cfws, dot_atom_text), |(_cfws, text)| text).parse(input) } // dot-atom-text = 1*atext *("." 1*atext) -pub(super) fn dot_atom_text() -> impl Parser, Error = Cheap> { - atext().repeated().at_least(1).chain( - just('.') - .chain(atext().repeated().at_least(1)) - .repeated() - .at_least(1) - .flatten(), +pub(super) fn dot_atom_text(input: &str) -> IResult<&str, String> { + map( + pair( + fold_many1(atext, String::new, |mut acc, c| { + acc.push(c); + acc + }), + many0(map( + pair( + char('.'), + fold_many1(atext, String::new, |mut acc, c| { + acc.push(c); + acc + }), + ), + |(dot, chars)| format!("{}{}", dot, chars), + )), + ), + |(first, rest)| { + let mut result = first; + for part in rest { + result.push_str(&part); + } + result + }, ) + .parse(input) } // 3.2.5. Quoted strings @@ -129,122 +168,171 @@ pub(super) fn dot_atom_text() -> impl Parser, Error = Cheap impl Parser> { - choice(( - filter(|c| matches!(u32::from(*c), 33 | 35..=91 | 93..=126)), - no_ws_ctl(), +fn qtext(input: &str) -> IResult<&str, char> { + alt(( + satisfy(|c| matches!(u32::from(c), 33 | 35..=91 | 93..=126)), + no_ws_ctl, )) + .parse(input) } // qcontent = qtext / quoted-pair -pub(super) fn qcontent() -> impl Parser> { - choice((qtext(), quoted_pair(), rfc5336::utf8_non_ascii())) +pub(super) fn qcontent(input: &str) -> IResult<&str, char> { + alt((qtext, quoted_pair, rfc5336::utf8_non_ascii)).parse(input) } // quoted-string = [CFWS] // DQUOTE *([FWS] qcontent) [FWS] DQUOTE // [CFWS] -fn quoted_string() -> impl Parser, Error = Cheap> { - rfc2234::dquote() - .ignore_then(fws().chain(qcontent()).repeated().flatten()) - .then_ignore(text::whitespace()) - .then_ignore(rfc2234::dquote()) +fn quoted_string(input: &str) -> IResult<&str, String> { + map( + delimited( + rfc2234::dquote, + fold_many0( + map(pair(fws, qcontent), |(_fws, c)| c), + String::new, + |mut acc, c| { + acc.push(c); + acc + }, + ), + preceded(many0(satisfy(|c| c.is_whitespace())), rfc2234::dquote), + ), + |s| s, + ) + .parse(input) } // 3.2.6. Miscellaneous tokens // https://datatracker.ietf.org/doc/html/rfc2822#section-3.2.6 // word = atom / quoted-string -fn word() -> impl Parser, Error = Cheap> { - choice((quoted_string(), atom())) +fn word(input: &str) -> IResult<&str, String> { + alt((quoted_string, atom)).parse(input) } // phrase = 1*word / obs-phrase -fn phrase() -> impl Parser, Error = Cheap> { - choice((obs_phrase(), word().repeated().at_least(1).flatten())) +fn phrase(input: &str) -> IResult<&str, String> { + alt((obs_phrase, map(many1(word), |words| words.join(" ")))).parse(input) } // 3.4. Address Specification // https://datatracker.ietf.org/doc/html/rfc2822#section-3.4 // mailbox = name-addr / addr-spec -pub(crate) fn mailbox() -> impl Parser, (String, String)), Error = Cheap> -{ - choice((name_addr(), addr_spec().map(|addr| (None, addr)))) - .padded() - .then_ignore(end()) +pub(crate) fn mailbox(input: &str) -> IResult<&str, (Option, (String, String))> { + terminated(alt((name_addr, map(addr_spec, |addr| (None, addr)))), eof).parse(input) } // name-addr = [display-name] angle-addr -fn name_addr() -> impl Parser, (String, String)), Error = Cheap> { - display_name().collect().or_not().then(angle_addr()) +fn name_addr(input: &str) -> IResult<&str, (Option, (String, String))> { + pair(opt(display_name), angle_addr).parse(input) } // angle-addr = [CFWS] "<" addr-spec ">" [CFWS] / obs-angle-addr -fn angle_addr() -> impl Parser> { - addr_spec() - .delimited_by(just('<').ignored(), just('>').ignored()) - .padded() +fn angle_addr(input: &str) -> IResult<&str, (String, String)> { + delimited((cfws, char('<')), addr_spec, (char('>'), cfws)).parse(input) } // display-name = phrase -fn display_name() -> impl Parser, Error = Cheap> { - phrase() +fn display_name(input: &str) -> IResult<&str, String> { + phrase(input) } // mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list -pub(crate) fn mailbox_list( -) -> impl Parser, (String, String))>, Error = Cheap> { - choice((name_addr(), addr_spec().map(|addr| (None, addr)))) - .separated_by(just(',').padded()) - .then_ignore(end()) +pub(crate) fn mailbox_list(input: &str) -> IResult<&str, Vec<(Option, (String, String))>> { + terminated( + separated_list0( + delimited( + many0(satisfy(|c| c.is_whitespace())), + char(','), + many0(satisfy(|c| c.is_whitespace())), + ), + alt((name_addr, map(addr_spec, |addr| (None, addr)))), + ), + eof, + ) + .parse(input) } // 3.4.1. Addr-spec specification // https://datatracker.ietf.org/doc/html/rfc2822#section-3.4.1 // addr-spec = local-part "@" domain -pub(super) fn addr_spec() -> impl Parser> { - local_part() - .collect() - .then_ignore(just('@')) - .then(domain().collect()) +pub(super) fn addr_spec(input: &str) -> IResult<&str, (String, String)> { + pair(terminated(local_part, char('@')), domain).parse(input) } // local-part = dot-atom / quoted-string / obs-local-part -pub(super) fn local_part() -> impl Parser, Error = Cheap> { - choice((dot_atom(), quoted_string(), obs_local_part())) +pub(super) fn local_part(input: &str) -> IResult<&str, String> { + alt((dot_atom, quoted_string, obs_local_part)).parse(input) } // domain = dot-atom / domain-literal / obs-domain -pub(super) fn domain() -> impl Parser, Error = Cheap> { +pub(super) fn domain(input: &str) -> IResult<&str, String> { // NOTE: omitting domain-literal since it may never be used - choice((dot_atom(), obs_domain())) + alt((dot_atom, obs_domain)).parse(input) } // 4.1. Miscellaneous obsolete tokens // https://datatracker.ietf.org/doc/html/rfc2822#section-4.1 // obs-phrase = word *(word / "." / CFWS) -fn obs_phrase() -> impl Parser, Error = Cheap> { +fn obs_phrase(input: &str) -> IResult<&str, String> { // NOTE: the CFWS is already captured by the word, no need to add // it there. - word().chain( - choice((word(), just('.').repeated().exactly(1))) - .repeated() - .flatten(), + map( + pair(word, many0(alt((word, map(char('.'), |c| c.to_string()))))), + |(first, rest)| { + let mut result = first; + for part in rest { + result.push_str(&part); + } + result + }, ) + .parse(input) } // 4.4. Obsolete Addressing // https://datatracker.ietf.org/doc/html/rfc2822#section-4.4 // obs-local-part = word *("." word) -pub(super) fn obs_local_part() -> impl Parser, Error = Cheap> { - word().chain(just('.').chain(word()).repeated().flatten()) +pub(super) fn obs_local_part(input: &str) -> IResult<&str, String> { + map( + pair( + word, + many0(map(pair(char('.'), word), |(dot, w)| { + format!("{}{}", dot, w) + })), + ), + |(first, rest)| { + let mut result = first; + for part in rest { + result.push_str(&part); + } + result + }, + ) + .parse(input) } // obs-domain = atom *("." atom) -pub(super) fn obs_domain() -> impl Parser, Error = Cheap> { - atom().chain(just('.').chain(atom()).repeated().flatten()) +pub(super) fn obs_domain(input: &str) -> IResult<&str, String> { + map( + pair( + atom, + many0(map(pair(char('.'), atom), |(dot, a)| { + format!("{}{}", dot, a) + })), + ), + |(first, rest)| { + let mut result = first; + for part in rest { + result.push_str(&part); + } + result + }, + ) + .parse(input) } diff --git a/src/message/mailbox/parsers/rfc5336.rs b/src/message/mailbox/parsers/rfc5336.rs index 4f1df96..77e10e1 100644 --- a/src/message/mailbox/parsers/rfc5336.rs +++ b/src/message/mailbox/parsers/rfc5336.rs @@ -3,7 +3,7 @@ //! //! [RFC5336]: https://datatracker.ietf.org/doc/html/rfc5336 -use chumsky::{error::Cheap, prelude::*}; +use nom::{character::complete::satisfy, IResult, Parser}; // 3.3. Extended Mailbox Address Syntax // https://datatracker.ietf.org/doc/html/rfc5336#section-3.3 @@ -12,6 +12,6 @@ use chumsky::{error::Cheap, prelude::*}; // UTF8-2 = // UTF8-3 = // UTF8-4 = -pub(super) fn utf8_non_ascii() -> impl Parser> { - filter(|c: &char| c.len_utf8() > 1) +pub(super) fn utf8_non_ascii(input: &str) -> IResult<&str, char> { + satisfy(|c| c.len_utf8() > 1).parse(input) } diff --git a/src/message/mailbox/types.rs b/src/message/mailbox/types.rs index 16ef53b..bb9d6f1 100644 --- a/src/message/mailbox/types.rs +++ b/src/message/mailbox/types.rs @@ -5,7 +5,6 @@ use std::{ str::FromStr, }; -use chumsky::prelude::*; use email_encoding::headers::writer::EmailWriter; use super::parsers; @@ -114,7 +113,7 @@ impl FromStr for Mailbox { type Err = AddressError; fn from_str(src: &str) -> Result { - let (name, (user, domain)) = parsers::mailbox().parse(src).map_err(|_errs| { + let (_rest, (name, (user, domain))) = parsers::mailbox(src).map_err(|_errs| { // TODO: improve error management AddressError::InvalidInput })?; @@ -345,7 +344,7 @@ impl FromStr for Mailboxes { fn from_str(src: &str) -> Result { let mut mailboxes = Vec::new(); - let parsed_mailboxes = parsers::mailbox_list().parse(src).map_err(|_errs| { + let (_rest, parsed_mailboxes) = parsers::mailbox_list(src).map_err(|_errs| { // TODO: improve error management AddressError::InvalidInput })?;