From 11b4acf0cdf29a5ca19121e940144b8806e578e3 Mon Sep 17 00:00:00 2001 From: Paolo Barbolini Date: Sat, 10 Sep 2022 12:40:00 +0200 Subject: [PATCH] Improve header encoding and wrapping (#811) Also cleans up the encoder a lot, removing some complicated logic introduced by the initial round of implementation --- Cargo.toml | 2 +- src/message/header/content_disposition.rs | 14 +- src/message/header/mailbox.rs | 12 +- src/message/header/mod.rs | 157 +++++++++------------- src/message/header/textual.rs | 11 ++ src/message/mailbox/types.rs | 4 +- src/message/mod.rs | 4 +- 7 files changed, 97 insertions(+), 107 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 7e3c33e..2316f2b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,7 +29,7 @@ mime = { version = "0.3.4", optional = true } fastrand = { version = "1.4", optional = true } quoted_printable = { version = "0.4", optional = true } base64 = { version = "0.13", optional = true } -email-encoding = { version = "0.1.1", optional = true } +email-encoding = { git = "https://github.com/lettre/email-encoding.git", rev = "e255be51b1d7f957464cc9f135aef74e3c78bb64", optional = true } # file transport uuid = { version = "1", features = ["v4"], optional = true } diff --git a/src/message/header/content_disposition.rs b/src/message/header/content_disposition.rs index df8e0bd..0eeabad 100644 --- a/src/message/header/content_disposition.rs +++ b/src/message/header/content_disposition.rs @@ -37,13 +37,15 @@ impl ContentDisposition { let mut encoded_value = String::new(); let line_len = "Content-Disposition: ".len(); - let mut w = EmailWriter::new(&mut encoded_value, line_len, false); - w.write_str(kind).expect("writing `kind` returned an error"); - w.write_char(';').expect("writing `;` returned an error"); - w.space(); + { + let mut w = EmailWriter::new(&mut encoded_value, line_len, 0, false, false); + w.write_str(kind).expect("writing `kind` returned an error"); + w.write_char(';').expect("writing `;` returned an error"); + w.optional_breakpoint(); - email_encoding::headers::rfc2231::encode("filename", file_name, &mut w) - .expect("some Write implementation returned an error"); + email_encoding::headers::rfc2231::encode("filename", file_name, &mut w) + .expect("some Write implementation returned an error"); + } Self(HeaderValue::dangerous_new_pre_encoded( Self::name(), diff --git a/src/message/header/mailbox.rs b/src/message/header/mailbox.rs index 25c0235..a40193e 100644 --- a/src/message/header/mailbox.rs +++ b/src/message/header/mailbox.rs @@ -30,8 +30,10 @@ macro_rules! mailbox_header { fn display(&self) -> HeaderValue { let mut encoded_value = String::new(); let line_len = $header_name.len() + ": ".len(); - let mut w = EmailWriter::new(&mut encoded_value, line_len, false); - self.0.encode(&mut w).expect("writing `Mailbox` returned an error"); + { + let mut w = EmailWriter::new(&mut encoded_value, line_len, 0, false, false); + self.0.encode(&mut w).expect("writing `Mailbox` returned an error"); + } HeaderValue::dangerous_new_pre_encoded(Self::name(), self.0.to_string(), encoded_value) } @@ -78,8 +80,10 @@ macro_rules! mailboxes_header { fn display(&self) -> HeaderValue { let mut encoded_value = String::new(); let line_len = $header_name.len() + ": ".len(); - let mut w = EmailWriter::new(&mut encoded_value, line_len, false); - self.0.encode(&mut w).expect("writing `Mailboxes` returned an error"); + { + let mut w = EmailWriter::new(&mut encoded_value, line_len, 0, false, false); + self.0.encode(&mut w).expect("writing `Mailboxes` returned an error"); + } HeaderValue::dangerous_new_pre_encoded(Self::name(), self.0.to_string(), encoded_value) } diff --git a/src/message/header/mod.rs b/src/message/header/mod.rs index df3b409..98d260c 100644 --- a/src/message/header/mod.rs +++ b/src/message/header/mod.rs @@ -342,28 +342,21 @@ struct HeaderValueEncoder<'a> { impl<'a> HeaderValueEncoder<'a> { fn encode(name: &str, value: &'a str, f: &'a mut impl fmt::Write) -> fmt::Result { - let (words_iter, encoder) = Self::new(name, value, f); - encoder.format(words_iter) + let encoder = Self::new(name, f); + encoder.format(value.split_inclusive(' ')) } - fn new( - name: &str, - value: &'a str, - writer: &'a mut dyn Write, - ) -> (WordsPlusFillIterator<'a>, Self) { + fn new(name: &str, writer: &'a mut dyn Write) -> Self { let line_len = name.len() + ": ".len(); - let writer = EmailWriter::new(writer, line_len, false); + let writer = EmailWriter::new(writer, line_len, 0, false, false); - ( - WordsPlusFillIterator { s: value }, - Self { - writer, - encode_buf: String::new(), - }, - ) + Self { + writer, + encode_buf: String::new(), + } } - fn format(mut self, words_iter: WordsPlusFillIterator<'_>) -> fmt::Result { + fn format(mut self, words_iter: impl Iterator) -> fmt::Result { for next_word in words_iter { let allowed = allowed_str(next_word); @@ -391,71 +384,20 @@ impl<'a> HeaderValueEncoder<'a> { return Ok(()); } - // It is important that we don't encode leading whitespace otherwise it breaks wrapping. - let first_not_allowed = self - .encode_buf - .bytes() - .enumerate() - .find(|(_i, c)| !allowed_char(*c)) - .map(|(i, _)| i); - // May as well also write the tail in plain text. - let last_not_allowed = self - .encode_buf - .bytes() - .enumerate() - .rev() - .find(|(_i, c)| !allowed_char(*c)) - .map(|(i, _)| i + 1); + let prefix = self.encode_buf.trim_end_matches(' '); + email_encoding::headers::rfc2047::encode(prefix, &mut self.writer)?; - let (prefix, to_encode, suffix) = match first_not_allowed { - Some(first_not_allowed) => { - let last_not_allowed = last_not_allowed.unwrap(); - - let (remaining, suffix) = self.encode_buf.split_at(last_not_allowed); - let (prefix, to_encode) = remaining.split_at(first_not_allowed); - - (prefix, to_encode, suffix) - } - None => ("", self.encode_buf.as_str(), ""), - }; - - self.writer.folding().write_str(prefix)?; - email_encoding::headers::rfc2047::encode(to_encode, &mut self.writer)?; - self.writer.folding().write_str(suffix)?; + // TODO: add a better API for doing this in email-encoding + let spaces = self.encode_buf.len() - prefix.len(); + for _ in 0..spaces { + self.writer.space(); + } self.encode_buf.clear(); Ok(()) } } -/// Iterator yielding a string split by space, but spaces are included before the next word. -struct WordsPlusFillIterator<'a> { - s: &'a str, -} - -impl<'a> Iterator for WordsPlusFillIterator<'a> { - type Item = &'a str; - - fn next(&mut self) -> Option { - if self.s.is_empty() { - return None; - } - - let next_word = self - .s - .bytes() - .enumerate() - .skip(1) - .find(|&(_i, c)| c == b' ') - .map(|(i, _)| i) - .unwrap_or(self.s.len()); - - let word = &self.s[..next_word]; - self.s = &self.s[word.len()..]; - Some(word) - } -} - fn allowed_str(s: &str) -> bool { s.bytes().all(allowed_char) } @@ -468,7 +410,8 @@ const fn allowed_char(c: u8) -> bool { mod tests { use pretty_assertions::assert_eq; - use super::{HeaderName, HeaderValue, Headers}; + use super::{HeaderName, HeaderValue, Headers, To}; + use crate::message::Mailboxes; #[test] fn valid_headername() { @@ -619,7 +562,7 @@ mod tests { assert_eq!( headers.to_string(), - "To: Se=?utf-8?b?w6E=?=n \r\n" + "To: =?utf-8?b?U2XDoW4=?= \r\n" ); } @@ -640,19 +583,46 @@ mod tests { #[test] fn format_special_with_folding() { let mut headers = Headers::new(); - headers.insert_raw(HeaderValue::new( - HeaderName::new_from_ascii_str("To"), - "🌍 , 🦆 Everywhere , Иванов Иван Иванович , Jānis Bērziņš , Seán Ó Rudaí ".to_string(), - ) ); + let to = To::from(Mailboxes::from_iter([ + "🌍 ".parse().unwrap(), + "🦆 Everywhere ".parse().unwrap(), + "Иванов Иван Иванович ".parse().unwrap(), + "Jānis Bērziņš ".parse().unwrap(), + "Seán Ó Rudaí ".parse().unwrap(), + ])); + headers.set(to); assert_eq!( headers.to_string(), concat!( - "To: =?utf-8?b?8J+MjQ==?= , =?utf-8?b?8J+mhg==?= Everywhere\r\n", - " , =?utf-8?b?0JjQstCw0L3QvtCyINCY0LLQsNC9INCY0LLQsNC9?=\r\n", - " =?utf-8?b?0L7QstC40Yc=?= , J=?utf-8?b?xIFuaXMgQsST?=\r\n", - " =?utf-8?b?cnppxYbFoQ==?= , Se=?utf-8?b?w6FuIMOTIFJ1?=\r\n", - " =?utf-8?b?ZGHDrQ==?= \r\n", + "To: =?utf-8?b?8J+MjQ==?= , =?utf-8?b?8J+mhiBFdmVyeXdo?=\r\n", + " =?utf-8?b?ZXJl?= , =?utf-8?b?0JjQstCw0L3QvtCyINCY0LI=?=\r\n", + " =?utf-8?b?0LDQvSDQmNCy0LDQvdC+0LLQuNGH?= ,\r\n", + " =?utf-8?b?SsSBbmlzIELEk3J6acWGxaE=?= , =?utf-8?b?U2U=?=\r\n", + " =?utf-8?b?w6FuIMOTIFJ1ZGHDrQ==?= \r\n", + ) + ); + } + + #[test] + fn format_special_with_folding_raw() { + let mut headers = Headers::new(); + headers.insert_raw(HeaderValue::new( + HeaderName::new_from_ascii_str("To"), + "🌍 , 🦆 Everywhere , Иванов Иван Иванович , Jānis Bērziņš , Seán Ó Rudaí ".to_string(), + )); + + // TODO: fix the fact that the encoder doesn't know that + // the space between the name and the address should be + // removed when wrapping. + assert_eq!( + headers.to_string(), + concat!( + "To: =?utf-8?b?8J+MjQ==?= , =?utf-8?b?8J+mhg==?=\r\n", + " Everywhere , =?utf-8?b?0JjQstCw0L3QvtCyINCY0LLQsNC9?=\r\n", + " =?utf-8?b?INCY0LLQsNC90L7QstC40Yc=?= ,\r\n", + " =?utf-8?b?SsSBbmlzIELEk3J6acWGxaE=?= ,\r\n", + " =?utf-8?b?U2XDoW4gw5MgUnVkYcOt?= \r\n", ) ); } @@ -717,17 +687,20 @@ mod tests { "quoted-printable".to_string(), )); + // TODO: fix the fact that the encoder doesn't know that + // the space between the name and the address should be + // removed when wrapping. assert_eq!( headers.to_string(), concat!( "Subject: Hello! This is lettre, and this\r\n", " IsAVeryLongLineDoYouKnowWhatsGoingToHappenIGuessWeAreGoingToFindOut. Ok I\r\n", " guess that's it!\r\n", - "To: =?utf-8?b?8J+MjQ==?= , =?utf-8?b?8J+mhg==?= Everywhere\r\n", - " , =?utf-8?b?0JjQstCw0L3QvtCyINCY0LLQsNC9INCY0LLQsNC9?=\r\n", - " =?utf-8?b?0L7QstC40Yc=?= , J=?utf-8?b?xIFuaXMgQsST?=\r\n", - " =?utf-8?b?cnppxYbFoQ==?= , Se=?utf-8?b?w6FuIMOTIFJ1?=\r\n", - " =?utf-8?b?ZGHDrQ==?= \r\n", + "To: =?utf-8?b?8J+MjQ==?= , =?utf-8?b?8J+mhg==?=\r\n", + " Everywhere , =?utf-8?b?0JjQstCw0L3QvtCyINCY0LLQsNC9?=\r\n", + " =?utf-8?b?INCY0LLQsNC90L7QstC40Yc=?= ,\r\n", + " =?utf-8?b?SsSBbmlzIELEk3J6acWGxaE=?= ,\r\n", + " =?utf-8?b?U2XDoW4gw5MgUnVkYcOt?= \r\n", "From: Someone \r\n", "Content-Transfer-Encoding: quoted-printable\r\n", ) @@ -745,8 +718,8 @@ mod tests { assert_eq!( headers.to_string(), concat!( - "Subject: =?utf-8?b?77yL5Luu5ZCN?= :a;go;\r\n", - " ;;;;;s;;;;;;;;;;;;;;;;fffeinmjggggggggg=?utf-8?b?772G44Gj?=\r\n" + "Subject: =?utf-8?b?77yL5Luu5ZCN?= :a;go; =?utf-8?b?Ozs7OztzOzs7Ozs7Ozs7?=\r\n", + " =?utf-8?b?Ozs7Ozs7O2ZmZmVpbm1qZ2dnZ2dnZ2dn772G44Gj?=\r\n", ) ); } diff --git a/src/message/header/textual.rs b/src/message/header/textual.rs index 9006547..0fedf5c 100644 --- a/src/message/header/textual.rs +++ b/src/message/header/textual.rs @@ -109,6 +109,17 @@ mod test { ); } + #[test] + fn format_utf8_word() { + let mut headers = Headers::new(); + headers.set(Subject("Administratör".into())); + + assert_eq!( + headers.to_string(), + "Subject: =?utf-8?b?QWRtaW5pc3RyYXTDtnI=?=\r\n" + ); + } + #[test] fn parse_ascii() { let mut headers = Headers::new(); diff --git a/src/message/mailbox/types.rs b/src/message/mailbox/types.rs index 7f9073f..c6ca090 100644 --- a/src/message/mailbox/types.rs +++ b/src/message/mailbox/types.rs @@ -70,7 +70,7 @@ impl Mailbox { pub(crate) fn encode(&self, w: &mut EmailWriter<'_>) -> FmtResult { if let Some(name) = &self.name { email_encoding::headers::quoted_string::encode(name, w)?; - w.space(); + w.optional_breakpoint(); w.write_char('<')?; } @@ -275,7 +275,7 @@ impl Mailboxes { for mailbox in self.iter() { if !mem::take(&mut first) { w.write_char(',')?; - w.space(); + w.optional_breakpoint(); } mailbox.encode(w)?; diff --git a/src/message/mod.rs b/src/message/mod.rs index 50f8e8c..70cf5a1 100644 --- a/src/message/mod.rs +++ b/src/message/mod.rs @@ -673,7 +673,7 @@ mod test { "Date: Tue, 15 Nov 1994 08:12:31 +0000\r\n", "From: =?utf-8?b?0JrQsNC4?= \r\n", "To: \"Pony O.P.\" \r\n", - "Subject: =?utf-8?b?0Y/So9CwINC10Lsg0LHQtdC705nQvQ==?=!\r\n", + "Subject: =?utf-8?b?0Y/So9CwINC10Lsg0LHQtdC705nQvSE=?=\r\n", "Content-Transfer-Encoding: 7bit\r\n", "\r\n", "Happy new year!" @@ -711,7 +711,7 @@ mod test { "Bcc: hidden@example.com\r\n", "From: =?utf-8?b?0JrQsNC4?= \r\n", "To: \"Pony O.P.\" \r\n", - "Subject: =?utf-8?b?0Y/So9CwINC10Lsg0LHQtdC705nQvQ==?=!\r\n", + "Subject: =?utf-8?b?0Y/So9CwINC10Lsg0LHQtdC705nQvSE=?=\r\n", "Content-Transfer-Encoding: 7bit\r\n", "\r\n", "Happy new year!"