From 0e74042b4e25e891a313bb239a571478fde71a9a Mon Sep 17 00:00:00 2001 From: Paolo Barbolini Date: Thu, 1 Apr 2021 12:29:51 +0200 Subject: [PATCH] Convert `String` `Body` line-endings to CRLF (#588) --- src/message/body.rs | 83 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 80 insertions(+), 3 deletions(-) diff --git a/src/message/body.rs b/src/message/body.rs index ab5f88f..f99d3e7 100644 --- a/src/message/body.rs +++ b/src/message/body.rs @@ -1,5 +1,6 @@ use std::{ io::{self, Write}, + mem, ops::Deref, }; @@ -30,13 +31,16 @@ impl Body { /// Automatically chooses the most efficient encoding between /// `7bit`, `quoted-printable` and `base64`. /// + /// If `String` is passed, line endings are converted to `CRLF`. + /// /// If `buf` is valid utf-8 a `String` should be supplied, as `String`s /// can be encoded as `7bit` or `quoted-printable`, while `Vec<u8>` always /// get encoded as `base64`. pub fn new<B: Into<MaybeString>>(buf: B) -> Self { - let buf: MaybeString = buf.into(); + let mut buf: MaybeString = buf.into(); let encoding = buf.encoding(); + buf.encode_crlf(); Self::new_impl(buf.into(), encoding) } @@ -44,6 +48,8 @@ impl Body { /// /// [`Body::new`] is generally the better option. /// + /// If `String` is passed, line endings are converted to `CRLF`. + /// /// Returns an [`Err`] giving back the supplied `buf`, in case the chosen /// encoding would have resulted into `buf` being encoded /// into an invalid body. 
@@ -51,12 +57,13 @@ impl Body { buf: B, encoding: ContentTransferEncoding, ) -> Result<Self, Vec<u8>> { - let buf: MaybeString = buf.into(); + let mut buf: MaybeString = buf.into(); if !buf.is_encoding_ok(encoding) { return Err(buf.into()); } + buf.encode_crlf(); Ok(Self::new_impl(buf.into(), encoding)) } @@ -162,6 +169,14 @@ impl MaybeString { } } + /// Encode line endings to CRLF if the variant is `String` + fn encode_crlf(&mut self) { + match self { + Self::String(string) => in_place_crlf_line_endings(string), + Self::Binary(_) => {} + } + } + /// Returns `true` if using `encoding` to encode this `MaybeString` /// would result into an invalid encoded body. fn is_encoding_ok(&self, encoding: ContentTransferEncoding) -> bool { @@ -322,9 +337,44 @@ where } } +/// In place conversion to CRLF line endings +fn in_place_crlf_line_endings(string: &mut String) { + let indices = find_all_lf_char_indices(&string); + + for i in indices { + // this relies on `indices` being in reverse order + string.insert(i, '\r'); + } +} + +/// Find indices to all places where `\r` should be inserted +/// in order to make `s` have CRLF line endings +/// +/// The list is reversed, which is more efficient. 
+fn find_all_lf_char_indices(s: &str) -> Vec<usize> { + let mut indices = Vec::new(); + + let mut found_lf = false; + for (i, c) in s.char_indices().rev() { + if mem::take(&mut found_lf) && c != '\r' { + // the previous character was `\n`, but this isn't a `\r` + indices.push(i + c.len_utf8()); + } + + found_lf = c == '\n'; + } + + if found_lf { + // the first character is `\n` + indices.push(0); + } + + indices +} + #[cfg(test)] mod test { - use super::{Body, ContentTransferEncoding}; + use super::{in_place_crlf_line_endings, Body, ContentTransferEncoding}; #[test] fn seven_bit_detect() { @@ -578,4 +628,31 @@ mod test { .as_bytes() ); } + + #[test] + fn crlf() { + let mut string = String::from("Send me a ✉️\nwith\nlettre!\n😀"); + + in_place_crlf_line_endings(&mut string); + assert_eq!(string, "Send me a ✉️\r\nwith\r\nlettre!\r\n😀"); + } + + #[test] + fn harsh_crlf() { + let mut string = String::from("\n\nSend me a ✉️\r\n\nwith\n\nlettre!\n\r\n😀"); + + in_place_crlf_line_endings(&mut string); + assert_eq!( + string, + "\r\n\r\nSend me a ✉️\r\n\r\nwith\r\n\r\nlettre!\r\n\r\n😀" + ); + } + + #[test] + fn crlf_noop() { + let mut string = String::from("\r\nSend me a ✉️\r\nwith\r\nlettre!\r\n😀"); + + in_place_crlf_line_endings(&mut string); + assert_eq!(string, "\r\nSend me a ✉️\r\nwith\r\nlettre!\r\n😀"); + } }