Add MIME support

Add a MIME config option and a LineEnding enum, add the code needed to encode and decode with them, and remove decode_ws.
2025-03-04 11:27:29 +00:00 · 2017-03-26 01:15:03 +00:00 · 2017-03-26 01:15:03 +00:00 · d0bba9775f
commit d0bba9775f
parent 23381550ec
2 changed files with 193 additions and 53 deletions
--- a/src/lib.rs
+++ b/src/lib.rs
@ -15,6 +15,12 @@ pub enum CharacterSet {
    UrlSafe
 }

+#[derive(Clone, Copy, Debug)]
+pub enum LineEnding {
+    LF,
+    CRLF,
+}
+
 /// Contains configuration parameters for base64 encoding
 #[derive(Clone, Copy, Debug)]
 pub struct Config {
@ -22,11 +28,45 @@ pub struct Config {
    pub char_set: CharacterSet,
    /// True to pad output with `=` characters
    pub pad: bool,
+    /// Remove whitespace before decoding, at the cost of an allocation
+    pub strip_whitespace: bool,
+    /// Characters per line when encoding; None (or Some(0), though None is preferred) means no line breaks
+    pub line_size: Option<usize>,
+    /// Unix (LF) or Windows (CRLF) line endings; ignored if `line_size` is None or Some(0)
+    pub line_ending: LineEnding,
 }

-pub static STANDARD: Config = Config {char_set: CharacterSet::Standard, pad: true};
-pub static URL_SAFE: Config = Config {char_set: CharacterSet::UrlSafe, pad: true};
-pub static URL_SAFE_NO_PAD: Config = Config {char_set: CharacterSet::UrlSafe, pad: false};
+pub static STANDARD: Config = Config {
+    char_set: CharacterSet::Standard,
+    pad: true,
+    strip_whitespace: false,
+    line_size: None,
+    line_ending: LineEnding::LF,
+};
+
+pub static MIME: Config = Config {
+    char_set: CharacterSet::Standard,
+    pad: true,
+    strip_whitespace: true,
+    line_size: Some(76),
+    line_ending: LineEnding::CRLF,
+};
+
+pub static URL_SAFE: Config = Config {
+    char_set: CharacterSet::UrlSafe,
+    pad: true,
+    strip_whitespace: false,
+    line_size: None,
+    line_ending: LineEnding::LF,
+};
+
+pub static URL_SAFE_NO_PAD: Config = Config {
+    char_set: CharacterSet::UrlSafe,
+    pad: false,
+    strip_whitespace: false,
+    line_size: None,
+    line_ending: LineEnding::LF,
+};


 #[derive(Debug, PartialEq, Eq)]
@ -95,30 +135,6 @@ pub fn decode<T: ?Sized + AsRef<[u8]>>(input: &T) -> Result<Vec<u8>, DecodeError
    decode_config(input, STANDARD)
 }

-///DEPRECATED -- will be replaced by `decode_config(input, Base64Mode::MIME);`
-///
-///Decode from string reference as octets.
-///Returns a Result containing a Vec<u8>.
-///Ignores extraneous whitespace.
-///
-///# Example
-///
-///```rust
-///extern crate base64;
-///
-///fn main() {
-///    let bytes = base64::decode_ws("aG VsbG8gd2\r\n9ybGQ=").unwrap();
-///    println!("{:?}", bytes);
-///}
-///```
-pub fn decode_ws(input: &str) -> Result<Vec<u8>, DecodeError> {
-    let mut raw = Vec::<u8>::with_capacity(input.len());
-    raw.extend(input.bytes().filter(|b| !b" \n\t\r\x0c".contains(b)));
-
-    let sans_ws = String::from_utf8(raw).unwrap();
-    decode_config(&sans_ws, STANDARD)
-}
-
 ///Encode arbitrary octets as base64.
 ///Returns a String.
 ///
@ -136,7 +152,7 @@ pub fn decode_ws(input: &str) -> Result<Vec<u8>, DecodeError> {
 ///}
 ///```
 pub fn encode_config(input: &[u8], config: Config) -> String {
-    let mut buf = String::with_capacity(encoded_size(input.len()));
+    let mut buf = String::with_capacity(encoded_size(input.len(), config));

    encode_config_buf(input, config, &mut buf);

@ -144,18 +160,26 @@ pub fn encode_config(input: &[u8], config: Config) -> String {
 }

 /// calculate the base64 encoded string size, including padding
-fn encoded_size(bytes_len: usize) -> usize {
+fn encoded_size(bytes_len: usize, config: Config) -> usize {
    let rem = bytes_len % 3;

    let complete_input_chunks = bytes_len / 3;
    let complete_output_chars = complete_input_chunks * 4;
-    let leftover_output_chars = if rem == 0 {
-        0
+    let printing_output_chars = if rem == 0 {
+        complete_output_chars
    } else {
-        4
+        complete_output_chars + 4
+    };
+    let line_ending_length = match config.line_ending {
+        LineEnding::CRLF => 2,
+        LineEnding::LF => 1,
+    };
+    let line_ending_output_chars = match config.line_size {
+        Some(0) | None => 0,
+        Some(n) => printing_output_chars / n * line_ending_length,
    };

-    return complete_output_chars + leftover_output_chars;
+    return printing_output_chars + line_ending_output_chars;
 }

 ///Encode arbitrary octets as base64.
@ -183,7 +207,7 @@ pub fn encode_config_buf(input: &[u8], config: Config, buf: &mut String) {
    };

    // reserve to make sure the memory we'll be writing to with unsafe is allocated
-    buf.reserve(encoded_size(input.len()));
+    buf.reserve(encoded_size(input.len(), config));

    let orig_buf_len = buf.len();
    let mut fast_loop_output_buf_len = orig_buf_len;
@ -255,6 +279,25 @@ pub fn encode_config_buf(input: &[u8], config: Config, buf: &mut String) {
            raw.push(0x3d);
        }
    }
+
+    if config.line_size.is_some() && config.line_size.unwrap() > 0 {
+        let line_size = config.line_size.unwrap();
+        let len = raw.len();
+        let mut i = 0;
+        let mut j = 0;
+
+        while i < len {
+            if i > 0 && i % line_size == 0 {
+                match config.line_ending {
+                    LineEnding::LF => { raw.insert(j, b'\n'); j += 1; }
+                    LineEnding::CRLF => { raw.insert(j, b'\r'); raw.insert(j + 1, b'\n'); j += 2; }
+                }
+            }
+
+            i += 1;
+            j += 1;
+        }
+    }
 }

 ///Decode from string reference as octets.
@ -303,7 +346,16 @@ pub fn decode_config_buf<T: ?Sized + AsRef<[u8]>>(input: &T,
                                                  config: Config,
                                                  buffer: &mut Vec<u8>)
                                                  -> Result<(), DecodeError> {
-    let input_bytes = input.as_ref();
+    let mut input_copy;
+    let input_bytes = if config.strip_whitespace {
+        input_copy = Vec::<u8>::with_capacity(input.as_ref().len());
+        input_copy.extend(input.as_ref().iter().filter(|b| !b" \n\t\r\x0c".contains(b)));
+
+        input_copy.as_ref()
+    } else {
+        input.as_ref()
+    };
+
    let ref decode_table = match config.char_set {
        CharacterSet::Standard => tables::STANDARD_DECODE,
        CharacterSet::UrlSafe => tables::URL_SAFE_DECODE,
@ -504,22 +556,88 @@ pub fn decode_config_buf<T: ?Sized + AsRef<[u8]>>(input: &T,

 #[cfg(test)]
 mod tests {
-    use super::encoded_size;
+    use super::{encoded_size, STANDARD, MIME, Config, CharacterSet, LineEnding};

    #[test]
    fn encoded_size_correct() {
-        assert_eq!(0, encoded_size(0));
+        assert_eq!(0, encoded_size(0, STANDARD));

-        assert_eq!(4, encoded_size(1));
-        assert_eq!(4, encoded_size(2));
-        assert_eq!(4, encoded_size(3));
+        assert_eq!(4, encoded_size(1, STANDARD));
+        assert_eq!(4, encoded_size(2, STANDARD));
+        assert_eq!(4, encoded_size(3, STANDARD));

-        assert_eq!(8, encoded_size(4));
-        assert_eq!(8, encoded_size(5));
-        assert_eq!(8, encoded_size(6));
+        assert_eq!(8, encoded_size(4, STANDARD));
+        assert_eq!(8, encoded_size(5, STANDARD));
+        assert_eq!(8, encoded_size(6, STANDARD));

-        assert_eq!(12, encoded_size(7));
-        assert_eq!(12, encoded_size(8));
-        assert_eq!(12, encoded_size(9));
+        assert_eq!(12, encoded_size(7, STANDARD));
+        assert_eq!(12, encoded_size(8, STANDARD));
+        assert_eq!(12, encoded_size(9, STANDARD));
+
+        assert_eq!(72, encoded_size(54, STANDARD));
+
+        assert_eq!(76, encoded_size(55, STANDARD));
+        assert_eq!(76, encoded_size(56, STANDARD));
+        assert_eq!(76, encoded_size(57, STANDARD));
+
+        assert_eq!(80, encoded_size(58, STANDARD));
+    }
+
+    #[test]
+    fn encoded_size_correct_mime() {
+        assert_eq!(0, encoded_size(0, MIME));
+
+        assert_eq!(4, encoded_size(1, MIME));
+        assert_eq!(4, encoded_size(2, MIME));
+        assert_eq!(4, encoded_size(3, MIME));
+
+        assert_eq!(8, encoded_size(4, MIME));
+        assert_eq!(8, encoded_size(5, MIME));
+        assert_eq!(8, encoded_size(6, MIME));
+
+        assert_eq!(12, encoded_size(7, MIME));
+        assert_eq!(12, encoded_size(8, MIME));
+        assert_eq!(12, encoded_size(9, MIME));
+
+        assert_eq!(72, encoded_size(54, MIME));
+
+        assert_eq!(78, encoded_size(55, MIME));
+        assert_eq!(78, encoded_size(56, MIME));
+        assert_eq!(78, encoded_size(57, MIME));
+
+        assert_eq!(82, encoded_size(58, MIME));
+    }
+
+    #[test]
+    fn encoded_size_correct_lf() {
+        let config = Config {
+            char_set: CharacterSet::Standard,
+            pad: true,
+            strip_whitespace: false,
+            line_size: Some(76),
+            line_ending: LineEnding::LF,
+        };
+
+        assert_eq!(0, encoded_size(0, config));
+
+        assert_eq!(4, encoded_size(1, config));
+        assert_eq!(4, encoded_size(2, config));
+        assert_eq!(4, encoded_size(3, config));
+
+        assert_eq!(8, encoded_size(4, config));
+        assert_eq!(8, encoded_size(5, config));
+        assert_eq!(8, encoded_size(6, config));
+
+        assert_eq!(12, encoded_size(7, config));
+        assert_eq!(12, encoded_size(8, config));
+        assert_eq!(12, encoded_size(9, config));
+
+        assert_eq!(72, encoded_size(54, config));
+
+        assert_eq!(77, encoded_size(55, config));
+        assert_eq!(77, encoded_size(56, config));
+        assert_eq!(77, encoded_size(57, config));
+
+        assert_eq!(81, encoded_size(58, config));
    }
 }
--- a/tests/tests.rs
+++ b/tests/tests.rs
@ -14,8 +14,8 @@ fn compare_decode(expected: &str, target: &str) {
    assert_eq!(expected, String::from_utf8(decode(target.as_bytes()).unwrap()).unwrap());
 }

-fn compare_decode_ws(expected: &str, target: &str) {
-    assert_eq!(expected, String::from_utf8(decode_ws(target).unwrap()).unwrap());
+fn compare_decode_mime(expected: &str, target: &str) {
+    assert_eq!(expected, String::from_utf8(decode_config(target, MIME).unwrap()).unwrap());
 }

 fn push_rand(buf: &mut Vec<u8>, len: usize) {
@ -515,13 +515,35 @@ fn decode_reject_null() {
    assert!(decode("YWx\0pY2U=").is_ok());
 }

-
-//TODO unicode tests
-//put in a seperate file so this remains valid ascii
+#[test]
+fn decode_mime_allow_space() {
+    assert!(decode_config("YWx pY2U=", MIME).is_ok());
+}

 #[test]
-fn decode_ws_absurd_whitespace() {
-    compare_decode_ws("how could you let this happen",
+fn decode_mime_allow_tab() {
+    assert!(decode_config("YWx\tpY2U=", MIME).is_ok());
+}
+
+#[test]
+fn decode_mime_allow_nl() {
+    assert!(decode_config("YWx\npY2U=", MIME).is_ok());
+}
+
+#[test]
+fn decode_mime_allow_crnl() {
+    assert!(decode_config("YWx\r\npY2U=", MIME).is_ok());
+}
+
+#[test]
+#[should_panic]
+fn decode_mime_reject_null() {
+    assert!(decode_config("YWx\0pY2U=", MIME).is_ok());
+}
+
+#[test]
+fn decode_mime_absurd_whitespace() {
+    compare_decode_mime("how could you let this happen",
        "\n aG93I\n\nGNvd\r\nWxkI HlvdSB \tsZXQgdGh\rpcyBo\x0cYXBwZW4 =   ");
 }