Add MIME support

Add a MIME config option and a LineEnding enum, along with the code needed to encode and decode with them.
Also remove decode_ws.
This commit is contained in:
Alice Maz 2017-03-26 01:15:03 +00:00
parent 23381550ec
commit d0bba9775f
2 changed files with 193 additions and 53 deletions

View File

@ -15,6 +15,12 @@ pub enum CharacterSet {
UrlSafe
}
/// Line terminator written between lines when encoded output is wrapped.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LineEnding {
    /// Unix-style line ending: a single `\n`.
    LF,
    /// Windows-style line ending: `\r\n`.
    CRLF,
}
/// Contains configuration parameters for base64 encoding
#[derive(Clone, Copy, Debug)]
pub struct Config {
@ -22,11 +28,45 @@ pub struct Config {
pub char_set: CharacterSet,
/// True to pad output with `=` characters
pub pad: bool,
/// Remove whitespace before decoding, at the cost of an allocation
pub strip_whitespace: bool,
/// Characters per line, None (or Some(0), but less "proper") for no linebreaks
pub line_size: Option<usize>,
/// Unix or Windows line endings, ignored if above None/Some(0)
pub line_ending: LineEnding,
}
pub static STANDARD: Config = Config {char_set: CharacterSet::Standard, pad: true};
pub static URL_SAFE: Config = Config {char_set: CharacterSet::UrlSafe, pad: true};
pub static URL_SAFE_NO_PAD: Config = Config {char_set: CharacterSet::UrlSafe, pad: false};
/// Standard character set with `=` padding.
/// Whitespace is not stripped before decoding and encoded output is not
/// line-wrapped (`line_size` is `None`, so `line_ending` is ignored).
pub static STANDARD: Config = Config {
char_set: CharacterSet::Standard,
pad: true,
strip_whitespace: false,
line_size: None,
line_ending: LineEnding::LF,
};
/// MIME configuration: standard character set with `=` padding.
/// Whitespace is stripped before decoding, and encoded output is wrapped at
/// 76 characters per line using CRLF line endings.
pub static MIME: Config = Config {
char_set: CharacterSet::Standard,
pad: true,
strip_whitespace: true,
line_size: Some(76),
line_ending: LineEnding::CRLF,
};
/// URL-safe character set with `=` padding.
/// Whitespace is not stripped before decoding and encoded output is not
/// line-wrapped (`line_size` is `None`, so `line_ending` is ignored).
pub static URL_SAFE: Config = Config {
char_set: CharacterSet::UrlSafe,
pad: true,
strip_whitespace: false,
line_size: None,
line_ending: LineEnding::LF,
};
/// URL-safe character set without `=` padding.
/// Whitespace is not stripped before decoding and encoded output is not
/// line-wrapped (`line_size` is `None`, so `line_ending` is ignored).
pub static URL_SAFE_NO_PAD: Config = Config {
char_set: CharacterSet::UrlSafe,
pad: false,
strip_whitespace: false,
line_size: None,
line_ending: LineEnding::LF,
};
#[derive(Debug, PartialEq, Eq)]
@ -95,30 +135,6 @@ pub fn decode<T: ?Sized + AsRef<[u8]>>(input: &T) -> Result<Vec<u8>, DecodeError
decode_config(input, STANDARD)
}
///DEPRECATED -- will be replaced by `decode_config(input, Base64Mode::MIME);`
///
///Decode from string reference as octets.
///Returns a Result containing a Vec<u8>.
///Ignores extraneous whitespace.
///
///# Example
///
///```rust
///extern crate base64;
///
///fn main() {
/// let bytes = base64::decode_ws("aG VsbG8gd2\r\n9ybGQ=").unwrap();
/// println!("{:?}", bytes);
///}
///```
pub fn decode_ws(input: &str) -> Result<Vec<u8>, DecodeError> {
    // Drop space, LF, tab, CR and form feed; every other byte is handed to the
    // decoder unchanged so it can accept or reject it.
    let mut raw = Vec::<u8>::with_capacity(input.len());
    raw.extend(input.bytes().filter(|b| !b" \n\t\r\x0c".contains(b)));
    // The decoder works on bytes, so the filtered buffer is passed directly
    // instead of being re-validated as UTF-8 and unwrapped into a String.
    decode_config(&raw, STANDARD)
}
///Encode arbitrary octets as base64.
///Returns a String.
///
@ -136,7 +152,7 @@ pub fn decode_ws(input: &str) -> Result<Vec<u8>, DecodeError> {
///}
///```
pub fn encode_config(input: &[u8], config: Config) -> String {
let mut buf = String::with_capacity(encoded_size(input.len()));
let mut buf = String::with_capacity(encoded_size(input.len(), config));
encode_config_buf(input, config, &mut buf);
@ -144,18 +160,26 @@ pub fn encode_config(input: &[u8], config: Config) -> String {
}
/// calculate the base64 encoded string size, including padding
fn encoded_size(bytes_len: usize) -> usize {
fn encoded_size(bytes_len: usize, config: Config) -> usize {
let rem = bytes_len % 3;
let complete_input_chunks = bytes_len / 3;
let complete_output_chars = complete_input_chunks * 4;
let leftover_output_chars = if rem == 0 {
0
let printing_output_chars = if rem == 0 {
complete_output_chars
} else {
4
complete_output_chars + 4
};
let line_ending_length = match config.line_ending {
LineEnding::CRLF => 2,
LineEnding::LF => 1,
};
let line_ending_output_chars = match config.line_size {
Some(0) | None => 0,
Some(n) => printing_output_chars / n * line_ending_length,
};
return complete_output_chars + leftover_output_chars;
return printing_output_chars + line_ending_output_chars;
}
///Encode arbitrary octets as base64.
@ -183,7 +207,7 @@ pub fn encode_config_buf(input: &[u8], config: Config, buf: &mut String) {
};
// reserve to make sure the memory we'll be writing to with unsafe is allocated
buf.reserve(encoded_size(input.len()));
buf.reserve(encoded_size(input.len(), config));
let orig_buf_len = buf.len();
let mut fast_loop_output_buf_len = orig_buf_len;
@ -255,6 +279,25 @@ pub fn encode_config_buf(input: &[u8], config: Config, buf: &mut String) {
raw.push(0x3d);
}
}
if config.line_size.is_some() && config.line_size.unwrap() > 0 {
let line_size = config.line_size.unwrap();
let len = raw.len();
let mut i = 0;
let mut j = 0;
while i < len {
if i > 0 && i % line_size == 0 {
match config.line_ending {
LineEnding::LF => { raw.insert(j, b'\n'); j += 1; }
LineEnding::CRLF => { raw.insert(j, b'\r'); raw.insert(j + 1, b'\n'); j += 2; }
}
}
i += 1;
j += 1;
}
}
}
///Decode from string reference as octets.
@ -303,7 +346,16 @@ pub fn decode_config_buf<T: ?Sized + AsRef<[u8]>>(input: &T,
config: Config,
buffer: &mut Vec<u8>)
-> Result<(), DecodeError> {
let input_bytes = input.as_ref();
let mut input_copy;
let input_bytes = if config.strip_whitespace {
input_copy = Vec::<u8>::with_capacity(input.as_ref().len());
input_copy.extend(input.as_ref().iter().filter(|b| !b" \n\t\r\x0c".contains(b)));
input_copy.as_ref()
} else {
input.as_ref()
};
let ref decode_table = match config.char_set {
CharacterSet::Standard => tables::STANDARD_DECODE,
CharacterSet::UrlSafe => tables::URL_SAFE_DECODE,
@ -504,22 +556,88 @@ pub fn decode_config_buf<T: ?Sized + AsRef<[u8]>>(input: &T,
#[cfg(test)]
mod tests {
use super::encoded_size;
use super::{encoded_size, STANDARD, MIME, Config, CharacterSet, LineEnding};
#[test]
fn encoded_size_correct() {
assert_eq!(0, encoded_size(0));
assert_eq!(0, encoded_size(0, STANDARD));
assert_eq!(4, encoded_size(1));
assert_eq!(4, encoded_size(2));
assert_eq!(4, encoded_size(3));
assert_eq!(4, encoded_size(1, STANDARD));
assert_eq!(4, encoded_size(2, STANDARD));
assert_eq!(4, encoded_size(3, STANDARD));
assert_eq!(8, encoded_size(4));
assert_eq!(8, encoded_size(5));
assert_eq!(8, encoded_size(6));
assert_eq!(8, encoded_size(4, STANDARD));
assert_eq!(8, encoded_size(5, STANDARD));
assert_eq!(8, encoded_size(6, STANDARD));
assert_eq!(12, encoded_size(7));
assert_eq!(12, encoded_size(8));
assert_eq!(12, encoded_size(9));
assert_eq!(12, encoded_size(7, STANDARD));
assert_eq!(12, encoded_size(8, STANDARD));
assert_eq!(12, encoded_size(9, STANDARD));
assert_eq!(72, encoded_size(54, STANDARD));
assert_eq!(76, encoded_size(55, STANDARD));
assert_eq!(76, encoded_size(56, STANDARD));
assert_eq!(76, encoded_size(57, STANDARD));
assert_eq!(80, encoded_size(58, STANDARD));
}
#[test]
fn encoded_size_correct_mime() {
    // Each row is (input length in bytes, expected encoded size) under MIME.
    // Up to 54 input bytes the output is shorter than one 76-character line,
    // so nothing is added; from 55 bytes on the expected size includes the
    // two bytes of one CRLF.
    let expectations: [(usize, usize); 15] = [
        (0, 0),
        (1, 4),
        (2, 4),
        (3, 4),
        (4, 8),
        (5, 8),
        (6, 8),
        (7, 12),
        (8, 12),
        (9, 12),
        (54, 72),
        (55, 78),
        (56, 78),
        (57, 78),
        (58, 82),
    ];

    for &(input_len, expected) in expectations.iter() {
        assert_eq!(expected, encoded_size(input_len, MIME));
    }
}
#[test]
fn encoded_size_correct_lf() {
    // Same wrapping width as MIME (76), but with one-byte LF line endings and
    // no whitespace stripping.
    let config = Config {
        char_set: CharacterSet::Standard,
        pad: true,
        strip_whitespace: false,
        line_size: Some(76),
        line_ending: LineEnding::LF,
    };

    // Each row is (input length in bytes, expected encoded size).
    // From 55 input bytes on the expected size includes one extra byte for
    // a single LF.
    let expectations: [(usize, usize); 15] = [
        (0, 0),
        (1, 4),
        (2, 4),
        (3, 4),
        (4, 8),
        (5, 8),
        (6, 8),
        (7, 12),
        (8, 12),
        (9, 12),
        (54, 72),
        (55, 77),
        (56, 77),
        (57, 77),
        (58, 81),
    ];

    for &(input_len, expected) in expectations.iter() {
        assert_eq!(expected, encoded_size(input_len, config));
    }
}
}

View File

@ -14,8 +14,8 @@ fn compare_decode(expected: &str, target: &str) {
assert_eq!(expected, String::from_utf8(decode(target.as_bytes()).unwrap()).unwrap());
}
fn compare_decode_ws(expected: &str, target: &str) {
assert_eq!(expected, String::from_utf8(decode_ws(target).unwrap()).unwrap());
// Decodes `target` with the MIME config and asserts that the result, read as
// UTF-8, equals `expected`. Panics if decoding or UTF-8 conversion fails.
fn compare_decode_mime(expected: &str, target: &str) {
    let decoded_bytes = decode_config(target, MIME).unwrap();
    let decoded_text = String::from_utf8(decoded_bytes).unwrap();
    assert_eq!(expected, decoded_text);
}
fn push_rand(buf: &mut Vec<u8>, len: usize) {
@ -515,13 +515,35 @@ fn decode_reject_null() {
assert!(decode("YWx\0pY2U=").is_ok());
}
//TODO unicode tests
//put in a separate file so this remains valid ascii
#[test]
fn decode_mime_allow_space() {
    // A space inside the encoded text must not make MIME decoding fail.
    let outcome = decode_config("YWx pY2U=", MIME);
    assert!(outcome.is_ok());
}
#[test]
fn decode_ws_absurd_whitespace() {
compare_decode_ws("how could you let this happen",
fn decode_mime_allow_tab() {
assert!(decode_config("YWx\tpY2U=", MIME).is_ok());
}
#[test]
fn decode_mime_allow_nl() {
    // A bare line feed inside the encoded text must not make MIME decoding fail.
    let outcome = decode_config("YWx\npY2U=", MIME);
    assert!(outcome.is_ok());
}
#[test]
fn decode_mime_allow_crnl() {
    // A CRLF pair inside the encoded text must not make MIME decoding fail.
    let outcome = decode_config("YWx\r\npY2U=", MIME);
    assert!(outcome.is_ok());
}
#[test]
fn decode_mime_reject_null() {
    // NUL is not one of the whitespace bytes stripped for MIME, so decoding
    // must return an error. Asserting `is_err()` directly (rather than
    // `#[should_panic]` around an `is_ok()` assertion) keeps the test from
    // passing on an unrelated panic.
    assert!(decode_config("YWx\0pY2U=", MIME).is_err());
}
#[test]
fn decode_mime_absurd_whitespace() {
    // Encoded text littered with spaces, tabs, LF, CR, CRLF and form feed,
    // including around the padding character.
    let mangled = "\n aG93I\n\nGNvd\r\nWxkI HlvdSB \tsZXQgdGh\rpcyBo\x0cYXBwZW4 = ";
    compare_decode_mime("how could you let this happen", mangled);
}