From 26e938d47daad9217b2d5959745cf7a16013de07 Mon Sep 17 00:00:00 2001 From: Marshall Pierce Date: Tue, 31 Oct 2017 16:04:44 -0500 Subject: [PATCH] Add `encode_config_slice` that writes to a `&[u8]`. Also, enable some compiler warnings and tighten up the docs. Hat tip to @quodlibetor in #49 for starting this work. --- .gitignore | 1 + README.md | 18 +---- src/display.rs | 2 +- src/lib.rs | 206 ++++++++++++++++++++++++++++++++++++++--------- src/line_wrap.rs | 14 ++-- src/tests.rs | 2 +- tests/tests.rs | 38 +++++++++ 7 files changed, 215 insertions(+), 66 deletions(-) diff --git a/.gitignore b/.gitignore index 4f66b12..bca35b1 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ main.rs # `perf record` files perf.data* +/tmp diff --git a/README.md b/README.md index 780e5b3..d8ab381 100644 --- a/README.md +++ b/README.md @@ -23,23 +23,7 @@ fn main() { } ``` -API ---- - -base64 exposes six functions: - -```rust -fn encode>(&T) -> String; -fn decode>(&T) -> Result, DecodeError>; -fn encode_config>(&T, Config) -> String; -fn encode_config_buf>(&T, Config, &mut String); -fn decode_config>(&T, Config) -> Result, DecodeError>; -fn decode_config_buf>(&T, Config, &mut Vec) -> Result<(), DecodeError>; -``` - -`STANDARD`, `URL_SAFE`, `URL_SAFE_NO_PAD`, and `MIME` configuation structs are provided for convenience. `encode` and `decode` are convenience wrappers for the `_config` functions called with the `STANDARD` config, and they are themselves wrappers of the `_buf` functions that allocate on the user's behalf. Encode produces valid padding absent a config that states otherwise; decode produces the same output for valid or omitted padding in all cases, but errors on invalid (superfluous) padding. Whitespace in the input to decode is an error for all modes except `MIME`, which disregards it ("whitespace" according to POSIX-locale `isspace`, meaning \n \r \f \t \v and space). 
- -`Config` exposes a constructor to allow custom combinations of character set, output padding, input whitespace permissiveness, linewrapping, and line ending character(s). The vast majority of usecases should be covered by the four provided, however. +See the [docs](https://docs.rs/base64) for all the details. Purpose --- diff --git a/src/display.rs b/src/display.rs index 1c3320d..942251f 100644 --- a/src/display.rs +++ b/src/display.rs @@ -1,4 +1,4 @@ -//! Enables base64'd output anywhere you might use a Display implementation, like a format string. +//! Enables base64'd output anywhere you might use a `Display` implementation, like a format string. //! //! ``` //! use base64::STANDARD; diff --git a/src/lib.rs b/src/lib.rs index d689569..ac57827 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,51 @@ +//! # Configs +//! +//! There isn't just one type of Base64; that would be too simple. You need to choose a character +//! set (standard or URL-safe), padding suffix (yes/no), and line wrap (line length, line ending). +//! The `Config` struct encapsulates this info. There are some common configs included: `STANDARD`, +//! `MIME`, etc. You can also make your own `Config` if needed. +//! +//! The functions that don't have `config` in the name (e.g. `encode()` and `decode()`) use the +//! `STANDARD` config. +//! +//! # Encoding +//! +//! Several different encoding functions are available to you depending on your desire for +//! convenience vs performance. +//! +//! | Function | Output | Allocates | +//! | ----------------------- | ---------------------------- | ------------------------------ | +//! | `encode` | Returns a new `String` | Always | +//! | `encode_config` | Returns a new `String` | Always | +//! | `encode_config_buf` | Appends to provided `String` | Only if `String` needs to grow | +//! | `encode_config_slice` | Writes to provided `&mut [u8]` | Never | +//! +//! All of the encoding functions that take a `Config` will pad, line wrap, etc, as per the config. 
+//! +//! # Decoding +//! +//! Just as for encoding, there are different decoding functions available. +//! +//! | Function | Output | Allocates | +//! | ----------------------- | ----------------------------- | ------------------------------ | +//! | `decode` | Returns a new `Vec<u8>` | Always | +//! | `decode_config` | Returns a new `Vec<u8>` | Always | +//! | `decode_config_buf` | Appends to provided `Vec<u8>` | Only if `Vec` needs to grow | +//! +//! Unlike encoding, where all possible input is valid, decoding can fail (see `DecodeError`). +//! +//! Input can be invalid because it has invalid characters or invalid padding. (No padding at all is +//! valid, but incorrect padding is not.) +//! +//! Whitespace in the input is invalid unless `strip_whitespace` is enabled in the `Config` used. +//! +//! # Panics +//! +//! If length calculations result in overflowing `usize`, a panic will result. + +#![deny(missing_docs, trivial_casts, trivial_numeric_casts, unused_extern_crates, + unused_import_braces, unused_results, variant_size_differences, warnings)] + extern crate byteorder; use std::{fmt, error, str}; @@ -36,9 +84,12 @@ impl CharacterSet { } } +/// Line ending used in optional line wrapping. #[derive(Clone, Copy, Debug)] pub enum LineEnding { + /// Unix-style \n LF, + /// Windows-style \r\n CRLF, } @@ -51,10 +102,12 @@ impl LineEnding { } } +/// Line wrap configuration. #[derive(Clone, Copy, Debug)] pub enum LineWrap { + /// Don't wrap. NoWrap, - // wrap length is always > 0 + /// Wrap lines with the specified length and line ending. The length must be > 0. Wrap(usize, LineEnding) } @@ -65,13 +118,15 @@ pub struct Config { char_set: CharacterSet, /// True to pad output with `=` characters pad: bool, - /// Remove whitespace before decoding, at the cost of an allocation + /// Remove whitespace before decoding, at the cost of an allocation. Whitespace is defined + /// according to POSIX-locale `isspace`, meaning \n \r \f \t \v and space. 
strip_whitespace: bool, /// ADT signifying whether to linewrap output, and if so by how many characters and with what ending line_wrap: LineWrap, } impl Config { + /// Create a new `Config`. pub fn new(char_set: CharacterSet, pad: bool, strip_whitespace: bool, @@ -90,6 +145,7 @@ impl Config { } } +/// Standard character set with padding. pub const STANDARD: Config = Config { char_set: CharacterSet::Standard, pad: true, @@ -97,6 +153,7 @@ pub const STANDARD: Config = Config { line_wrap: LineWrap::NoWrap, }; +/// Standard character set without padding. pub const STANDARD_NO_PAD: Config = Config { char_set: CharacterSet::Standard, pad: false, @@ -104,6 +161,7 @@ pub const STANDARD_NO_PAD: Config = Config { line_wrap: LineWrap::NoWrap, }; +/// As per standards for MIME encoded messages pub const MIME: Config = Config { char_set: CharacterSet::Standard, pad: true, @@ -111,6 +169,7 @@ pub const MIME: Config = Config { line_wrap: LineWrap::Wrap(76, LineEnding::CRLF), }; +/// URL-safe character set with padding pub const URL_SAFE: Config = Config { char_set: CharacterSet::UrlSafe, pad: true, @@ -118,6 +177,7 @@ pub const URL_SAFE: Config = Config { line_wrap: LineWrap::NoWrap, }; +/// URL-safe character set without padding pub const URL_SAFE_NO_PAD: Config = Config { char_set: CharacterSet::UrlSafe, pad: false, @@ -125,9 +185,12 @@ pub const URL_SAFE_NO_PAD: Config = Config { line_wrap: LineWrap::NoWrap, }; +/// Errors that can occur while decoding. #[derive(Debug, PartialEq, Eq)] pub enum DecodeError { + /// An invalid byte was found in the input. The offset and offending byte are provided. InvalidByte(usize, u8), + /// The length of the input is invalid. 
InvalidLength, } @@ -218,40 +281,8 @@ pub fn encode_config>(input: &T, config: Config) -> Stri buf } -/// calculate the base64 encoded string size, including padding -fn encoded_size(bytes_len: usize, config: &Config) -> Option { - let rem = bytes_len % 3; - - let complete_input_chunks = bytes_len / 3; - let complete_chunk_output = complete_input_chunks.checked_mul(4); - - let encoded_len_no_wrap = if rem > 0 { - if config.pad { - complete_chunk_output.and_then(|c| c.checked_add(4)) - } else { - let encoded_rem = match rem { - 1 => 2, - 2 => 3, - _ => panic!("Impossible remainder") - }; - complete_chunk_output.and_then(|c| c.checked_add(encoded_rem)) - } - } else { - complete_chunk_output - }; - - encoded_len_no_wrap.map(|e| { - match config.line_wrap { - LineWrap::NoWrap => e, - LineWrap::Wrap(line_len, line_ending) => { - line_wrap_parameters(e, line_len, line_ending).total_len - } - } - }) -} - ///Encode arbitrary octets as base64. -///Writes into the supplied buffer to avoid allocations. +///Writes into the supplied output buffer, which will grow the buffer if needed. /// ///# Example /// @@ -285,14 +316,77 @@ pub fn encode_config_buf>(input: &T, config: Config, buf buf_bytes.resize(orig_buf_len.checked_add(encoded_size) .expect("usize overflow when calculating expanded buffer size"), 0); - let b64_output = &mut buf_bytes[orig_buf_len..]; + let mut b64_output = &mut buf_bytes[orig_buf_len..]; - let encoded_bytes = encode_with_padding(input_bytes, b64_output, config.char_set.encode_table(), + encode_with_padding_line_wrap(&input_bytes, &config, encoded_size, &mut b64_output); +} + +/// Encode arbitrary octets as base64. +/// Writes into the supplied output buffer. +/// +/// This is useful if you wish to avoid allocation entirely (e.g. encoding into a stack-resident +/// or statically-allocated buffer). +/// +/// # Panics +/// +/// If `output` is too small to hold the encoded version of `input`, a panic will result. 
+/// +/// # Example +/// +/// ```rust +/// extern crate base64; +/// +/// fn main() { +/// let s = b"hello internet!"; +/// let mut buf = Vec::new(); +/// // make sure we'll have a slice big enough for base64 + padding +/// buf.resize(s.len() * 4 / 3 + 4, 0); +/// +/// let bytes_written = base64::encode_config_slice(s, +/// base64::STANDARD, &mut buf); +/// +/// // shorten our vec down to just what was written +/// buf.resize(bytes_written, 0); +/// +/// assert_eq!(s, base64::decode(&buf).unwrap().as_slice()); +/// } +/// ``` +pub fn encode_config_slice>(input: &T, config: Config, output: &mut [u8]) -> usize { + let input_bytes = input.as_ref(); + + let encoded_size = encoded_size(input_bytes.len(), &config) + .expect("usize overflow when calculating buffer size"); + + let mut b64_output = &mut output[0..encoded_size]; + + encode_with_padding_line_wrap(&input_bytes, &config, encoded_size, &mut b64_output); + + encoded_size +} + +/// B64-encode, pad, and line wrap (if configured). +/// +/// This helper exists to avoid recalculating encoded_size, which is relatively expensive on short +/// inputs. +/// +/// `encoded_size` is the encoded size calculated for `input`. +/// +/// `output` must be of size `encoded_size`. +/// +/// All bytes in `output` will be written to since it is exactly the size of the output. +fn encode_with_padding_line_wrap(input: &[u8], config: &Config, encoded_size: usize, output: &mut [u8]) { + debug_assert_eq!(encoded_size, output.len()); + + let encoded_bytes = encode_with_padding(input, output, config.char_set.encode_table(), config.pad); - if let LineWrap::Wrap(line_len, line_end) = config.line_wrap { - line_wrap(b64_output, encoded_bytes, line_len, line_end); - } + let line_ending_bytes = if let LineWrap::Wrap(line_len, line_end) = config.line_wrap { + line_wrap(output, encoded_bytes, line_len, line_end) + } else { + 0 + }; + + debug_assert_eq!(encoded_size, encoded_bytes + line_ending_bytes); } /// Encode input bytes and pad if configured. 
@@ -426,6 +520,38 @@ fn encode_to_slice(input: &[u8], output: &mut [u8], encode_table: &[u8; 64]) -> output_index } +/// calculate the base64 encoded string size, including padding and line wraps if appropriate +fn encoded_size(bytes_len: usize, config: &Config) -> Option { + let rem = bytes_len % 3; + + let complete_input_chunks = bytes_len / 3; + let complete_chunk_output = complete_input_chunks.checked_mul(4); + + let encoded_len_no_wrap = if rem > 0 { + if config.pad { + complete_chunk_output.and_then(|c| c.checked_add(4)) + } else { + let encoded_rem = match rem { + 1 => 2, + 2 => 3, + _ => unreachable!("Impossible remainder") + }; + complete_chunk_output.and_then(|c| c.checked_add(encoded_rem)) + } + } else { + complete_chunk_output + }; + + encoded_len_no_wrap.map(|e| { + match config.line_wrap { + LineWrap::NoWrap => e, + LineWrap::Wrap(line_len, line_ending) => { + line_wrap_parameters(e, line_len, line_ending).total_len + } + } + }) +} + /// Write padding characters. /// `output` is the slice where padding should be written, of length at least 2. 
fn add_padding(input_len: usize, output: &mut[u8]) -> usize { diff --git a/src/line_wrap.rs b/src/line_wrap.rs index 2796b58..26525e4 100644 --- a/src/line_wrap.rs +++ b/src/line_wrap.rs @@ -212,7 +212,7 @@ mod tests { fn line_wrap_length_1_lf() { let mut buf = vec![0x1, 0x2, 0x3, 0x4]; - do_line_wrap(&mut buf, 1, LineEnding::LF); + assert_eq!(3, do_line_wrap(&mut buf, 1, LineEnding::LF)); assert_eq!(vec![0x1, 0xA, 0x2, 0xA, 0x3, 0xA, 0x4], buf); } @@ -221,7 +221,7 @@ mod tests { fn line_wrap_length_1_crlf() { let mut buf = vec![0x1, 0x2, 0x3, 0x4]; - do_line_wrap(&mut buf, 1, LineEnding::CRLF); + assert_eq!(6, do_line_wrap(&mut buf, 1, LineEnding::CRLF)); assert_eq!(vec![0x1, 0xD, 0xA, 0x2, 0xD, 0xA, 0x3, 0xD, 0xA, 0x4], buf); } @@ -230,7 +230,7 @@ mod tests { fn line_wrap_length_2_lf_full_lines() { let mut buf = vec![0x1, 0x2, 0x3, 0x4]; - do_line_wrap(&mut buf, 2, LineEnding::LF); + assert_eq!(1, do_line_wrap(&mut buf, 2, LineEnding::LF)); assert_eq!(vec![0x1, 0x2, 0xA, 0x3, 0x4], buf); } @@ -239,7 +239,7 @@ mod tests { fn line_wrap_length_2_crlf_full_lines() { let mut buf = vec![0x1, 0x2, 0x3, 0x4]; - do_line_wrap(&mut buf, 2, LineEnding::CRLF); + assert_eq!(2, do_line_wrap(&mut buf, 2, LineEnding::CRLF)); assert_eq!(vec![0x1, 0x2, 0xD, 0xA, 0x3, 0x4], buf); } @@ -248,7 +248,7 @@ mod tests { fn line_wrap_length_2_lf_partial_line() { let mut buf = vec![0x1, 0x2, 0x3, 0x4, 0x5]; - do_line_wrap(&mut buf, 2, LineEnding::LF); + assert_eq!(2, do_line_wrap(&mut buf, 2, LineEnding::LF)); assert_eq!(vec![0x1, 0x2, 0xA, 0x3, 0x4, 0xA, 0x5], buf); } @@ -257,7 +257,7 @@ mod tests { fn line_wrap_length_2_crlf_partial_line() { let mut buf = vec![0x1, 0x2, 0x3, 0x4, 0x5]; - do_line_wrap(&mut buf, 2, LineEnding::CRLF); + assert_eq!(4, do_line_wrap(&mut buf, 2, LineEnding::CRLF)); assert_eq!(vec![0x1, 0x2, 0xD, 0xA, 0x3, 0x4, 0xD, 0xA, 0x5], buf); } @@ -296,7 +296,7 @@ mod tests { let line_ending_offset = (line_ending_num + 1) * line_len; for _ in 0..line_ending_len { - 
buf.remove(line_ending_offset); + let _ = buf.remove(line_ending_offset); } } diff --git a/src/tests.rs b/src/tests.rs index dc3d179..2a86a6f 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -1,7 +1,7 @@ extern crate rand; use super::*; -use super::line_wrap::{line_wrap_parameters}; +use super::line_wrap::line_wrap_parameters; use self::rand::Rng; use self::rand::distributions::{IndependentSample, Range}; diff --git a/tests/tests.rs b/tests/tests.rs index 05ebba7..b43cf93 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -468,3 +468,41 @@ fn encode_url_safe_without_padding() { assert_eq!(&encoded, "YWxpY2U"); assert_eq!(String::from_utf8(decode(&encoded).unwrap()).unwrap(), "alice"); } + +#[test] +fn encode_config_slice_can_use_inline_buffer() { + let mut buf: [u8; 22] = [0; 22]; + let mut larger_buf: [u8; 24] = [0; 24]; + let mut input: [u8; 16] = [0; 16]; + + let mut rng = rand::weak_rng(); + for elt in &mut input { + *elt = rng.gen(); + } + + assert_eq!(22, encode_config_slice(&input, STANDARD_NO_PAD, &mut buf)); + let decoded = decode_config(&buf, STANDARD_NO_PAD).unwrap(); + + assert_eq!(decoded, input); + + // let's try it again with padding + + assert_eq!(24, encode_config_slice(&input, STANDARD, &mut larger_buf)); + let decoded = decode_config(&buf, STANDARD).unwrap(); + + assert_eq!(decoded, input); +} + +#[test] +#[should_panic(expected = "index 24 out of range for slice of length 22")] +fn encode_config_slice_panics_when_buffer_too_small() { + let mut buf: [u8; 22] = [0; 22]; + let mut input: [u8; 16] = [0; 16]; + + let mut rng = rand::weak_rng(); + for elt in &mut input { + *elt = rng.gen(); + } + + encode_config_slice(&input, STANDARD, &mut buf); +}