Merge pull request #81 from alicemaz/remove-ws-strip

Remove whitespace stripping.
2024-12-11 15:05:35 +00:00 · 2018-10-28 12:01:44 -06:00 · 2018-10-28 12:01:44 -06:00 · 95edf364fe
commit 95edf364fe
parent 1085da88d2 d7950e814e
7 changed files with 11 additions and 93 deletions
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@ -5,6 +5,7 @@
  configs that `unwrap()` for you are no longer needed
 - Add a streaming encoder `Write` impl to transparently base64 as you write.
 - Remove the remaining `unsafe` code.
+- Remove whitespace stripping to simplify `no_std` support. No out-of-the-box configs use it, and it's trivial to do yourself if needed: `filter(|b| !b" \n\t\r\x0b\x0c".contains(b))`.

 # 0.9.3

--- a/src/chunked_encoder.rs
+++ b/src/chunked_encoder.rs
@ -176,7 +176,6 @@ pub mod tests {
        let config = Config::new(
            CharacterSet::Standard,
            false,
-            false,
        );
        assert_eq!(300, max_input_length(401, &config));
    }
@ -221,7 +220,7 @@ pub mod tests {
    }

    fn config_with_pad(pad: bool) -> Config {
-        Config::new(CharacterSet::Standard, pad, false)
+        Config::new(CharacterSet::Standard, pad)
    }

    // An abstraction around sinks so that we can have tests that easily apply to any sink implementation
--- a/src/decode.rs
+++ b/src/decode.rs
@ -120,13 +120,7 @@ pub fn decode_config_buf<T: ?Sized + AsRef<[u8]>>(
    config: Config,
    buffer: &mut Vec<u8>,
 ) -> Result<(), DecodeError> {
-    let input_copy;
-    let input_bytes = if config.strip_whitespace {
-        input_copy = copy_without_whitespace(input.as_ref());
-        input_copy.as_ref()
-    } else {
-        input.as_ref()
-    };
+    let input_bytes = input.as_ref();

    let starting_output_len = buffer.len();

@ -162,13 +156,7 @@ pub fn decode_config_slice<T: ?Sized + AsRef<[u8]>>(
    config: Config,
    output: &mut [u8],
 ) -> Result<usize, DecodeError> {
-    let input_copy;
-    let input_bytes = if config.strip_whitespace {
-        input_copy = copy_without_whitespace(input.as_ref());
-        input_copy.as_ref()
-    } else {
-        input.as_ref()
-    };
+    let input_bytes = input.as_ref();

    decode_helper(
        input_bytes,
@ -186,13 +174,6 @@ fn num_chunks(input: &[u8]) -> usize {
        .expect("Overflow when calculating number of chunks in input") / INPUT_CHUNK_LEN
 }

-fn copy_without_whitespace(input: &[u8]) -> Vec<u8> {
-    let mut input_copy = Vec::<u8>::with_capacity(input.len());
-    input_copy.extend(input.iter().filter(|b| !b" \n\t\r\x0b\x0c".contains(b)));
-
-    input_copy
-}
-
 /// Helper to avoid duplicating num_chunks calculation, which is costly on short inputs.
 /// Returns the number of bytes written, or an error.
 // We're on the fragile edge of compiler heuristics here. If this is not inlined, slow. If this is
--- a/src/lib.rs
+++ b/src/lib.rs
@ -31,10 +31,6 @@
 //!
 //! Just as for encoding, there are different decoding functions available.
 //!
-//! Note that all decode functions that take a config will allocate a copy of the input if you
-//! specify a config that requires whitespace to be stripped. If you care about speed, don't use
-//! formats that require whitespace stripping.
-//!
 //! | Function                | Output                        | Allocates                      |
 //! | ----------------------- | ----------------------------- | ------------------------------ |
 //! | `decode`                | Returns a new `Vec<u8>`       | Always                         |
@ -45,9 +41,7 @@
 //! Unlike encoding, where all possible input is valid, decoding can fail (see `DecodeError`).
 //!
 //! Input can be invalid because it has invalid characters or invalid padding. (No padding at all is
-//! valid, but excess padding is not.)
-//!
-//! Whitespace in the input is invalid unless `strip_whitespace` is enabled in the `Config` used.
+//! valid, but excess padding is not.) Whitespace in the input is invalid.
 //!
 //! # Panics
 //!
@ -118,9 +112,6 @@ pub struct Config {
    char_set: CharacterSet,
    /// True to pad output with `=` characters
    pad: bool,
-    /// Remove whitespace before decoding, at the cost of an allocation. Whitespace is defined
-    /// according to POSIX-locale `isspace`, meaning \n \r \f \t \v and space.
-    strip_whitespace: bool,
 }

 impl Config {
@ -128,12 +119,10 @@ impl Config {
    pub fn new(
        char_set: CharacterSet,
        pad: bool,
-        strip_whitespace: bool
    ) -> Config {
        Config {
            char_set,
            pad,
-            strip_whitespace,
        }
    }
 }
@ -142,33 +131,28 @@ impl Config {
 pub const STANDARD: Config = Config {
    char_set: CharacterSet::Standard,
    pad: true,
-    strip_whitespace: false,
 };

 /// Standard character set without padding.
 pub const STANDARD_NO_PAD: Config = Config {
    char_set: CharacterSet::Standard,
    pad: false,
-    strip_whitespace: false,
 };

 /// URL-safe character set with padding
 pub const URL_SAFE: Config = Config {
    char_set: CharacterSet::UrlSafe,
    pad: true,
-    strip_whitespace: false,
 };

 /// URL-safe character set without padding
 pub const URL_SAFE_NO_PAD: Config = Config {
    char_set: CharacterSet::UrlSafe,
    pad: false,
-    strip_whitespace: false,
 };

 /// As per `crypt(3)` requirements
 pub const CRYPT: Config = Config {
    char_set: CharacterSet::Crypt,
    pad: false,
-    strip_whitespace: false,
 };
--- a/src/tests.rs
+++ b/src/tests.rs
@ -78,7 +78,5 @@ pub fn random_config<R: Rng>(rng: &mut R) -> Config {
    ];
    let charset = *rng.choose(CHARSETS).unwrap();

-    let strip_whitespace = rng.gen();
-
-    Config::new(charset, rng.gen(), strip_whitespace)
+    Config::new(charset, rng.gen())
 }
--- a/tests/decode.rs
+++ b/tests/decode.rs
@ -6,13 +6,6 @@ mod helpers;

 use helpers::*;

-fn compare_decode_strip_ws(expected: &str, target: &str) {
-    assert_eq!(
-        expected,
-        String::from_utf8(decode_config(target, config_std_strip_whitespace()).unwrap()).unwrap()
-    );
-}
-
 #[test]
 fn decode_rfc4648_0() {
    compare_decode("", "");
@ -75,48 +68,10 @@ fn decode_rfc4648_6() {
 }

 #[test]
-fn decode_strip_ws_allow_space() {
-    assert!(decode_config("YWx pY2U=", config_std_strip_whitespace()).is_ok());
-}
-
-#[test]
-fn decode_strip_ws_allow_tab() {
-    assert!(decode_config("YWx\tpY2U=", config_std_strip_whitespace()).is_ok());
-}
-
-#[test]
-fn decode_strip_ws_allow_ff() {
-    assert!(decode_config("YWx\x0cpY2U=", config_std_strip_whitespace()).is_ok());
-}
-
-#[test]
-fn decode_strip_ws_allow_vtab() {
-    assert!(decode_config("YWx\x0bpY2U=", config_std_strip_whitespace()).is_ok());
-}
-
-#[test]
-fn decode_strip_ws_allow_nl() {
-    assert!(decode_config("YWx\npY2U=", config_std_strip_whitespace()).is_ok());
-}
-
-#[test]
-fn decode_strip_ws_allow_crnl() {
-    assert!(decode_config("YWx\r\npY2U=", config_std_strip_whitespace()).is_ok());
-}
-
-#[test]
-fn decode_strip_ws_reject_null() {
+fn decode_reject_null() {
    assert_eq!(
        DecodeError::InvalidByte(3, 0x0),
-        decode_config("YWx\0pY2U==", config_std_strip_whitespace()).unwrap_err()
-    );
-}
-
-#[test]
-fn decode_mime_absurd_whitespace() {
-    compare_decode_strip_ws(
-        "how could you let this happen",
-        "\n aG93I\n\nG\x0bNvd\r\nWxkI HlvdSB \tsZXQgdGh\rpcyBo\x0cYXBwZW4 =   ",
+        decode_config("YWx\0pY2U==", config_std_pad()).unwrap_err()
    );
 }

@ -350,6 +305,6 @@ fn decode_reject_invalid_bytes_with_correct_error() {
    }
 }

-fn config_std_strip_whitespace() -> Config {
-    Config::new(CharacterSet::Standard, true, true)
+fn config_std_pad() -> Config {
+    Config::new(CharacterSet::Standard, true)
 }
--- a/tests/tests.rs
+++ b/tests/tests.rs
@ -53,7 +53,7 @@ fn calculate_number_of_rounds(byte_len: usize, approx_values_per_byte: u8, max:
 }

 fn no_pad_config() -> Config {
-    Config::new(CharacterSet::Standard, false, false)
+    Config::new(CharacterSet::Standard, false)
 }

 #[test]