Add tests around encoding/decoding into an existing buffer, and improve comments a little

2025-03-04 11:27:29 +00:00 · 2017-01-12 12:42:22 -08:00 · 2017-01-12 12:42:22 -08:00 · 1ba394c278
commit 1ba394c278
parent dc44239c5c
2 changed files with 117 additions and 15 deletions
--- a/src/lib.rs
+++ b/src/lib.rs
@ -285,24 +285,28 @@ pub fn decode_mode_buf(input: &str, mode: Base64Mode, buffer: &mut Vec<u8>) -> R

    buffer.reserve(input.len() * 3 / 4);

-    // the fast loop only handles complete blocks of 8 input morsels
-    let chunk_len = std::mem::size_of::<u64>();
-    let chunk_rem = input.len() % chunk_len;
-    let trailing_bytes_to_skip = if chunk_rem == 0 {
+    // the fast loop only handles complete chunks of 8 input bytes without padding
+    let chunk_len = 8;
+    let decoded_chunk_len = 6;
+    let remainder_len = input.len() % chunk_len;
+    let trailing_bytes_to_skip = if remainder_len == 0 {
        // if input is a multiple of the chunk size, ignore the last chunk as it may have padding
        chunk_len
    } else {
-        chunk_rem
+        remainder_len
    };
+
    let length_of_full_chunks = input.len().saturating_sub(trailing_bytes_to_skip);

-    // make sure buffer can hold enough for the fast loop
-    let starting_index = buffer.len();
-    // need the extra two bytes because we write a full 8 bytes for the last chunk
-    // and then truncate off two
-    buffer.resize(starting_index + length_of_full_chunks / 8 * 6 + 2, 0);
+    let starting_output_index = buffer.len();
+    // Resize to hold decoded output from fast loop. Need the extra two bytes because
+    // we write a full 8 bytes for the last 6-byte decoded chunk and then truncate off two
+    let new_size = starting_output_index
+        + length_of_full_chunks / chunk_len * decoded_chunk_len
+        + (chunk_len - decoded_chunk_len);
+    buffer.resize(new_size, 0);

-    let mut output_index = starting_index;
+    let mut output_index = starting_output_index;

    let input_bytes = input.as_bytes();
    {
@ -389,12 +393,13 @@ pub fn decode_mode_buf(input: &str, mode: Base64Mode, buffer: &mut Vec<u8>) -> R
    }

    // Truncate off the last two bytes from writing the last u64.
-    // Unconditional because we added on the extra 2 bytes in the resize before the loop.
-    let new_len = buffer.len() - 2;
+    // Unconditional because we added on the extra 2 bytes in the resize before the loop,
+    // so it will never underflow.
+    let new_len = buffer.len() - (chunk_len - decoded_chunk_len);
    buffer.truncate(new_len);

-    // handle leftovers (at most 8 bytes).
-    // Use a u64 as a stack-resident 8-byte Vec.
+    // handle leftovers (at most 8 bytes, decoded to 6).
+    // Use a u64 as a stack-resident 8 bytes buffer.
    let mut leftover_bits: u64 = 0;
    let mut morsels_in_leftover = 0;
    let mut padding_bytes = 0;
--- a/tests/tests.rs
+++ b/tests/tests.rs
@ -17,6 +17,14 @@ fn compare_decode_ws(expected: &str, target: &str) {
    assert_eq!(expected, String::from_utf8(decode_ws(target).unwrap()).unwrap());
 }

+fn push_rand(buf: &mut Vec<u8>, len: usize) {
+    let mut r = rand::weak_rng();
+
+    for _ in 0..len {
+        buf.push(r.gen::<u8>());
+    }
+}
+
 // generate every possible byte string recursively and test encode/decode roundtrip
 fn roundtrip_append_recurse(byte_buf: &mut Vec<u8>, str_buf: &mut String, remaining_bytes: usize) {
    let orig_length = byte_buf.len();
@ -335,6 +343,57 @@ fn decode_error_for_bogus_char_in_right_position() {
    }
 }

+#[test]
+fn decode_into_nonempty_buffer_doesnt_clobber_existing_contents() {
+    let mut orig_data = Vec::new();
+    let mut encoded_data = String::new();
+    let mut decoded_with_prefix = Vec::new();
+    let mut decoded_without_prefix = Vec::new();
+    let mut prefix = Vec::new();
+    for encoded_length in 0_usize..25 {
+        if encoded_length % 4 == 1 {
+            // can't have a lone byte in a quad of input
+            continue;
+        };
+
+        let raw_data_byte_triples = encoded_length / 4;
+        // 4 base64 bytes -> 3 input bytes, 3 -> 2, 2 -> 1, 0 -> 0
+        let raw_data_byte_leftovers = (encoded_length % 4).saturating_sub(1);
+
+        // we'll borrow buf to make some data to encode
+        orig_data.clear();
+        push_rand(&mut orig_data, raw_data_byte_triples * 3 + raw_data_byte_leftovers);
+
+        encoded_data.clear();
+        encode_mode_buf(&orig_data, Base64Mode::Standard, &mut encoded_data);
+
+        assert_eq!(encoded_length, encoded_data.trim_right_matches('=').len());
+
+        for prefix_length in 1..25 {
+            decoded_with_prefix.clear();
+            decoded_without_prefix.clear();
+            prefix.clear();
+
+            // fill the buf with a prefix
+            push_rand(&mut prefix, prefix_length);
+            decoded_with_prefix.resize(prefix_length, 0);
+            decoded_with_prefix.copy_from_slice(&prefix);
+
+            // decode into the non-empty buf
+            decode_mode_buf(&encoded_data, Base64Mode::Standard, &mut decoded_with_prefix).unwrap();
+            // also decode into the empty buf
+            decode_mode_buf(&encoded_data, Base64Mode::Standard, &mut decoded_without_prefix).unwrap();
+
+            assert_eq!(prefix_length + decoded_without_prefix.len(), decoded_with_prefix.len());
+
+            // append plain decode onto prefix
+            prefix.append(&mut decoded_without_prefix);
+
+            assert_eq!(prefix, decoded_with_prefix);
+        }
+    }
+}
+
 #[test]
 fn roundtrip_random_no_fast_loop() {
    let mut byte_buf: Vec<u8> = Vec::new();
@ -538,6 +597,44 @@ fn encode_all_bytes_url() {
    assert_eq!("AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0-P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn-AgYKDhIWGh4iJiouMjY6PkJGSk5SVlpeYmZqbnJ2en6ChoqOkpaanqKmqq6ytrq-wsbKztLW2t7i5uru8vb6_wMHCw8TFxsfIycrLzM3Oz9DR0tPU1dbX2Nna29zd3t_g4eLj5OXm5-jp6uvs7e7v8PHy8_T19vf4-fr7_P3-_w==", encode_mode(&bytes, Base64Mode::UrlSafe));
 }

+#[test]
+fn encode_into_nonempty_buffer_doesnt_clobber_existing_contents() {
+    let mut orig_data = Vec::new();
+    let mut encoded_with_prefix = String::new();
+    let mut encoded_without_prefix = String::new();
+    let mut prefix = String::new();
+    for orig_data_length in 0_usize..25 {
+        // we'll borrow buf to make some data to encode
+        orig_data.clear();
+        push_rand(&mut orig_data, orig_data_length);
+
+        for prefix_length in 1..25 {
+            encoded_with_prefix.clear();
+            encoded_without_prefix.clear();
+            prefix.clear();
+
+            for _ in 0..prefix_length {
+                prefix.push('~');
+            }
+
+            encoded_with_prefix.push_str(&prefix);
+
+            // encode into the non-empty buf
+            encode_mode_buf(&orig_data, Base64Mode::Standard, &mut encoded_with_prefix);
+            // also encode into the empty buf
+            encode_mode_buf(&orig_data, Base64Mode::Standard, &mut encoded_without_prefix);
+
+            assert_eq!(prefix_length + encoded_without_prefix.len(), encoded_with_prefix.len());
+
+            // append plain decode onto prefix
+            prefix.push_str(&mut encoded_without_prefix);
+
+            assert_eq!(prefix, encoded_with_prefix);
+        }
+    }
+}
+
+
 #[test]
 fn because_we_can() {
    compare_decode("alice", "YWxpY2U=");