diff --git a/.gitignore b/.gitignore index 77bf8c8..c2a0d3e 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,7 @@ Cargo.lock .settings *~ *.bk +fuzz/target +fuzz/Cargo.lock +fuzz/artifacts +fuzz/corpus diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml new file mode 100644 index 0000000..8bdf24f --- /dev/null +++ b/fuzz/Cargo.toml @@ -0,0 +1,22 @@ + +[package] +name = "encoding_rs-fuzz" +version = "0.0.1" +authors = ["Automatically generated"] +publish = false + +[package.metadata] +cargo-fuzz = true + +[dependencies.encoding_rs] +path = ".." +[dependencies.libfuzzer-sys] +git = "https://github.com/rust-fuzz/libfuzzer-sys.git" + +# Prevent this from interfering with workspaces +[workspace] +members = ["."] + +[[bin]] +name = "fuzz_encodings" +path = "fuzzers/fuzz_encodings.rs" diff --git a/fuzz/fuzzers/fuzz_encodings.rs b/fuzz/fuzzers/fuzz_encodings.rs new file mode 100644 index 0000000..d04e424 --- /dev/null +++ b/fuzz/fuzzers/fuzz_encodings.rs @@ -0,0 +1,494 @@ +#![no_main] +#[macro_use] +extern crate libfuzzer_sys; +extern crate encoding_rs; + +use encoding_rs::*; + +// Doesn't include ISO-8859-8 (ISO-8859-8-I is included instead).
+/// Encodings the fuzzer can target; the harness indexes this table with an
+/// input byte taken modulo the table length.
+///
+/// `ISO_8859_8_I_INIT` is listed here, while plain ISO-8859-8 is not.
+static ENCODINGS: [&'static Encoding; 39] = [
+    &UTF_8_INIT,
+    &REPLACEMENT_INIT,
+    &GBK_INIT,
+    &BIG5_INIT,
+    &EUC_JP_INIT,
+    &GB18030_INIT,
+    &UTF_16BE_INIT,
+    &UTF_16LE_INIT,
+    &SHIFT_JIS_INIT,
+    &EUC_KR_INIT,
+    &ISO_2022_JP_INIT,
+    &X_USER_DEFINED_INIT,
+    &WINDOWS_1250_INIT,
+    &WINDOWS_1251_INIT,
+    &WINDOWS_1252_INIT,
+    &WINDOWS_1253_INIT,
+    &WINDOWS_1254_INIT,
+    &WINDOWS_1255_INIT,
+    &WINDOWS_1256_INIT,
+    &WINDOWS_1257_INIT,
+    &WINDOWS_1258_INIT,
+    &KOI8_U_INIT,
+    &MACINTOSH_INIT,
+    &IBM866_INIT,
+    &KOI8_R_INIT,
+    &ISO_8859_2_INIT,
+    &ISO_8859_3_INIT,
+    &ISO_8859_4_INIT,
+    &ISO_8859_5_INIT,
+    &ISO_8859_6_INIT,
+    &ISO_8859_7_INIT,
+    &ISO_8859_10_INIT,
+    &ISO_8859_13_INIT,
+    &ISO_8859_14_INIT,
+    &WINDOWS_874_INIT,
+    &ISO_8859_15_INIT,
+    &ISO_8859_16_INIT,
+    &ISO_8859_8_I_INIT,
+    &X_MAC_CYRILLIC_INIT,
+];
+
+/// Reinterprets the 2-byte-aligned part of `data` as native-endian `u16`s.
+///
+/// When `data` starts at an odd address the first byte is skipped; a trailing
+/// byte that does not complete a `u16` is dropped. Inputs shorter than two
+/// bytes yield an empty slice.
+fn as_u16_slice(data: &[u8]) -> &[u16] {
+    let len = data.len();
+    if len < 2 {
+        // Never hand a possibly misaligned pointer to `from_raw_parts`, not
+        // even for an empty slice: the pointer must be aligned regardless of
+        // the length.
+        return &[];
+    }
+    let ptr = data.as_ptr();
+    let (adj_ptr, adj_len) = if (ptr as usize & 1) == 0 {
+        (ptr, len / 2)
+    } else {
+        // SAFETY: `len >= 2`, so `ptr + 1` still points inside `data`.
+        (unsafe { ptr.offset(1) }, (len - 1) / 2)
+    };
+    // SAFETY: `adj_ptr` is 2-byte aligned, the `adj_len * 2` bytes starting at
+    // it lie inside `data`, and every bit pattern is a valid `u16`.
+    unsafe { ::std::slice::from_raw_parts(adj_ptr as *const u16, adj_len) }
+}
+
+/// Parses the common driver header: two non-zero chunk sizes followed by the
+/// payload. Returns `None` when the header is missing or either size is zero.
+fn chunk_params(data: &[u8]) -> Option<(usize, usize, &[u8])> {
+    if data.len() < 2 {
+        return None;
+    }
+    let first = data[0] as usize;
+    let second = data[1] as usize;
+    if first == 0 || second == 0 {
+        return None;
+    }
+    Some((first, second, &data[2..]))
+}
+
+/// Parses the decoder driver header: the two chunk sizes, a decoder selector
+/// byte and the payload. Returns `None` when any of the three bytes is
+/// missing or a chunk size is zero.
+fn decoder_params(data: &[u8]) -> Option<(usize, usize, u8, &[u8])> {
+    let (first, second, rest) = match chunk_params(data) {
+        Some(params) => params,
+        None => return None,
+    };
+    match rest.split_first() {
+        Some((&selector, payload)) => Some((first, second, selector, payload)),
+        None => None,
+    }
+}
+
+/// Creates one of the three decoder flavours, chosen by `selector % 3`.
+fn new_decoder(encoding: &'static Encoding, selector: u8) -> Decoder {
+    match selector % 3 {
+        0 => encoding.new_decoder(),
+        1 => encoding.new_decoder_with_bom_removal(),
+        _ => encoding.new_decoder_without_bom_handling(),
+    }
+}
+
+/// Returns the next chunk of at most `chunk_size` items of `s` starting at
+/// `*offset`, advances `*offset` past it, and reports whether the chunk
+/// reaches the end of `s`.
+fn take_chunk<'a, T>(s: &'a [T], offset: &mut usize, chunk_size: usize) -> (&'a [T], bool) {
+    let remaining = s.len() - *offset;
+    let last = chunk_size >= remaining;
+    let len = if last { remaining } else { chunk_size };
+    let chunk = &s[*offset..*offset + len];
+    *offset += len;
+    (chunk, last)
+}
+
+/// Replaces the contents of `string` with up to `chunk_size` characters from
+/// `cs`. Returns `true` when `cs` ran out before the chunk was full.
+fn fill_chunk(cs: &mut ::std::str::Chars, string: &mut String, chunk_size: usize) -> bool {
+    string.clear();
+    for _ in 0..chunk_size {
+        match cs.next() {
+            Some(c) => string.push(c),
+            None => return true,
+        }
+    }
+    false
+}
+
+/// Runs `Encoding::decode` on `data` and discards the result.
+fn decode(encoding: &'static Encoding, data: &[u8]) {
+    let (_, _, _) = encoding.decode(data);
+}
+
+/// Runs `Encoding::decode_with_bom_removal` on `data` and discards the result.
+fn decode_with_bom_removal(encoding: &'static Encoding, data: &[u8]) {
+    let (_, _) = encoding.decode_with_bom_removal(data);
+}
+
+/// Runs `Encoding::decode_without_bom_handling` on `data` and discards the
+/// result.
+fn decode_without_bom_handling(encoding: &'static Encoding, data: &[u8]) {
+    let (_, _) = encoding.decode_without_bom_handling(data);
+}
+
+/// Runs `Encoding::decode_without_bom_handling_and_without_replacement` on
+/// `data` and discards the result.
+fn decode_without_bom_handling_and_without_replacement(encoding: &'static Encoding, data: &[u8]) {
+    let _ = encoding.decode_without_bom_handling_and_without_replacement(data);
+}
+
+/// Runs `Encoding::encode` on `data` when it is valid UTF-8; other inputs are
+/// ignored.
+fn encode(encoding: &'static Encoding, data: &[u8]) {
+    if let Ok(s) = ::std::str::from_utf8(data) {
+        let (_, _, _) = encoding.encode(s);
+    }
+}
+
+/// Streams the UTF-8 payload through `Encoder::encode_from_utf8` in chunks
+/// whose character counts alternate between the two header bytes.
+///
+/// Inputs with a bad header or a non-UTF-8 payload are ignored.
+fn encode_from_utf8(encoding: &'static Encoding, data: &[u8]) {
+    let (first, second, payload) = match chunk_params(data) {
+        Some(params) => params,
+        None => return,
+    };
+    let s = match ::std::str::from_utf8(payload) {
+        Ok(s) => s,
+        Err(_) => return,
+    };
+    let mut encoder = encoding.new_encoder();
+    let mut cs = s.chars();
+    let mut string = String::new();
+    let mut dst: Vec<u8> = Vec::new();
+    let mut phase = false;
+    loop {
+        phase = !phase;
+        let chunk_size = if phase { first } else { second };
+        // Running out of characters marks the final (possibly empty) chunk.
+        let last = fill_chunk(&mut cs, &mut string, chunk_size);
+        let mut total_read = 0;
+        loop {
+            let needed = match encoder
+                .max_buffer_length_from_utf8_if_no_unmappables(string.len() - total_read)
+            {
+                Some(needed) => needed,
+                // Bail out instead of spinning forever when no size is available.
+                None => return,
+            };
+            dst.resize(needed, 0);
+            let (result, read, _, _) =
+                encoder.encode_from_utf8(&string[total_read..], &mut dst, last);
+            total_read += read;
+            if result == CoderResult::InputEmpty {
+                break;
+            }
+        }
+        if last {
+            return;
+        }
+    }
+}
+
+/// Streams the UTF-8 payload through
+/// `Encoder::encode_from_utf8_without_replacement` in alternating chunk sizes,
+/// stopping at the first unmappable character.
+///
+/// Panics if the encoder reports `OutputFull` for a buffer sized by
+/// `max_buffer_length_from_utf8_without_replacement`.
+fn encode_from_utf8_without_replacement(encoding: &'static Encoding, data: &[u8]) {
+    let (first, second, payload) = match chunk_params(data) {
+        Some(params) => params,
+        None => return,
+    };
+    let s = match ::std::str::from_utf8(payload) {
+        Ok(s) => s,
+        Err(_) => return,
+    };
+    let mut encoder = encoding.new_encoder();
+    let mut cs = s.chars();
+    let mut string = String::new();
+    let mut dst: Vec<u8> = Vec::new();
+    let mut phase = false;
+    loop {
+        phase = !phase;
+        let chunk_size = if phase { first } else { second };
+        let last = fill_chunk(&mut cs, &mut string, chunk_size);
+        let needed = match encoder.max_buffer_length_from_utf8_without_replacement(string.len()) {
+            Some(needed) => needed,
+            None => return,
+        };
+        dst.resize(needed, 0);
+        let (result, _, _) = encoder.encode_from_utf8_without_replacement(&string, &mut dst, last);
+        match result {
+            EncoderResult::InputEmpty => {
+                if last {
+                    return;
+                }
+            }
+            EncoderResult::OutputFull => unreachable!("Bogus max size math"),
+            EncoderResult::Unmappable(_) => return,
+        }
+    }
+}
+
+/// Streams the payload, viewed as `u16` code units, through
+/// `Encoder::encode_from_utf16` in alternating chunk sizes.
+fn encode_from_utf16(encoding: &'static Encoding, data: &[u8]) {
+    let (first, second, payload) = match chunk_params(data) {
+        Some(params) => params,
+        None => return,
+    };
+    let s = as_u16_slice(payload);
+    let mut encoder = encoding.new_encoder();
+    let mut offset = 0;
+    let mut dst: Vec<u8> = Vec::new();
+    let mut phase = false;
+    loop {
+        phase = !phase;
+        let chunk_size = if phase { first } else { second };
+        let (chunk, last) = take_chunk(s, &mut offset, chunk_size);
+        let mut total_read = 0;
+        loop {
+            let needed = match encoder
+                .max_buffer_length_from_utf16_if_no_unmappables(chunk.len() - total_read)
+            {
+                Some(needed) => needed,
+                // Bail out instead of spinning forever when no size is available.
+                None => return,
+            };
+            dst.resize(needed, 0);
+            let (result, read, _, _) =
+                encoder.encode_from_utf16(&chunk[total_read..], &mut dst, last);
+            total_read += read;
+            if result == CoderResult::InputEmpty {
+                break;
+            }
+        }
+        if last {
+            return;
+        }
+    }
+}
+
+/// Streams the payload, viewed as `u16` code units, through
+/// `Encoder::encode_from_utf16_without_replacement` in alternating chunk
+/// sizes, stopping at the first unmappable character.
+///
+/// Panics if the encoder reports `OutputFull` for a buffer sized by
+/// `max_buffer_length_from_utf16_without_replacement`.
+fn encode_from_utf16_without_replacement(encoding: &'static Encoding, data: &[u8]) {
+    let (first, second, payload) = match chunk_params(data) {
+        Some(params) => params,
+        None => return,
+    };
+    let s = as_u16_slice(payload);
+    let mut encoder = encoding.new_encoder();
+    let mut offset = 0;
+    let mut dst: Vec<u8> = Vec::new();
+    let mut phase = false;
+    loop {
+        phase = !phase;
+        let chunk_size = if phase { first } else { second };
+        let (chunk, last) = take_chunk(s, &mut offset, chunk_size);
+        let needed = match encoder.max_buffer_length_from_utf16_without_replacement(chunk.len()) {
+            Some(needed) => needed,
+            None => return,
+        };
+        dst.resize(needed, 0);
+        let (result, _, _) = encoder.encode_from_utf16_without_replacement(chunk, &mut dst, last);
+        match result {
+            EncoderResult::InputEmpty => {
+                if last {
+                    return;
+                }
+            }
+            EncoderResult::OutputFull => unreachable!("Bogus max size math"),
+            EncoderResult::Unmappable(_) => return,
+        }
+    }
+}
+
+/// Streams the payload through `Decoder::decode_to_utf16` in alternating
+/// chunk sizes, using the decoder flavour picked by the third header byte.
+///
+/// Panics if the decoder reports `OutputFull` for a buffer sized by
+/// `max_utf16_buffer_length`.
+fn decode_to_utf16(encoding: &'static Encoding, data: &[u8]) {
+    let (first, second, selector, s) = match decoder_params(data) {
+        Some(params) => params,
+        None => return,
+    };
+    let mut decoder = new_decoder(encoding, selector);
+    let mut offset = 0;
+    let mut dst: Vec<u16> = Vec::new();
+    let mut phase = false;
+    loop {
+        phase = !phase;
+        let chunk_size = if phase { first } else { second };
+        let (chunk, last) = take_chunk(s, &mut offset, chunk_size);
+        let needed = match decoder.max_utf16_buffer_length(chunk.len()) {
+            Some(needed) => needed,
+            None => return,
+        };
+        dst.resize(needed, 0);
+        let (result, _, _, _) = decoder.decode_to_utf16(chunk, &mut dst, last);
+        assert_ne!(result, CoderResult::OutputFull);
+        if last {
+            return;
+        }
+    }
+}
+
+/// Streams the payload through `Decoder::decode_to_utf16_without_replacement`
+/// in alternating chunk sizes, stopping at the first malformed sequence.
+///
+/// Panics if the decoder reports `OutputFull` for a buffer sized by
+/// `max_utf16_buffer_length`.
+fn decode_to_utf16_without_replacement(encoding: &'static Encoding, data: &[u8]) {
+    let (first, second, selector, s) = match decoder_params(data) {
+        Some(params) => params,
+        None => return,
+    };
+    let mut decoder = new_decoder(encoding, selector);
+    let mut offset = 0;
+    let mut dst: Vec<u16> = Vec::new();
+    let mut phase = false;
+    loop {
+        phase = !phase;
+        let chunk_size = if phase { first } else { second };
+        let (chunk, last) = take_chunk(s, &mut offset, chunk_size);
+        let needed = match decoder.max_utf16_buffer_length(chunk.len()) {
+            Some(needed) => needed,
+            None => return,
+        };
+        dst.resize(needed, 0);
+        let (result, _, _) = decoder.decode_to_utf16_without_replacement(chunk, &mut dst, last);
+        match result {
+            DecoderResult::InputEmpty => {
+                if last {
+                    return;
+                }
+            }
+            DecoderResult::OutputFull => unreachable!("Bogus max size math"),
+            DecoderResult::Malformed(_, _) => return,
+        }
+    }
+}
+
+/// Streams the payload through `Decoder::decode_to_utf8` in alternating chunk
+/// sizes, using the decoder flavour picked by the third header byte.
+///
+/// Panics if the decoder reports `OutputFull` for a buffer sized by
+/// `max_utf8_buffer_length`.
+fn decode_to_utf8(encoding: &'static Encoding, data: &[u8]) {
+    let (first, second, selector, s) = match decoder_params(data) {
+        Some(params) => params,
+        None => return,
+    };
+    let mut decoder = new_decoder(encoding, selector);
+    let mut offset = 0;
+    let mut dst: Vec<u8> = Vec::new();
+    let mut phase = false;
+    loop {
+        phase = !phase;
+        let chunk_size = if phase { first } else { second };
+        let (chunk, last) = take_chunk(s, &mut offset, chunk_size);
+        let needed = match decoder.max_utf8_buffer_length(chunk.len()) {
+            Some(needed) => needed,
+            None => return,
+        };
+        dst.resize(needed, 0);
+        let (result, _, _, _) = decoder.decode_to_utf8(chunk, &mut dst, last);
+        assert_ne!(result, CoderResult::OutputFull);
+        if last {
+            return;
+        }
+    }
+}
+
+/// Streams the payload through `Decoder::decode_to_utf8_without_replacement`
+/// in alternating chunk sizes, stopping at the first malformed sequence.
+///
+/// Panics if the decoder reports `OutputFull` for a buffer sized by
+/// `max_utf8_buffer_length_without_replacement`.
+fn decode_to_utf8_without_replacement(encoding: &'static Encoding, data: &[u8]) {
+    let (first, second, selector, s) = match decoder_params(data) {
+        Some(params) => params,
+        None => return,
+    };
+    let mut decoder = new_decoder(encoding, selector);
+    let mut offset = 0;
+    let mut dst: Vec<u8> = Vec::new();
+    let mut phase = false;
+    loop {
+        phase = !phase;
+        let chunk_size = if phase { first } else { second };
+        let (chunk, last) = take_chunk(s, &mut offset, chunk_size);
+        let needed = match decoder.max_utf8_buffer_length_without_replacement(chunk.len()) {
+            Some(needed) => needed,
+            None => return,
+        };
+        dst.resize(needed, 0);
+        let (result, _, _) = decoder.decode_to_utf8_without_replacement(chunk, &mut dst, last);
+        match result {
+            DecoderResult::InputEmpty => {
+                if last {
+                    return;
+                }
+            }
+            DecoderResult::OutputFull => unreachable!("Bogus max size math"),
+            DecoderResult::Malformed(_, _) => return,
+        }
+    }
+}
+
+/// Picks one of the thirteen drivers from the first byte of `data` (modulo
+/// 13) and runs it on the remaining bytes. Empty input is ignored.
+fn dispatch_test(encoding: &'static Encoding, data: &[u8]) {
+    let (selector, payload) = match data.split_first() {
+        Some((&selector, payload)) => (selector, payload),
+        None => return,
+    };
+    match selector % 13 {
+        0 => decode(encoding, payload),
+        1 => decode_with_bom_removal(encoding, payload),
+        2 => decode_without_bom_handling(encoding, payload),
+        3 => decode_without_bom_handling_and_without_replacement(encoding, payload),
+        4 => encode(encoding, payload),
+        5 => encode_from_utf8(encoding, payload),
+        6 => encode_from_utf8_without_replacement(encoding, payload),
+        7 => encode_from_utf16(encoding, payload),
+        8 => encode_from_utf16_without_replacement(encoding, payload),
+        9 => decode_to_utf16(encoding, payload),
+        10 => decode_to_utf16_without_replacement(encoding, payload),
+        11 => decode_to_utf8(encoding, payload),
+        12 => decode_to_utf8_without_replacement(encoding, payload),
+        _ => unreachable!("wrong divisor"),
+    }
+}
+
+// Fuzzer entry point: the first input byte selects the encoding and the rest
+// of the input is handed to `dispatch_test`. Empty input does nothing.
+fuzz_target!(
+    |data: &[u8]| {
+        if let Some((&first, rest)) = data.split_first() {
+            let encoding = ENCODINGS[first as usize % ENCODINGS.len()];
+            dispatch_test(encoding, rest);
+        }
+    }
+);