diff --git a/benches/cmp.rs b/benches/cmp.rs index 0b7bdad..b7c64f6 100644 --- a/benches/cmp.rs +++ b/benches/cmp.rs @@ -3,7 +3,7 @@ extern crate mime; extern crate test; -use mime::{Mime, TEXT_PLAIN_UTF_8}; +use mime::*; use test::Bencher; #[bench] @@ -23,3 +23,24 @@ fn bench_eq_consts(b: &mut Bencher) { assert_eq!(mime, TEXT_PLAIN_UTF_8); }); } + + +#[bench] +fn bench_ne_consts(b: &mut Bencher) { + let one = TEXT_XML; + let two = TEXT_CSS; + b.bytes = one.as_ref().len() as u64; + b.iter(|| { + assert_ne!(one, two); + }); +} + +#[bench] +fn bench_eq_type_(b: &mut Bencher) { + let mime = TEXT_PLAIN_UTF_8; + let name = TEXT; + b.bytes = name.as_ref().len() as u64; + b.iter(|| { + assert_eq!(mime.type_(), name); + }); +} diff --git a/src/lib.rs b/src/lib.rs index b5894dd..59c31ab 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,18 +5,28 @@ //! //! ## What is Mime? //! -//! Example mime string: `text/plain;charset=utf-8` +//! Example mime string: `text/plain` //! -//! ```rust -//! # #[macro_use] extern crate mime; -//! # fn main() { -//! let plain_text: mime::Mime = "text/plain;charset=utf-8".parse().unwrap(); -//! assert_eq!(plain_text, mime::TEXT_PLAIN_UTF_8); -//! # } +//! ``` +//! let plain_text: mime::Mime = "text/plain".parse().unwrap(); +//! assert_eq!(plain_text, mime::TEXT_PLAIN); +//! ``` +//! +//! ## Inspecting Mimes +//! +//! ``` +//! let mime = mime::TEXT_PLAIN; +//! match (mime.type_(), mime.subtype()) { +//! (mime::TEXT, mime::PLAIN) => println!("plain text!"), +//! (mime::TEXT, _) => println!("structured text"), +//! _ => println!("not text"), +//! } //! ``` #![doc(html_root_url = "https://docs.rs/mime")] -//#![cfg_attr(test, deny(warnings))] +#![deny(warnings)] +#![deny(missing_docs)] +#![deny(missing_debug_implementations)] extern crate unicase; @@ -26,6 +36,7 @@ use std::str::FromStr; mod parse; +/// A parsed mime or media type. 
#[derive(Clone)] pub struct Mime { source: Source, @@ -34,12 +45,27 @@ pub struct Mime { params: Params, } -#[derive(Clone, Copy)] +/// A section of a `Mime`. +/// +/// For instance, for the Mime `image/svg+xml`, it contains 3 `Name`s, +/// `image`, `svg`, and `xml`. +/// +/// In most cases, `Name`s are compared ignoring case. +#[derive(Clone, Copy, PartialEq, Eq)] pub struct Name<'a> { + // TODO: optimize with an Atom-like thing + // There are `const` Names, and so it is possible for the static strings + // to have a different memory address. Additionally, when used in match + // statements, the strings are compared with a memcmp, possibly even + // if the address and length are the same. + // + // Being an enum with an Atom variant that is a usize (and without a + // string pointer and boolean) would allow for faster comparisons. source: &'a str, insensitive: bool, } +/// An error when parsing a `Mime` from a string. #[derive(Debug)] pub struct FromStrError { inner: parse::ParseError, @@ -51,14 +77,6 @@ enum Source { Dynamic(String), } -struct Atom(u8); - -impl PartialEq for Atom { - fn eq(&self, other: &Atom) -> bool { - self.0 == other.0 && self.0 != 0 - } -} - impl Source { fn as_ref(&self) -> &str { match *self { @@ -79,6 +97,15 @@ enum Params { struct Str(usize, usize); impl Mime { + /// Get the top level media type for this `Mime`. + /// + /// # Example + /// + /// ``` + /// let mime = mime::TEXT_PLAIN; + /// assert_eq!(mime.type_(), "text"); + /// assert_eq!(mime.type_(), mime::TEXT); + /// ``` #[inline] pub fn type_(&self) -> Name { Name { @@ -87,6 +114,15 @@ impl Mime { } } + /// Get the subtype of this `Mime`. + /// + /// # Example + /// + /// ``` + /// let mime = mime::TEXT_PLAIN; + /// assert_eq!(mime.subtype(), "plain"); + /// assert_eq!(mime.subtype(), mime::PLAIN); + /// ``` #[inline] pub fn subtype(&self) -> Name { let end = self.plus.unwrap_or_else(|| { @@ -98,6 +134,18 @@ impl Mime { } } + /// Get an optional +suffix for this `Mime`. 
+ /// + /// # Example + /// + /// ``` + /// let svg = "image/svg+xml".parse::<mime::Mime>().unwrap(); + /// assert_eq!(svg.suffix(), Some(mime::XML)); + /// assert_eq!(svg.suffix().unwrap(), "xml"); + /// + /// + /// assert!(mime::TEXT_PLAIN.suffix().is_none()); + /// ``` #[inline] pub fn suffix(&self) -> Option<Name> { let end = self.semicolon().unwrap_or(self.source.as_ref().len()); @@ -107,6 +155,19 @@ impl Mime { }) } + /// Look up a parameter by name. + /// + /// # Example + /// + /// ``` + /// let mime = mime::TEXT_PLAIN_UTF_8; + /// assert_eq!(mime.get_param(mime::CHARSET), Some(mime::UTF_8)); + /// assert_eq!(mime.get_param("charset").unwrap(), "utf-8"); + /// assert!(mime.get_param("boundary").is_none()); + /// + /// let mime = "multipart/form-data; boundary=ABCDEFG".parse::<mime::Mime>().unwrap(); + /// assert_eq!(mime.get_param(mime::BOUNDARY).unwrap(), "ABCDEFG"); + /// ``` pub fn get_param<'a, N>(&'a self, attr: N) -> Option<Name<'a>> where N: PartialEq<Name<'a>> { match self.params { @@ -145,10 +206,10 @@ impl Mime { } } - fn atom(&self) -> Atom { + fn atom(&self) -> u8 { match self.source { - Source::Atom(a, _) => Atom(a), - _ => Atom(0), + Source::Atom(a, _) => a, + _ => 0, } } } @@ -260,10 +321,15 @@ fn params_eq(semicolon: usize, a: &str, b: &str) -> bool { impl PartialEq for Mime { #[inline] fn eq(&self, other: &Mime) -> bool { - if self.atom() == other.atom() { - true - } else { - mime_eq_str(self, other.source.as_ref()) + match (self.atom(), other.atom()) { + // TODO: + // This could optimize for when there are no custom parameters. + // Any parsed mime has already been lowercased, so if there aren't + // any parameters that are case sensitive, this can skip the + // unicase::eq_ascii, and just use a memcmp instead. 
+ (0, _) | + (_, 0) => mime_eq_str(self, other.source.as_ref()), + (a, b) => a == b, } } } @@ -321,18 +387,6 @@ fn name_eq_str(name: &Name, s: &str) -> bool { } } -impl<'a, 'b> PartialEq<Name<'b>> for Name<'a> { - #[inline] - fn eq(&self, other: &Name<'b>) -> bool { - if self.insensitive && other.insensitive { - unicase::eq_ascii(self.source, other.source) - } else { - panic!("ahh"); - } - } -} - - impl<'a, 'b> PartialEq<&'b str> for Name<'a> { #[inline] fn eq(&self, other: & &'b str) -> bool { @@ -371,11 +425,20 @@ impl<'a> fmt::Display for Name<'a> { macro_rules! names { ($($id:ident, $e:expr;)*) => ( $( - pub static $id: Name<'static> = Name { + #[doc = $e] + pub const $id: Name<'static> = Name { source: $e, insensitive: true, }; )* + + #[test] + fn test_names_macro_consts() { + use std::ascii::AsciiExt; + $( + assert_eq!($id.source.to_ascii_lowercase(), $id.source); + )* + } ) } @@ -442,7 +505,7 @@ macro_rules! mimes { )* #[test] - fn test_mimes_consts() { + fn test_mimes_macro_consts() { [ $( mime_constant_test! { @@ -470,6 +533,7 @@ macro_rules! mime_constant { (FULL $id:ident, $src:expr, $slash:expr, $plus:expr, $params:expr) => ( + #[doc = $src] pub const $id: Mime = Mime { source: Source::Atom(__Atoms::$id as u8, $src), slash: $slash, @@ -511,7 +575,7 @@ macro_rules! 
mime_constant_test { } else { unreachable!(); } - __mime.atom().0 + __mime.atom() }) } @@ -559,6 +623,14 @@ mod tests { assert_eq!(mime.subtype(), HTML); } + #[test] + fn test_matching() { + match (TEXT_PLAIN.type_(), TEXT_PLAIN.subtype()) { + (TEXT, PLAIN) => (), + _ => unreachable!(), + } + } + #[test] fn test_suffix() { assert_eq!(TEXT_PLAIN.suffix(), None); @@ -569,9 +641,9 @@ mod tests { #[test] fn test_mime_fmt() { let mime = TEXT_PLAIN; - assert_eq!(mime.to_string(), "text/plain".to_string()); + assert_eq!(mime.to_string(), "text/plain"); let mime = TEXT_PLAIN_UTF_8; - assert_eq!(mime.to_string(), "text/plain; charset=utf-8".to_string()); + assert_eq!(mime.to_string(), "text/plain; charset=utf-8"); } #[test] @@ -580,11 +652,25 @@ mod tests { assert_eq!(Mime::from_str("TEXT/PLAIN").unwrap(), TEXT_PLAIN); assert_eq!(Mime::from_str("text/plain; charset=utf-8").unwrap(), TEXT_PLAIN_UTF_8); assert_eq!(Mime::from_str("text/plain;charset=\"utf-8\"").unwrap(), TEXT_PLAIN_UTF_8); - assert_eq!(Mime::from_str("text/plain; charset=utf-8; foo=bar").unwrap(), - "text/plain; charset=utf-8; foo=bar"); + + let upper = Mime::from_str("TEXT/PLAIN").unwrap(); + assert_eq!(upper, TEXT_PLAIN); + assert_eq!(upper.type_(), TEXT); + assert_eq!(upper.subtype(), PLAIN); + + + let extended = Mime::from_str("TEXT/PLAIN; CHARSET=UTF-8; FOO=BAR").unwrap(); + assert_eq!(extended, "text/plain; charset=utf-8; foo=BAR"); + assert_eq!(extended.get_param("charset").unwrap(), "utf-8"); + assert_eq!(extended.get_param("foo").unwrap(), "BAR"); + + + // stars assert_eq!("*/*".parse::<Mime>().unwrap(), STAR_STAR); assert_eq!("image/*".parse::<Mime>().unwrap(), "image/*"); assert_eq!("text/*; charset=utf-8".parse::<Mime>().unwrap(), "text/*; charset=utf-8"); + + // parse errors assert!("*/png".parse::<Mime>().is_err()); assert!("*image/png".parse::<Mime>().is_err()); assert!("text/*plain".parse::<Mime>().is_err()); @@ -615,4 +701,23 @@ mod tests { let mime = Mime::from_str("text/plain;charset=\"utf-8\"").unwrap(); 
assert_eq!(mime.get_param(CHARSET), Some(UTF_8)); } + + #[test] + fn test_name_eq() { + assert_eq!(TEXT, TEXT); + assert_eq!(TEXT, "text"); + assert_eq!("text", TEXT); + assert_eq!(TEXT, "TEXT"); + + let param = Name { + source: "ABC", + insensitive: false, + }; + + assert_eq!(param, param); + assert_eq!(param, "ABC"); + assert_eq!("ABC", param); + assert_ne!(param, "abc"); + assert_ne!("abc", param); + } } diff --git a/src/parse.rs b/src/parse.rs index 3ff9a28..6175982 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -1,3 +1,4 @@ +use std::ascii::AsciiExt; use std::iter::Enumerate; use std::str::Bytes; @@ -54,7 +55,7 @@ pub fn parse(s: &str) -> Result<Mime, ParseError> { Some((i, c)) if !sub_star && i > start && is_restricted_name_char(c) => (), None => { return Ok(Mime { - source: Source::Dynamic(s.to_owned()), + source: Source::Dynamic(s.to_ascii_lowercase()), slash: slash, plus: plus, params: Params::None, @@ -67,8 +68,14 @@ pub fn parse(s: &str) -> Result<Mime, ParseError> { // params let params = try!(params_from_str(s, &mut iter, start)); + let src = match params { + Params::Utf8(_) | + Params::None => s.to_ascii_lowercase(), + Params::Custom(semicolon, ref indices) => lower_ascii_with_params(s, semicolon, indices), + }; + Ok(Mime { - source: Source::Dynamic(s.to_owned()), + source: Source::Dynamic(src), slash: slash, plus: plus, params: params, @@ -166,6 +173,23 @@ fn params_from_str(s: &str, iter: &mut Enumerate<Bytes>, mut start: usize) -> Re Ok(params) } +fn lower_ascii_with_params(s: &str, semi: usize, params: &[(Str, Str)]) -> String { + let mut owned = s.to_owned(); + owned[..semi].make_ascii_lowercase(); + + for &(ref name, ref value) in params { + owned[name.0..name.1].make_ascii_lowercase(); + // Since we just converted this part of the string to lowercase, + // we can skip the `Name == &str` unicase check and do a faster + // memcmp instead. 
+ if &owned[name.0..name.1] == CHARSET.source { + owned[value.0..value.1].make_ascii_lowercase(); + } + } + + owned +} + // From [RFC6838](http://tools.ietf.org/html/rfc6838#section-4.2): // // > All registered media types MUST be assigned top-level type and @@ -187,21 +211,83 @@ fn params_from_str(s: &str, iter: &mut Enumerate, mut start: usize) -> Re // > ; specify a facet name // > restricted-name-chars =/ "+" ; Characters after last plus always // > ; specify a structured syntax suffix -// + + +macro_rules! byte_map { + ($($flag:expr,)*) => ([ + $($flag != 0,)* + ]) +} + + +static RESTRICTED_NAME_FIRST: [bool; 256] = byte_map![ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +]; + +static RESTRICTED_NAME_CHAR: [bool; 256] = byte_map![ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +]; + fn is_restricted_name_first_char(c: u8) -> bool { - match c { - b'a'...b'z' | - b'A'...b'Z' | - b'0'...b'9' => true, - _ => false - } + RESTRICTED_NAME_FIRST[c as usize] } fn is_restricted_name_char(c: u8) -> bool { - if is_restricted_name_first_char(c) { - true - } else { - match c { + RESTRICTED_NAME_CHAR[c as usize] +} + +fn is_restricted_quoted_char(c: u8) -> bool { + c > 31 && c != 127 +} + +#[test] +fn test_lookup_tables() { + for (i, &valid) in RESTRICTED_NAME_FIRST.iter().enumerate() { + let i = i as u8; + let should = match i { + b'a'...b'z' | + b'A'...b'Z' | + b'0'...b'9' => true, + _ => false + }; + assert_eq!(valid, should, "{:?} ({}) should be {}", i as char, i, should); + } + for (i, &valid) in RESTRICTED_NAME_CHAR.iter().enumerate() { + let i = i as u8; + let should = match i { + b'a'...b'z' | + b'A'...b'Z' | + b'0'...b'9' | b'!' | b'#' | b'$' | @@ -212,10 +298,7 @@ fn is_restricted_name_char(c: u8) -> bool { b'+' | b'_' => true, _ => false - } + }; + assert_eq!(valid, should, "{:?} ({}) should be {}", i as char, i, should); } } - -fn is_restricted_quoted_char(c: u8) -> bool { - c > 31 && c != 127 -}