get type and subtype matching working

2026-06-30 21:47:57 -04:00 · 2017-06-07 12:27:41 -07:00
parent 855fa8f446
commit c4032f24a6
3 changed files with 271 additions and 62 deletions
@@ -3,7 +3,7 @@
 extern crate mime;
 extern crate test;

-use mime::{Mime, TEXT_PLAIN_UTF_8};
+use mime::*;
 use test::Bencher;

 #[bench]
@@ -23,3 +23,24 @@ fn bench_eq_consts(b: &mut Bencher) {
        assert_eq!(mime, TEXT_PLAIN_UTF_8);
    });
 }
+
+
+#[bench]
+fn bench_ne_consts(b: &mut Bencher) {
+    let one = TEXT_XML;
+    let two = TEXT_CSS;
+    b.bytes = one.as_ref().len() as u64;
+    b.iter(|| {
+        assert_ne!(one, two);
+    });
+}
+
+#[bench]
+fn bench_eq_type_(b: &mut Bencher) {
+    let mime = TEXT_PLAIN_UTF_8;
+    let name = TEXT;
+    b.bytes = name.as_ref().len() as u64;
+    b.iter(|| {
+        assert_eq!(mime.type_(), name);
+    });
+}
@@ -5,18 +5,28 @@
 //!
 //! ## What is Mime?
 //!
-//! Example mime string: `text/plain;charset=utf-8`
+//! Example mime string: `text/plain`
 //!
-//! ```rust
-//! # #[macro_use] extern crate mime;
-//! # fn main() {
-//! let plain_text: mime::Mime = "text/plain;charset=utf-8".parse().unwrap();
-//! assert_eq!(plain_text, mime::TEXT_PLAIN_UTF_8);
-//! # }
+//! ```
+//! let plain_text: mime::Mime = "text/plain".parse().unwrap();
+//! assert_eq!(plain_text, mime::TEXT_PLAIN);
+//! ```
+//!
+//! ## Inspecting Mimes
+//!
+//! ```
+//! let mime = mime::TEXT_PLAIN;
+//! match (mime.type_(), mime.subtype()) {
+//!     (mime::TEXT, mime::PLAIN) => println!("plain text!"),
+//!     (mime::TEXT, _) => println!("structured text"),
+//!     _ => println!("not text"),
+//! }
 //! ```

 #![doc(html_root_url = "https://docs.rs/mime")]
-//#![cfg_attr(test, deny(warnings))]
+#![deny(warnings)]
+#![deny(missing_docs)]
+#![deny(missing_debug_implementations)]


 extern crate unicase;
@@ -26,6 +36,7 @@ use std::str::FromStr;

 mod parse;

+/// A parsed mime or media type.
 #[derive(Clone)]
 pub struct Mime {
    source: Source,
@@ -34,12 +45,27 @@ pub struct Mime {
    params: Params,
 }

-#[derive(Clone, Copy)]
+/// A section of a `Mime`.
+///
+/// For instance, for the Mime `image/svg+xml`, it contains 3 `Name`s,
+/// `image`, `svg`, and `xml`.
+///
+/// In most cases, `Name`s are compared ignoring case.
+#[derive(Clone, Copy, PartialEq, Eq)]
 pub struct Name<'a> {
+    // TODO: optimize with an Atom-like thing
+    // There a `const` Names, and so it is possible for the statis strings
+    // to havea different memory address. Additionally, when used in match
+    // statements, the strings are compared with a memcmp, possibly even
+    // if the address and length are the same.
+    //
+    // Being an enum with an Atom variant that is a usize (and without a
+    // string pointer and boolean) would allow for faster comparisons.
    source: &'a str,
    insensitive: bool,
 }

+/// An error when parsing a `Mime` from a string.
 #[derive(Debug)]
 pub struct FromStrError {
    inner: parse::ParseError,
@@ -51,14 +77,6 @@ enum Source {
    Dynamic(String),
 }

-struct Atom(u8);
-
-impl PartialEq for Atom {
-    fn eq(&self, other: &Atom) -> bool {
-        self.0 == other.0 && self.0 != 0
-    }
-}
-
 impl Source {
    fn as_ref(&self) -> &str {
        match *self {
@@ -79,6 +97,15 @@ enum Params {
 struct Str(usize, usize);

 impl Mime {
+    /// Get the top level media type for this `Mime`.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// let mime = mime::TEXT_PLAIN;
+    /// assert_eq!(mime.type_(), "text");
+    /// assert_eq!(mime.type_(), mime::TEXT);
+    /// ```
    #[inline]
    pub fn type_(&self) -> Name {
        Name {
@@ -87,6 +114,15 @@ impl Mime {
        }
    }

+    /// Get the subtype of this `Mime`.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// let mime = mime::TEXT_PLAIN;
+    /// assert_eq!(mime.subtype(), "plain");
+    /// assert_eq!(mime.subtype(), mime::PLAIN);
+    /// ```
    #[inline]
    pub fn subtype(&self) -> Name {
        let end = self.plus.unwrap_or_else(|| {
@@ -98,6 +134,18 @@ impl Mime {
        }
    }

+    /// Get an optional +suffix for this `Mime`.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// let svg = "image/svg+xml".parse::<mime::Mime>().unwrap();
+    /// assert_eq!(svg.suffix(), Some(mime::XML));
+    /// assert_eq!(svg.suffix().unwrap(), "xml");
+    ///
+    ///
+    /// assert!(mime::TEXT_PLAIN.suffix().is_none());
+    /// ```
    #[inline]
    pub fn suffix(&self) -> Option<Name> {
        let end = self.semicolon().unwrap_or(self.source.as_ref().len());
@@ -107,6 +155,19 @@ impl Mime {
        })
    }

+    /// Look up a parameter by name.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// let mime = mime::TEXT_PLAIN_UTF_8;
+    /// assert_eq!(mime.get_param(mime::CHARSET), Some(mime::UTF_8));
+    /// assert_eq!(mime.get_param("charset").unwrap(), "utf-8");
+    /// assert!(mime.get_param("boundary").is_none());
+    ///
+    /// let mime = "multipart/form-data; boundary=ABCDEFG".parse::<mime::Mime>().unwrap();
+    /// assert_eq!(mime.get_param(mime::BOUNDARY).unwrap(), "ABCDEFG");
+    /// ```
    pub fn get_param<'a, N>(&'a self, attr: N) -> Option<Name<'a>>
    where N: PartialEq<Name<'a>> {
        match self.params {
@@ -145,10 +206,10 @@ impl Mime {
        }
    }

-    fn atom(&self) -> Atom {
+    fn atom(&self) -> u8 {
        match self.source {
-            Source::Atom(a, _) => Atom(a),
-            _ => Atom(0),
+            Source::Atom(a, _) => a,
+            _ => 0,
        }
    }
 }
@@ -260,10 +321,15 @@ fn params_eq(semicolon: usize, a: &str, b: &str) -> bool {
 impl PartialEq for Mime {
    #[inline]
    fn eq(&self, other: &Mime) -> bool {
-        if self.atom() == other.atom() {
-            true
-        } else {
-            mime_eq_str(self, other.source.as_ref())
+        match (self.atom(), other.atom()) {
+            // TODO:
+            // This could optimize for when there are no customs parameters.
+            // Any parsed mime has already been lowercased, so if there aren't
+            // any parameters that are case sensistive, this can skip the
+            // unicase::eq_ascii, and just use a memcmp instead.
+            (0, _) |
+            (_, 0) => mime_eq_str(self, other.source.as_ref()),
+            (a, b) => a == b,
        }
    }
 }
@@ -321,18 +387,6 @@ fn name_eq_str(name: &Name, s: &str) -> bool {
    }
 }

-impl<'a, 'b> PartialEq<Name<'b>> for Name<'a> {
-    #[inline]
-    fn eq(&self, other: &Name<'b>) -> bool {
-        if self.insensitive && other.insensitive {
-            unicase::eq_ascii(self.source, other.source)
-        } else {
-            panic!("ahh");
-        }
-    }
-}
-
-
 impl<'a, 'b> PartialEq<&'b str> for Name<'a> {
    #[inline]
    fn eq(&self, other: & &'b str) -> bool {
@@ -371,11 +425,20 @@ impl<'a> fmt::Display for Name<'a> {
 macro_rules! names {
    ($($id:ident, $e:expr;)*) => (
        $(
-        pub static $id: Name<'static> = Name {
+        #[doc = $e]
+        pub const $id: Name<'static> = Name {
            source: $e,
            insensitive: true,
        };
        )*
+
+        #[test]
+        fn test_names_macro_consts() {
+            use std::ascii::AsciiExt;
+            $(
+            assert_eq!($id.source.to_ascii_lowercase(), $id.source);
+            )*
+        }
    )
 }

@@ -442,7 +505,7 @@ macro_rules! mimes {
        )*

        #[test]
-        fn test_mimes_consts() {
+        fn test_mimes_macro_consts() {
            [
            $(
            mime_constant_test! {
@@ -470,6 +533,7 @@ macro_rules! mime_constant {


    (FULL $id:ident, $src:expr, $slash:expr, $plus:expr, $params:expr) => (
+        #[doc = $src]
        pub const $id: Mime = Mime {
            source: Source::Atom(__Atoms::$id as u8, $src),
            slash: $slash,
@@ -511,7 +575,7 @@ macro_rules! mime_constant_test {
        } else {
            unreachable!();
        }
-        __mime.atom().0
+        __mime.atom()
    })
 }

@@ -559,6 +623,14 @@ mod tests {
        assert_eq!(mime.subtype(), HTML);
    }

+    #[test]
+    fn test_matching() {
+        match (TEXT_PLAIN.type_(), TEXT_PLAIN.subtype()) {
+            (TEXT, PLAIN) => (),
+            _ => unreachable!(),
+        }
+    }
+
    #[test]
    fn test_suffix() {
        assert_eq!(TEXT_PLAIN.suffix(), None);
@@ -569,9 +641,9 @@ mod tests {
    #[test]
    fn test_mime_fmt() {
        let mime = TEXT_PLAIN;
-        assert_eq!(mime.to_string(), "text/plain".to_string());
+        assert_eq!(mime.to_string(), "text/plain");
        let mime = TEXT_PLAIN_UTF_8;
-        assert_eq!(mime.to_string(), "text/plain; charset=utf-8".to_string());
+        assert_eq!(mime.to_string(), "text/plain; charset=utf-8");
    }

    #[test]
@@ -580,11 +652,25 @@ mod tests {
        assert_eq!(Mime::from_str("TEXT/PLAIN").unwrap(), TEXT_PLAIN);
        assert_eq!(Mime::from_str("text/plain; charset=utf-8").unwrap(), TEXT_PLAIN_UTF_8);
        assert_eq!(Mime::from_str("text/plain;charset=\"utf-8\"").unwrap(), TEXT_PLAIN_UTF_8);
-        assert_eq!(Mime::from_str("text/plain; charset=utf-8; foo=bar").unwrap(),
-            "text/plain; charset=utf-8; foo=bar");
+
+        let upper = Mime::from_str("TEXT/PLAIN").unwrap();
+        assert_eq!(upper, TEXT_PLAIN);
+        assert_eq!(upper.type_(), TEXT);
+        assert_eq!(upper.subtype(), PLAIN);
+
+
+        let extended = Mime::from_str("TEXT/PLAIN; CHARSET=UTF-8; FOO=BAR").unwrap();
+        assert_eq!(extended, "text/plain; charset=utf-8; foo=BAR");
+        assert_eq!(extended.get_param("charset").unwrap(), "utf-8");
+        assert_eq!(extended.get_param("foo").unwrap(), "BAR");
+
+
+        // stars
        assert_eq!("*/*".parse::<Mime>().unwrap(), STAR_STAR);
        assert_eq!("image/*".parse::<Mime>().unwrap(), "image/*");
        assert_eq!("text/*; charset=utf-8".parse::<Mime>().unwrap(), "text/*; charset=utf-8");
+
+        // parse errors
        assert!("*/png".parse::<Mime>().is_err());
        assert!("*image/png".parse::<Mime>().is_err());
        assert!("text/*plain".parse::<Mime>().is_err());
@@ -615,4 +701,23 @@ mod tests {
        let mime = Mime::from_str("text/plain;charset=\"utf-8\"").unwrap();
        assert_eq!(mime.get_param(CHARSET), Some(UTF_8));
    }
+
+    #[test]
+    fn test_name_eq() {
+        assert_eq!(TEXT, TEXT);
+        assert_eq!(TEXT, "text");
+        assert_eq!("text", TEXT);
+        assert_eq!(TEXT, "TEXT");
+
+        let param = Name {
+            source: "ABC",
+            insensitive: false,
+        };
+
+        assert_eq!(param, param);
+        assert_eq!(param, "ABC");
+        assert_eq!("ABC", param);
+        assert_ne!(param, "abc");
+        assert_ne!("abc", param);
+    }
 }
@@ -1,3 +1,4 @@
+use std::ascii::AsciiExt;
 use std::iter::Enumerate;
 use std::str::Bytes;

@@ -54,7 +55,7 @@ pub fn parse(s: &str) -> Result<Mime, ParseError> {
            Some((i, c)) if !sub_star && i > start && is_restricted_name_char(c) => (),
            None => {
                return Ok(Mime {
-                    source: Source::Dynamic(s.to_owned()),
+                    source: Source::Dynamic(s.to_ascii_lowercase()),
                    slash: slash,
                    plus: plus,
                    params: Params::None,
@@ -67,8 +68,14 @@ pub fn parse(s: &str) -> Result<Mime, ParseError> {
    // params
    let params = try!(params_from_str(s, &mut iter, start));

+    let src = match params {
+        Params::Utf8(_) |
+        Params::None => s.to_ascii_lowercase(),
+        Params::Custom(semicolon, ref indices) => lower_ascii_with_params(s, semicolon, indices),
+    };
+
    Ok(Mime {
-        source: Source::Dynamic(s.to_owned()),
+        source: Source::Dynamic(src),
        slash: slash,
        plus: plus,
        params: params,
@@ -166,6 +173,23 @@ fn params_from_str(s: &str, iter: &mut Enumerate<Bytes>, mut start: usize) -> Re
    Ok(params)
 }

+fn lower_ascii_with_params(s: &str, semi: usize, params: &[(Str, Str)]) -> String {
+    let mut owned = s.to_owned();
+    owned[..semi].make_ascii_lowercase();
+
+    for &(ref name, ref value) in params {
+        owned[name.0..name.1].make_ascii_lowercase();
+        // Since we just converted this part of the string to lowercase,
+        // we can skip the `Name == &str` unicase check and do a faster
+        // memcmp instead.
+        if &owned[name.0..name.1] == CHARSET.source {
+            owned[value.0..value.1].make_ascii_lowercase();
+        }
+    }
+
+    owned
+}
+
 // From [RFC6838](http://tools.ietf.org/html/rfc6838#section-4.2):
 //
 // > All registered media types MUST be assigned top-level type and
@@ -187,21 +211,83 @@ fn params_from_str(s: &str, iter: &mut Enumerate<Bytes>, mut start: usize) -> Re
 // >                                  ; specify a facet name
 // >     restricted-name-chars =/ "+" ; Characters after last plus always
 // >                                  ; specify a structured syntax suffix
-//
+
+
+macro_rules! byte_map {
+    ($($flag:expr,)*) => ([
+        $($flag != 0,)*
+    ])
+}
+
+
+static RESTRICTED_NAME_FIRST: [bool; 256] = byte_map![
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
+    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+];
+
+static RESTRICTED_NAME_CHAR: [bool; 256] = byte_map![
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
+    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
+    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+];
+
 fn is_restricted_name_first_char(c: u8) -> bool {
-    match c {
-        b'a'...b'z' |
-        b'A'...b'Z' |
-        b'0'...b'9' => true,
-        _ => false
-    }
+    RESTRICTED_NAME_FIRST[c as usize]
 }

 fn is_restricted_name_char(c: u8) -> bool {
-    if is_restricted_name_first_char(c) {
-        true
-    } else {
-        match c {
+    RESTRICTED_NAME_CHAR[c as usize]
+}
+
+fn is_restricted_quoted_char(c: u8) -> bool {
+    c > 31 && c != 127
+}
+
+#[test]
+fn test_lookup_tables() {
+    for (i, &valid) in RESTRICTED_NAME_FIRST.iter().enumerate() {
+        let i = i as u8;
+        let should = match i {
+            b'a'...b'z' |
+            b'A'...b'Z' |
+            b'0'...b'9' => true,
+            _ => false
+        };
+        assert_eq!(valid, should, "{:?} ({}) should be {}", i as char, i, should);
+    }
+    for (i, &valid) in RESTRICTED_NAME_CHAR.iter().enumerate() {
+        let i = i as u8;
+        let should = match i {
+            b'a'...b'z' |
+            b'A'...b'Z' |
+            b'0'...b'9' |
            b'!' |
            b'#' |
            b'$' |
@@ -212,10 +298,7 @@ fn is_restricted_name_char(c: u8) -> bool {
            b'+' |
            b'_' => true,
            _ => false
-        }
+        };
+        assert_eq!(valid, should, "{:?} ({}) should be {}", i as char, i, should);
    }
 }
-
-fn is_restricted_quoted_char(c: u8) -> bool {
-    c > 31 && c != 127
-}