From 183eae7be067f3f84887a0d63da5a815808c290b Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Tue, 15 Jun 2021 22:17:27 +0000 Subject: [PATCH] Bug 1716518 - Upgrade ppv-lite86 to v0.2.10. r=emilio Differential Revision: https://phabricator.services.mozilla.com/D117836 --- Cargo.lock | 4 +- .../rust/ppv-lite86/.cargo-checksum.json | 2 +- third_party/rust/ppv-lite86/Cargo.toml | 5 +- third_party/rust/ppv-lite86/src/generic.rs | 69 +++- third_party/rust/ppv-lite86/src/lib.rs | 8 +- third_party/rust/ppv-lite86/src/soft.rs | 11 +- third_party/rust/ppv-lite86/src/types.rs | 337 +++++++++--------- third_party/rust/ppv-lite86/src/x86_64/mod.rs | 23 +- .../rust/ppv-lite86/src/x86_64/sse2.rs | 122 +++---- 9 files changed, 324 insertions(+), 257 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6e499fdf4a05..787471187885 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3902,9 +3902,9 @@ checksum = "b18befed8bc2b61abc79a457295e7e838417326da1586050b919414073977f19" [[package]] name = "ppv-lite86" -version = "0.2.6" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b" +checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" [[package]] name = "precomputed-hash" diff --git a/third_party/rust/ppv-lite86/.cargo-checksum.json b/third_party/rust/ppv-lite86/.cargo-checksum.json index 784cd4e54d6b..b4ea624071ad 100644 --- a/third_party/rust/ppv-lite86/.cargo-checksum.json +++ b/third_party/rust/ppv-lite86/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"Cargo.toml":"3cf6fa0e4089c12be1d19ac9082aabed68d3f193dbd5024379c22a8a4db15abe","LICENSE-APACHE":"0218327e7a480793ffdd4eb792379a9709e5c135c7ba267f709d6f6d4d70af0a","LICENSE-MIT":"4cada0bd02ea3692eee6f16400d86c6508bbd3bafb2b65fed0419f36d4f83e8f","src/generic.rs":"6c19b3062aec77a92130be1ce55c90aeca0811bcb19951c25a50c8fbe89a26d0","src/lib.rs":"75beb27d89dcc7541c8e81ad1f4bec81908d8d5fa0e3adec47cb1a1f008dfd32","src/soft.rs":"6fb8aa428183ec09d63d45761507d8da6dffc45990f2d1fcfd387c4c856599cc","src/types.rs":"4890069359ed53575a6b9a8168037ccdd4b029c8d61d540e9770fe3c90359345","src/x86_64/mod.rs":"e95910e8c9d23c212055598a437bfcdfaebf4f12e03a04e75f961bc3e9e257a1","src/x86_64/sse2.rs":"da72424e9e3fabd6236d4de80edb1448dc0bac02797df8e15298d412cdaef10c"},"package":"74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b"} \ No newline at end of file +{"files":{"Cargo.toml":"5d9b7092f252e3a6f7f50f6aeb1b873803b322cf5edbf0ae07e0a27d57df3fbf","LICENSE-APACHE":"0218327e7a480793ffdd4eb792379a9709e5c135c7ba267f709d6f6d4d70af0a","LICENSE-MIT":"4cada0bd02ea3692eee6f16400d86c6508bbd3bafb2b65fed0419f36d4f83e8f","src/generic.rs":"6f38250421846499c816c222d0b48155bfab09a9921e6c400d7b75567ab98f14","src/lib.rs":"bcf308d7037e259d6640a785556fcdb86653cb4f72f64fbfeda9899857c14479","src/soft.rs":"5cdee0e46c99a9d5078c0b3a733fe6fd1430ed0a888ef747bc2a1271265a1140","src/types.rs":"a354d2e3267c7c451a1420903314a358328346772ca964fa6c1ef7b96c983930","src/x86_64/mod.rs":"4d5a1da816f8e59bb385464f005075de889d1060e24dcee6709b321a3d6c92f7","src/x86_64/sse2.rs":"a9df3e7b3b8ffcd249a2cbed0e538042f7747dfa6ae7af0c9af364dc5a12d409"},"package":"ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857"} \ No newline at end of file diff --git a/third_party/rust/ppv-lite86/Cargo.toml b/third_party/rust/ppv-lite86/Cargo.toml index 012a7f4d75cf..6ffa7bab2ad8 100644 --- a/third_party/rust/ppv-lite86/Cargo.toml +++ b/third_party/rust/ppv-lite86/Cargo.toml @@ -13,7 +13,7 @@ [package] edition = "2018" name = "ppv-lite86" -version = "0.2.6" +version = "0.2.10" authors = ["The CryptoCorrosion Contributors"] description = "Implementation of the crypto-simd API for x86" keywords = ["crypto", "simd", "x86"] @@ -24,7 +24,8 @@ repository = "https://github.com/cryptocorrosion/cryptocorrosion" [dependencies] [features] -default = ["std", "simd"] +default = ["std"] +no_simd = [] simd = [] std = [] [badges.travis-ci] diff --git a/third_party/rust/ppv-lite86/src/generic.rs b/third_party/rust/ppv-lite86/src/generic.rs index 2d0a74cf0870..f0e83d9638e8 100644 --- a/third_party/rust/ppv-lite86/src/generic.rs +++ b/third_party/rust/ppv-lite86/src/generic.rs @@ -1,14 +1,14 @@ #![allow(non_camel_case_types)] -use core::ops::*; use crate::soft::{x2, x4}; use crate::types::*; +use core::ops::*; +#[repr(C)] #[derive(Clone, Copy)] pub union vec128_storage { d: [u32; 4], q: [u64; 2], - o: [u128; 1], } impl From<[u32; 4]> for vec128_storage { #[inline] @@ -16,7 +16,38 @@ impl From<[u32; 4]> for vec128_storage { Self { d } } } -#[derive(Clone, Copy)] +impl From for [u32; 4] { + #[inline] + fn from(d: vec128_storage) -> Self { + unsafe { d.d } + } +} +impl From<[u64; 2]> for vec128_storage { + #[inline] + fn from(q: [u64; 2]) -> Self { + Self { q } + } +} +impl From for [u64; 2] { + #[inline] + fn from(q: vec128_storage) -> Self { + unsafe { q.q } + } +} +impl Default for vec128_storage { + #[inline] + fn default() -> Self { + Self { q: [0, 0] } + } +} +impl Eq for vec128_storage {} +impl PartialEq for vec128_storage { + #[inline] + fn eq(&self, rhs: &Self) -> bool { + unsafe { self.q == rhs.q } + } +} +#[derive(Clone, Copy, PartialEq, Eq, Default)] pub struct vec256_storage { v128: [vec128_storage; 2], } @@ -30,7 +61,15 @@ impl vec256_storage { self.v128 } } -#[derive(Clone, Copy)] +impl From for [u64; 4] { + #[inline] + fn from(q: vec256_storage) -> Self { + let [a, b]: [u64; 2] = q.v128[0].into(); + let [c, d]: [u64; 2] = q.v128[1].into(); + [a, b, c, d] + } +} +#[derive(Clone, Copy, PartialEq, Eq, Default)] pub struct vec512_storage { v128: [vec128_storage; 4], } @@ -106,14 +145,22 @@ where unsafe { T::unpack(q) } } +fn o_of_q(q: [u64; 2]) -> u128 { + u128::from(q[0]) | (u128::from(q[1]) << 64) +} + +fn q_of_o(o: u128) -> [u64; 2] { + [o as u64, (o >> 64) as u64] +} + fn omap(a: T, f: F) -> T where T: Store + Into, F: Fn(u128) -> u128, { let a: vec128_storage = a.into(); - let ao = unsafe { a.o }; - let o = vec128_storage { o: [f(ao[0])] }; + let ao = o_of_q(unsafe { a.q }); + let o = vec128_storage { q: q_of_o(f(ao)) }; unsafe { T::unpack(o) } } @@ -124,10 +171,10 @@ where { let a: vec128_storage = a.into(); let b: vec128_storage = b.into(); - let ao = unsafe { a.o }; - let bo = unsafe { b.o }; + let ao = o_of_q(unsafe { a.q }); + let bo = o_of_q(unsafe { b.q }); let o = vec128_storage { - o: [f(ao[0], bo[0])], + q: q_of_o(f(ao, bo)), }; unsafe { T::unpack(o) } } @@ -411,7 +458,7 @@ impl From for vec128_storage { impl From for vec128_storage { #[inline(always)] fn from(o: u128x1_generic) -> Self { - Self { o: o.0 } + Self { q: q_of_o(o.0[0]) } } } @@ -430,7 +477,7 @@ impl Store for u64x2_generic { impl Store for u128x1_generic { #[inline(always)] unsafe fn unpack(s: vec128_storage) -> Self { - Self(s.o) + Self([o_of_q(s.q); 1]) } } diff --git a/third_party/rust/ppv-lite86/src/lib.rs b/third_party/rust/ppv-lite86/src/lib.rs index 43dc5d869a3f..ea89c5123922 100644 --- a/third_party/rust/ppv-lite86/src/lib.rs +++ b/third_party/rust/ppv-lite86/src/lib.rs @@ -9,14 +9,14 @@ mod soft; mod types; pub use self::types::*; -#[cfg(all(feature = "simd", target_arch = "x86_64", not(miri)))] +#[cfg(all(target_arch = "x86_64", not(feature = "no_simd"), not(miri)))] pub mod x86_64; -#[cfg(all(feature = "simd", target_arch = "x86_64", not(miri)))] +#[cfg(all(target_arch = "x86_64", not(feature = "no_simd"), not(miri)))] use self::x86_64 as arch; -#[cfg(any(miri, not(all(feature = "simd", any(target_arch = "x86_64")))))] +#[cfg(any(feature = "no_simd", miri, not(target_arch = "x86_64")))] pub mod generic; -#[cfg(any(miri, not(all(feature = "simd", any(target_arch = "x86_64")))))] +#[cfg(any(feature = "no_simd", miri, not(target_arch = "x86_64")))] use self::generic as arch; pub use self::arch::{vec128_storage, vec256_storage, vec512_storage}; diff --git a/third_party/rust/ppv-lite86/src/soft.rs b/third_party/rust/ppv-lite86/src/soft.rs index d12dac528f16..8976c484ecef 100644 --- a/third_party/rust/ppv-lite86/src/soft.rs +++ b/third_party/rust/ppv-lite86/src/soft.rs @@ -1,9 +1,9 @@ //! Implement 256- and 512- bit in terms of 128-bit, for machines without native wide SIMD. -use core::marker::PhantomData; -use core::ops::*; use crate::types::*; use crate::{vec128_storage, vec256_storage, vec512_storage}; +use core::marker::PhantomData; +use core::ops::*; #[derive(Copy, Clone, Default)] #[allow(non_camel_case_types)] @@ -238,7 +238,12 @@ macro_rules! fwd_unop_x4 { ($fn:ident) => { #[inline(always)] fn $fn(self) -> Self { - x4([self.0[0].$fn(), self.0[1].$fn(), self.0[2].$fn(), self.0[3].$fn()]) + x4([ + self.0[0].$fn(), + self.0[1].$fn(), + self.0[2].$fn(), + self.0[3].$fn(), + ]) } }; } diff --git a/third_party/rust/ppv-lite86/src/types.rs b/third_party/rust/ppv-lite86/src/types.rs index 119b6bb8da59..a2826707d383 100644 --- a/third_party/rust/ppv-lite86/src/types.rs +++ b/third_party/rust/ppv-lite86/src/types.rs @@ -1,3 +1,4 @@ +#![allow(non_camel_case_types)] use core::ops::{Add, AddAssign, BitAnd, BitOr, BitXor, BitXorAssign, Not}; pub trait AndNot { @@ -44,182 +45,178 @@ pub trait RotateEachWord64 { pub trait RotateEachWord128 {} -#[allow(non_camel_case_types)] -mod types { - //! Vector type naming scheme: - //! uN[xP]xL - //! Unsigned; N-bit words * P bits per lane * L lanes - //! - //! A lane is always 128-bits, chosen because common SIMD architectures treat 128-bit units of - //! wide vectors specially (supporting e.g. intra-lane shuffles), and tend to have limited and - //! slow inter-lane operations. +// Vector type naming scheme: +// uN[xP]xL +// Unsigned; N-bit words * P bits per lane * L lanes +// +// A lane is always 128-bits, chosen because common SIMD architectures treat 128-bit units of +// wide vectors specially (supporting e.g. intra-lane shuffles), and tend to have limited and +// slow inter-lane operations. - use crate::arch::{vec128_storage, vec256_storage, vec512_storage}; - use crate::{ArithOps, BitOps128, BitOps32, BitOps64, Machine, Store, StoreBytes}; +use crate::arch::{vec128_storage, vec256_storage, vec512_storage}; - pub trait UnsafeFrom { - unsafe fn unsafe_from(t: T) -> Self; - } - - /// A vector composed of two elements, which may be words or themselves vectors. - pub trait Vec2 { - fn extract(self, i: u32) -> W; - fn insert(self, w: W, i: u32) -> Self; - } - - /// A vector composed of four elements, which may be words or themselves vectors. - pub trait Vec4 { - fn extract(self, i: u32) -> W; - fn insert(self, w: W, i: u32) -> Self; - } - - // TODO: multiples of 4 should inherit this - /// A vector composed of four words; depending on their size, operations may cross lanes. - pub trait Words4 { - fn shuffle1230(self) -> Self; - fn shuffle2301(self) -> Self; - fn shuffle3012(self) -> Self; - } - - /// A vector composed one or more lanes each composed of four words. - pub trait LaneWords4 { - fn shuffle_lane_words1230(self) -> Self; - fn shuffle_lane_words2301(self) -> Self; - fn shuffle_lane_words3012(self) -> Self; - } - - // TODO: make this a part of BitOps - /// Exchange neigboring ranges of bits of the specified size - pub trait Swap64 { - fn swap1(self) -> Self; - fn swap2(self) -> Self; - fn swap4(self) -> Self; - fn swap8(self) -> Self; - fn swap16(self) -> Self; - fn swap32(self) -> Self; - fn swap64(self) -> Self; - } - - pub trait u32x4: - BitOps32 - + Store - + ArithOps - + Vec4 - + Words4 - + LaneWords4 - + StoreBytes - + MultiLane<[u32; 4]> - + Into - { -} - pub trait u64x2: - BitOps64 - + Store - + ArithOps - + Vec2 - + MultiLane<[u64; 2]> - + Into - { -} - pub trait u128x1: - BitOps128 + Store + Swap64 + MultiLane<[u128; 1]> + Into - { +#[allow(clippy::missing_safety_doc)] +pub trait UnsafeFrom { + unsafe fn unsafe_from(t: T) -> Self; } - pub trait u32x4x2: - BitOps32 - + Store - + Vec2 - + MultiLane<[M::u32x4; 2]> - + ArithOps - + Into - { -} - pub trait u64x2x2: - BitOps64 - + Store - + Vec2 - + MultiLane<[M::u64x2; 2]> - + ArithOps - + StoreBytes - + Into - { -} - pub trait u64x4: - BitOps64 - + Store - + Vec4 - + MultiLane<[u64; 4]> - + ArithOps - + Words4 - + StoreBytes - + Into - { -} - pub trait u128x2: - BitOps128 - + Store - + Vec2 - + MultiLane<[M::u128x1; 2]> - + Swap64 - + Into - { +/// A vector composed of two elements, which may be words or themselves vectors. +pub trait Vec2 { + fn extract(self, i: u32) -> W; + fn insert(self, w: W, i: u32) -> Self; } - pub trait u32x4x4: - BitOps32 - + Store - + Vec4 - + MultiLane<[M::u32x4; 4]> - + ArithOps - + LaneWords4 - + Into - { -} - pub trait u64x2x4: - BitOps64 - + Store - + Vec4 - + MultiLane<[M::u64x2; 4]> - + ArithOps - + Into - { -} - // TODO: Words4 - pub trait u128x4: - BitOps128 - + Store - + Vec4 - + MultiLane<[M::u128x1; 4]> - + Swap64 - + Into - { +/// A vector composed of four elements, which may be words or themselves vectors. +pub trait Vec4 { + fn extract(self, i: u32) -> W; + fn insert(self, w: W, i: u32) -> Self; } - /// A vector composed of multiple 128-bit lanes. - pub trait MultiLane { - /// Split a multi-lane vector into single-lane vectors. - fn to_lanes(self) -> Lanes; - /// Build a multi-lane vector from individual lanes. - fn from_lanes(lanes: Lanes) -> Self; - } +// TODO: multiples of 4 should inherit this +/// A vector composed of four words; depending on their size, operations may cross lanes. +pub trait Words4 { + fn shuffle1230(self) -> Self; + fn shuffle2301(self) -> Self; + fn shuffle3012(self) -> Self; +} - /// Combine single vectors into a multi-lane vector. - pub trait VZip { - fn vzip(self) -> V; - } +/// A vector composed one or more lanes each composed of four words. +pub trait LaneWords4 { + fn shuffle_lane_words1230(self) -> Self; + fn shuffle_lane_words2301(self) -> Self; + fn shuffle_lane_words3012(self) -> Self; +} - impl VZip for T - where - V: MultiLane, - { - #[inline(always)] - fn vzip(self) -> V { - V::from_lanes(self) - } +// TODO: make this a part of BitOps +/// Exchange neigboring ranges of bits of the specified size +pub trait Swap64 { + fn swap1(self) -> Self; + fn swap2(self) -> Self; + fn swap4(self) -> Self; + fn swap8(self) -> Self; + fn swap16(self) -> Self; + fn swap32(self) -> Self; + fn swap64(self) -> Self; +} + +pub trait u32x4: + BitOps32 + + Store + + ArithOps + + Vec4 + + Words4 + + LaneWords4 + + StoreBytes + + MultiLane<[u32; 4]> + + Into +{ +} +pub trait u64x2: + BitOps64 + + Store + + ArithOps + + Vec2 + + MultiLane<[u64; 2]> + + Into +{ +} +pub trait u128x1: + BitOps128 + Store + Swap64 + MultiLane<[u128; 1]> + Into +{ +} + +pub trait u32x4x2: + BitOps32 + + Store + + Vec2 + + MultiLane<[M::u32x4; 2]> + + ArithOps + + Into +{ +} +pub trait u64x2x2: + BitOps64 + + Store + + Vec2 + + MultiLane<[M::u64x2; 2]> + + ArithOps + + StoreBytes + + Into +{ +} +pub trait u64x4: + BitOps64 + + Store + + Vec4 + + MultiLane<[u64; 4]> + + ArithOps + + Words4 + + StoreBytes + + Into +{ +} +pub trait u128x2: + BitOps128 + + Store + + Vec2 + + MultiLane<[M::u128x1; 2]> + + Swap64 + + Into +{ +} + +pub trait u32x4x4: + BitOps32 + + Store + + Vec4 + + MultiLane<[M::u32x4; 4]> + + ArithOps + + LaneWords4 + + Into +{ +} +pub trait u64x2x4: + BitOps64 + + Store + + Vec4 + + MultiLane<[M::u64x2; 4]> + + ArithOps + + Into +{ +} +// TODO: Words4 +pub trait u128x4: + BitOps128 + + Store + + Vec4 + + MultiLane<[M::u128x1; 4]> + + Swap64 + + Into +{ +} + +/// A vector composed of multiple 128-bit lanes. +pub trait MultiLane { + /// Split a multi-lane vector into single-lane vectors. + fn to_lanes(self) -> Lanes; + /// Build a multi-lane vector from individual lanes. + fn from_lanes(lanes: Lanes) -> Self; +} + +/// Combine single vectors into a multi-lane vector. +pub trait VZip { + fn vzip(self) -> V; +} + +impl VZip for T +where + V: MultiLane, +{ + #[inline(always)] + fn vzip(self) -> V { + V::from_lanes(self) } } -pub use self::types::*; pub trait Machine: Sized + Copy { type u32x4: u32x4; @@ -264,15 +261,27 @@ pub trait Machine: Sized + Copy { unsafe { V::unsafe_read_be(input) } } + /// # Safety + /// Caller must ensure the type of Self is appropriate for the hardware of the execution + /// environment. unsafe fn instance() -> Self; } pub trait Store { + /// # Safety + /// Caller must ensure the type of Self is appropriate for the hardware of the execution + /// environment. unsafe fn unpack(p: S) -> Self; } pub trait StoreBytes { + /// # Safety + /// Caller must ensure the type of Self is appropriate for the hardware of the execution + /// environment. unsafe fn unsafe_read_le(input: &[u8]) -> Self; + /// # Safety + /// Caller must ensure the type of Self is appropriate for the hardware of the execution + /// environment. unsafe fn unsafe_read_be(input: &[u8]) -> Self; fn write_le(self, out: &mut [u8]); fn write_be(self, out: &mut [u8]); diff --git a/third_party/rust/ppv-lite86/src/x86_64/mod.rs b/third_party/rust/ppv-lite86/src/x86_64/mod.rs index 39d3b900648c..d7455d0c6605 100644 --- a/third_party/rust/ppv-lite86/src/x86_64/mod.rs +++ b/third_party/rust/ppv-lite86/src/x86_64/mod.rs @@ -1,7 +1,7 @@ // crate minimums: sse2, x86_64 -use core::arch::x86_64::{__m128i, __m256i}; use crate::types::*; +use core::arch::x86_64::{__m128i, __m256i}; mod sse2; @@ -137,6 +137,13 @@ impl Default for vec128_storage { vec128_storage { u128x1: [0] } } } +impl Eq for vec128_storage {} +impl PartialEq for vec128_storage { + #[inline(always)] + fn eq(&self, rhs: &Self) -> bool { + unsafe { self.u128x1 == rhs.u128x1 } + } +} #[allow(non_camel_case_types)] #[derive(Copy, Clone)] @@ -167,6 +174,13 @@ impl vec256_storage { unsafe { self.sse2 } } } +impl Eq for vec256_storage {} +impl PartialEq for vec256_storage { + #[inline(always)] + fn eq(&self, rhs: &Self) -> bool { + unsafe { self.sse2 == rhs.sse2 } + } +} #[allow(non_camel_case_types)] #[derive(Copy, Clone)] @@ -193,6 +207,13 @@ impl vec512_storage { unsafe { self.sse2 } } } +impl Eq for vec512_storage {} +impl PartialEq for vec512_storage { + #[inline(always)] + fn eq(&self, rhs: &Self) -> bool { + unsafe { self.avx == rhs.avx } + } +} macro_rules! impl_into { ($storage:ident, $array:ty, $name:ident) => { diff --git a/third_party/rust/ppv-lite86/src/x86_64/sse2.rs b/third_party/rust/ppv-lite86/src/x86_64/sse2.rs index 81021a99e9ed..bf0063fb64dc 100644 --- a/third_party/rust/ppv-lite86/src/x86_64/sse2.rs +++ b/third_party/rust/ppv-lite86/src/x86_64/sse2.rs @@ -166,28 +166,23 @@ macro_rules! impl_bitops128 { macro_rules! rotr_32_s3 { ($name:ident, $k0:expr, $k1:expr) => { - #[inline(always)] - fn $name(self) -> Self { - Self::new(unsafe { - _mm_shuffle_epi8( - self.x, - _mm_set_epi64x($k0, $k1), - ) - }) + #[inline(always)] + fn $name(self) -> Self { + Self::new(unsafe { _mm_shuffle_epi8(self.x, _mm_set_epi64x($k0, $k1)) }) } }; } macro_rules! rotr_32 { ($name:ident, $i:expr) => { - #[inline(always)] - fn $name(self) -> Self { - Self::new(unsafe { - _mm_or_si128( - _mm_srli_epi32(self.x, $i as i32), - _mm_slli_epi32(self.x, 32 - $i as i32), - ) - }) - } + #[inline(always)] + fn $name(self) -> Self { + Self::new(unsafe { + _mm_or_si128( + _mm_srli_epi32(self.x, $i as i32), + _mm_slli_epi32(self.x, 32 - $i as i32), + ) + }) + } }; } impl RotateEachWord32 for u32x4_sse2 { @@ -228,28 +223,23 @@ impl RotateEachWord32 for u32x4_sse2 { macro_rules! rotr_64_s3 { ($name:ident, $k0:expr, $k1:expr) => { - #[inline(always)] - fn $name(self) -> Self { - Self::new(unsafe { - _mm_shuffle_epi8( - self.x, - _mm_set_epi64x($k0, $k1), - ) - }) + #[inline(always)] + fn $name(self) -> Self { + Self::new(unsafe { _mm_shuffle_epi8(self.x, _mm_set_epi64x($k0, $k1)) }) } }; } macro_rules! rotr_64 { ($name:ident, $i:expr) => { - #[inline(always)] - fn $name(self) -> Self { - Self::new(unsafe { - _mm_or_si128( - _mm_srli_epi64(self.x, $i as i32), - _mm_slli_epi64(self.x, 64 - $i as i32), - ) - }) - } + #[inline(always)] + fn $name(self) -> Self { + Self::new(unsafe { + _mm_or_si128( + _mm_srli_epi64(self.x, $i as i32), + _mm_slli_epi64(self.x, 64 - $i as i32), + ) + }) + } }; } impl RotateEachWord32 for u64x2_sse2 { @@ -296,15 +286,15 @@ impl RotateEachWord64 for u64x2_sse2 { macro_rules! rotr_128 { ($name:ident, $i:expr) => { - #[inline(always)] - fn $name(self) -> Self { - Self::new(unsafe { - _mm_or_si128( - _mm_srli_si128(self.x, $i as i32), - _mm_slli_si128(self.x, 128 - $i as i32), - ) - }) - } + #[inline(always)] + fn $name(self) -> Self { + Self::new(unsafe { + _mm_or_si128( + _mm_srli_si128(self.x, $i as i32), + _mm_slli_si128(self.x, 128 - $i as i32), + ) + }) + } }; } // TODO: completely unoptimized @@ -411,7 +401,7 @@ impl MultiLane<[u128; 1]> for u128x1_sse2 { } #[inline(always)] fn from_lanes(xs: [u128; 1]) -> Self { - unimplemented!() + unimplemented!("{:?}", xs) } } @@ -780,7 +770,7 @@ impl BSwap for u128x1_sse2 { impl BSwap for u128x1_sse2 { #[inline(always)] fn bswap(self) -> Self { - Self::new(unsafe { unimplemented!() }) + unimplemented!() } } @@ -1078,6 +1068,7 @@ impl PartialEq for x2 { } } +#[allow(unused)] #[inline(always)] unsafe fn eq128_s4(x: __m128i, y: __m128i) -> bool { let q = _mm_shuffle_epi32(_mm_cmpeq_epi64(x, y), 0b1100_0110); @@ -1136,13 +1127,14 @@ where } #[cfg(test)] +#[cfg(target_arch = "x86_64")] mod test { use super::*; use crate::x86_64::{SSE2, SSE41, SSSE3}; use crate::Machine; #[test] - #[cfg(target_arch = "x86_64")] + #[cfg_attr(not(target_feature = "ssse3"), ignore)] fn test_bswap32_s2_vs_s3() { let xs = [0x0f0e_0d0c, 0x0b0a_0908, 0x0706_0504, 0x0302_0100]; let ys = [0x0c0d_0e0f, 0x0809_0a0b, 0x0405_0607, 0x0001_0203]; @@ -1165,7 +1157,7 @@ mod test { } #[test] - #[cfg(target_arch = "x86_64")] + #[cfg_attr(not(target_feature = "ssse3"), ignore)] fn test_bswap64_s2_vs_s3() { let xs = [0x0f0e_0d0c_0b0a_0908, 0x0706_0504_0302_0100]; let ys = [0x0809_0a0b_0c0d_0e0f, 0x0001_0203_0405_0607]; @@ -1188,7 +1180,7 @@ mod test { } #[test] - #[cfg(target_arch = "x86_64")] + #[cfg_attr(not(target_feature = "ssse3"), ignore)] fn test_shuffle32_s2_vs_s3() { let xs = [0x0, 0x1, 0x2, 0x3]; let ys = [0x2, 0x3, 0x0, 0x1]; @@ -1226,7 +1218,7 @@ mod test { } #[test] - #[cfg(target_arch = "x86_64")] + #[cfg_attr(not(target_feature = "ssse3"), ignore)] fn test_shuffle64_s2_vs_s3() { let xs = [0x0, 0x1, 0x2, 0x3]; let ys = [0x2, 0x3, 0x0, 0x1]; @@ -1263,8 +1255,8 @@ mod test { assert_eq!(x_s3, unsafe { core::mem::transmute(x_s3) }); } + #[cfg_attr(not(all(target_feature = "ssse3", target_feature = "sse4.1")), ignore)] #[test] - #[cfg(target_arch = "x86_64")] fn test_lanes_u32x4() { let xs = [0x1, 0x2, 0x3, 0x4]; @@ -1295,7 +1287,7 @@ mod test { } #[test] - #[cfg(target_arch = "x86_64")] + #[cfg_attr(not(all(target_feature = "ssse3", target_feature = "sse4.1")), ignore)] fn test_lanes_u64x2() { let xs = [0x1, 0x2]; @@ -1326,7 +1318,6 @@ mod test { } #[test] - #[cfg(target_arch = "x86_64")] fn test_vec4_u32x4_s2() { let xs = [1, 2, 3, 4]; let s2 = unsafe { SSE2::instance() }; @@ -1342,7 +1333,7 @@ mod test { } #[test] - #[cfg(target_arch = "x86_64")] + #[cfg_attr(not(all(target_feature = "ssse3", target_feature = "sse4.1")), ignore)] fn test_vec4_u32x4_s4() { let xs = [1, 2, 3, 4]; let s4 = unsafe { SSE41::instance() }; @@ -1358,7 +1349,6 @@ mod test { } #[test] - #[cfg(target_arch = "x86_64")] fn test_vec2_u64x2_s2() { let xs = [0x1, 0x2]; let s2 = unsafe { SSE2::instance() }; @@ -1370,7 +1360,7 @@ mod test { } #[test] - #[cfg(target_arch = "x86_64")] + #[cfg_attr(not(all(target_feature = "ssse3", target_feature = "sse4.1")), ignore)] fn test_vec4_u64x2_s4() { let xs = [0x1, 0x2]; let s4 = unsafe { SSE41::instance() }; @@ -1493,19 +1483,13 @@ pub mod avx2 { impl ArithOps for u32x4x4_avx2 where NI: Copy {} macro_rules! shuf_lane_bytes { ($name:ident, $k0:expr, $k1:expr) => { - #[inline(always)] - fn $name(self) -> Self { - Self::new(unsafe { - [ - _mm256_shuffle_epi8( - self.x[0], - _mm256_set_epi64x($k0, $k1, $k0, $k1), - ), - _mm256_shuffle_epi8( - self.x[1], - _mm256_set_epi64x($k0, $k1, $k0, $k1), - ) - ] + #[inline(always)] + fn $name(self) -> Self { + Self::new(unsafe { + [ + _mm256_shuffle_epi8(self.x[0], _mm256_set_epi64x($k0, $k1, $k0, $k1)), + _mm256_shuffle_epi8(self.x[1], _mm256_set_epi64x($k0, $k1, $k0, $k1)), + ] }) } }; @@ -1523,7 +1507,7 @@ pub mod avx2 { _mm256_or_si256( _mm256_srli_epi32(self.x[1], $i as i32), _mm256_slli_epi32(self.x[1], 32 - $i as i32), - ) + ), ] }) }