Bug 1716518 - Upgrade ppv-lite86 to v0.2.10. r=emilio

Differential Revision: https://phabricator.services.mozilla.com/D117836
This commit is contained in:
Mike Hommey 2021-06-15 22:17:27 +00:00
parent 9a527850ed
commit 183eae7be0
9 changed files with 324 additions and 257 deletions

4
Cargo.lock generated
View File

@ -3902,9 +3902,9 @@ checksum = "b18befed8bc2b61abc79a457295e7e838417326da1586050b919414073977f19"
[[package]]
name = "ppv-lite86"
version = "0.2.6"
version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b"
checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857"
[[package]]
name = "precomputed-hash"

View File

@ -1 +1 @@
{"files":{"Cargo.toml":"3cf6fa0e4089c12be1d19ac9082aabed68d3f193dbd5024379c22a8a4db15abe","LICENSE-APACHE":"0218327e7a480793ffdd4eb792379a9709e5c135c7ba267f709d6f6d4d70af0a","LICENSE-MIT":"4cada0bd02ea3692eee6f16400d86c6508bbd3bafb2b65fed0419f36d4f83e8f","src/generic.rs":"6c19b3062aec77a92130be1ce55c90aeca0811bcb19951c25a50c8fbe89a26d0","src/lib.rs":"75beb27d89dcc7541c8e81ad1f4bec81908d8d5fa0e3adec47cb1a1f008dfd32","src/soft.rs":"6fb8aa428183ec09d63d45761507d8da6dffc45990f2d1fcfd387c4c856599cc","src/types.rs":"4890069359ed53575a6b9a8168037ccdd4b029c8d61d540e9770fe3c90359345","src/x86_64/mod.rs":"e95910e8c9d23c212055598a437bfcdfaebf4f12e03a04e75f961bc3e9e257a1","src/x86_64/sse2.rs":"da72424e9e3fabd6236d4de80edb1448dc0bac02797df8e15298d412cdaef10c"},"package":"74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b"}
{"files":{"Cargo.toml":"5d9b7092f252e3a6f7f50f6aeb1b873803b322cf5edbf0ae07e0a27d57df3fbf","LICENSE-APACHE":"0218327e7a480793ffdd4eb792379a9709e5c135c7ba267f709d6f6d4d70af0a","LICENSE-MIT":"4cada0bd02ea3692eee6f16400d86c6508bbd3bafb2b65fed0419f36d4f83e8f","src/generic.rs":"6f38250421846499c816c222d0b48155bfab09a9921e6c400d7b75567ab98f14","src/lib.rs":"bcf308d7037e259d6640a785556fcdb86653cb4f72f64fbfeda9899857c14479","src/soft.rs":"5cdee0e46c99a9d5078c0b3a733fe6fd1430ed0a888ef747bc2a1271265a1140","src/types.rs":"a354d2e3267c7c451a1420903314a358328346772ca964fa6c1ef7b96c983930","src/x86_64/mod.rs":"4d5a1da816f8e59bb385464f005075de889d1060e24dcee6709b321a3d6c92f7","src/x86_64/sse2.rs":"a9df3e7b3b8ffcd249a2cbed0e538042f7747dfa6ae7af0c9af364dc5a12d409"},"package":"ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857"}

View File

@ -13,7 +13,7 @@
[package]
edition = "2018"
name = "ppv-lite86"
version = "0.2.6"
version = "0.2.10"
authors = ["The CryptoCorrosion Contributors"]
description = "Implementation of the crypto-simd API for x86"
keywords = ["crypto", "simd", "x86"]
@ -24,7 +24,8 @@ repository = "https://github.com/cryptocorrosion/cryptocorrosion"
[dependencies]
[features]
default = ["std", "simd"]
default = ["std"]
no_simd = []
simd = []
std = []
[badges.travis-ci]

View File

@ -1,14 +1,14 @@
#![allow(non_camel_case_types)]
use core::ops::*;
use crate::soft::{x2, x4};
use crate::types::*;
use core::ops::*;
#[repr(C)]
#[derive(Clone, Copy)]
pub union vec128_storage {
d: [u32; 4],
q: [u64; 2],
o: [u128; 1],
}
impl From<[u32; 4]> for vec128_storage {
#[inline]
@ -16,7 +16,38 @@ impl From<[u32; 4]> for vec128_storage {
Self { d }
}
}
#[derive(Clone, Copy)]
impl From<vec128_storage> for [u32; 4] {
#[inline]
fn from(d: vec128_storage) -> Self {
unsafe { d.d }
}
}
impl From<[u64; 2]> for vec128_storage {
#[inline]
fn from(q: [u64; 2]) -> Self {
Self { q }
}
}
impl From<vec128_storage> for [u64; 2] {
#[inline]
fn from(q: vec128_storage) -> Self {
unsafe { q.q }
}
}
impl Default for vec128_storage {
#[inline]
fn default() -> Self {
Self { q: [0, 0] }
}
}
impl Eq for vec128_storage {}
impl PartialEq<vec128_storage> for vec128_storage {
#[inline]
fn eq(&self, rhs: &Self) -> bool {
unsafe { self.q == rhs.q }
}
}
#[derive(Clone, Copy, PartialEq, Eq, Default)]
pub struct vec256_storage {
v128: [vec128_storage; 2],
}
@ -30,7 +61,15 @@ impl vec256_storage {
self.v128
}
}
#[derive(Clone, Copy)]
impl From<vec256_storage> for [u64; 4] {
#[inline]
fn from(q: vec256_storage) -> Self {
let [a, b]: [u64; 2] = q.v128[0].into();
let [c, d]: [u64; 2] = q.v128[1].into();
[a, b, c, d]
}
}
#[derive(Clone, Copy, PartialEq, Eq, Default)]
pub struct vec512_storage {
v128: [vec128_storage; 4],
}
@ -106,14 +145,22 @@ where
unsafe { T::unpack(q) }
}
fn o_of_q(q: [u64; 2]) -> u128 {
u128::from(q[0]) | (u128::from(q[1]) << 64)
}
fn q_of_o(o: u128) -> [u64; 2] {
[o as u64, (o >> 64) as u64]
}
fn omap<T, F>(a: T, f: F) -> T
where
T: Store<vec128_storage> + Into<vec128_storage>,
F: Fn(u128) -> u128,
{
let a: vec128_storage = a.into();
let ao = unsafe { a.o };
let o = vec128_storage { o: [f(ao[0])] };
let ao = o_of_q(unsafe { a.q });
let o = vec128_storage { q: q_of_o(f(ao)) };
unsafe { T::unpack(o) }
}
@ -124,10 +171,10 @@ where
{
let a: vec128_storage = a.into();
let b: vec128_storage = b.into();
let ao = unsafe { a.o };
let bo = unsafe { b.o };
let ao = o_of_q(unsafe { a.q });
let bo = o_of_q(unsafe { b.q });
let o = vec128_storage {
o: [f(ao[0], bo[0])],
q: q_of_o(f(ao, bo)),
};
unsafe { T::unpack(o) }
}
@ -411,7 +458,7 @@ impl From<u64x2_generic> for vec128_storage {
impl From<u128x1_generic> for vec128_storage {
#[inline(always)]
fn from(o: u128x1_generic) -> Self {
Self { o: o.0 }
Self { q: q_of_o(o.0[0]) }
}
}
@ -430,7 +477,7 @@ impl Store<vec128_storage> for u64x2_generic {
impl Store<vec128_storage> for u128x1_generic {
#[inline(always)]
unsafe fn unpack(s: vec128_storage) -> Self {
Self(s.o)
Self([o_of_q(s.q); 1])
}
}

View File

@ -9,14 +9,14 @@ mod soft;
mod types;
pub use self::types::*;
#[cfg(all(feature = "simd", target_arch = "x86_64", not(miri)))]
#[cfg(all(target_arch = "x86_64", not(feature = "no_simd"), not(miri)))]
pub mod x86_64;
#[cfg(all(feature = "simd", target_arch = "x86_64", not(miri)))]
#[cfg(all(target_arch = "x86_64", not(feature = "no_simd"), not(miri)))]
use self::x86_64 as arch;
#[cfg(any(miri, not(all(feature = "simd", any(target_arch = "x86_64")))))]
#[cfg(any(feature = "no_simd", miri, not(target_arch = "x86_64")))]
pub mod generic;
#[cfg(any(miri, not(all(feature = "simd", any(target_arch = "x86_64")))))]
#[cfg(any(feature = "no_simd", miri, not(target_arch = "x86_64")))]
use self::generic as arch;
pub use self::arch::{vec128_storage, vec256_storage, vec512_storage};

View File

@ -1,9 +1,9 @@
//! Implement 256- and 512- bit in terms of 128-bit, for machines without native wide SIMD.
use core::marker::PhantomData;
use core::ops::*;
use crate::types::*;
use crate::{vec128_storage, vec256_storage, vec512_storage};
use core::marker::PhantomData;
use core::ops::*;
#[derive(Copy, Clone, Default)]
#[allow(non_camel_case_types)]
@ -238,7 +238,12 @@ macro_rules! fwd_unop_x4 {
($fn:ident) => {
#[inline(always)]
fn $fn(self) -> Self {
x4([self.0[0].$fn(), self.0[1].$fn(), self.0[2].$fn(), self.0[3].$fn()])
x4([
self.0[0].$fn(),
self.0[1].$fn(),
self.0[2].$fn(),
self.0[3].$fn(),
])
}
};
}

View File

@ -1,3 +1,4 @@
#![allow(non_camel_case_types)]
use core::ops::{Add, AddAssign, BitAnd, BitOr, BitXor, BitXorAssign, Not};
pub trait AndNot {
@ -44,182 +45,178 @@ pub trait RotateEachWord64 {
pub trait RotateEachWord128 {}
#[allow(non_camel_case_types)]
mod types {
//! Vector type naming scheme:
//! uN[xP]xL
//! Unsigned; N-bit words * P bits per lane * L lanes
//!
//! A lane is always 128-bits, chosen because common SIMD architectures treat 128-bit units of
//! wide vectors specially (supporting e.g. intra-lane shuffles), and tend to have limited and
//! slow inter-lane operations.
// Vector type naming scheme:
// uN[xP]xL
// Unsigned; N-bit words * P bits per lane * L lanes
//
// A lane is always 128-bits, chosen because common SIMD architectures treat 128-bit units of
// wide vectors specially (supporting e.g. intra-lane shuffles), and tend to have limited and
// slow inter-lane operations.
use crate::arch::{vec128_storage, vec256_storage, vec512_storage};
use crate::{ArithOps, BitOps128, BitOps32, BitOps64, Machine, Store, StoreBytes};
use crate::arch::{vec128_storage, vec256_storage, vec512_storage};
pub trait UnsafeFrom<T> {
unsafe fn unsafe_from(t: T) -> Self;
}
/// A vector composed of two elements, which may be words or themselves vectors.
pub trait Vec2<W> {
fn extract(self, i: u32) -> W;
fn insert(self, w: W, i: u32) -> Self;
}
/// A vector composed of four elements, which may be words or themselves vectors.
pub trait Vec4<W> {
fn extract(self, i: u32) -> W;
fn insert(self, w: W, i: u32) -> Self;
}
// TODO: multiples of 4 should inherit this
/// A vector composed of four words; depending on their size, operations may cross lanes.
pub trait Words4 {
fn shuffle1230(self) -> Self;
fn shuffle2301(self) -> Self;
fn shuffle3012(self) -> Self;
}
/// A vector composed one or more lanes each composed of four words.
pub trait LaneWords4 {
fn shuffle_lane_words1230(self) -> Self;
fn shuffle_lane_words2301(self) -> Self;
fn shuffle_lane_words3012(self) -> Self;
}
// TODO: make this a part of BitOps
/// Exchange neigboring ranges of bits of the specified size
pub trait Swap64 {
fn swap1(self) -> Self;
fn swap2(self) -> Self;
fn swap4(self) -> Self;
fn swap8(self) -> Self;
fn swap16(self) -> Self;
fn swap32(self) -> Self;
fn swap64(self) -> Self;
}
pub trait u32x4<M: Machine>:
BitOps32
+ Store<vec128_storage>
+ ArithOps
+ Vec4<u32>
+ Words4
+ LaneWords4
+ StoreBytes
+ MultiLane<[u32; 4]>
+ Into<vec128_storage>
{
}
pub trait u64x2<M: Machine>:
BitOps64
+ Store<vec128_storage>
+ ArithOps
+ Vec2<u64>
+ MultiLane<[u64; 2]>
+ Into<vec128_storage>
{
}
pub trait u128x1<M: Machine>:
BitOps128 + Store<vec128_storage> + Swap64 + MultiLane<[u128; 1]> + Into<vec128_storage>
{
#[allow(clippy::missing_safety_doc)]
pub trait UnsafeFrom<T> {
unsafe fn unsafe_from(t: T) -> Self;
}
pub trait u32x4x2<M: Machine>:
BitOps32
+ Store<vec256_storage>
+ Vec2<M::u32x4>
+ MultiLane<[M::u32x4; 2]>
+ ArithOps
+ Into<vec256_storage>
{
}
pub trait u64x2x2<M: Machine>:
BitOps64
+ Store<vec256_storage>
+ Vec2<M::u64x2>
+ MultiLane<[M::u64x2; 2]>
+ ArithOps
+ StoreBytes
+ Into<vec256_storage>
{
}
pub trait u64x4<M: Machine>:
BitOps64
+ Store<vec256_storage>
+ Vec4<u64>
+ MultiLane<[u64; 4]>
+ ArithOps
+ Words4
+ StoreBytes
+ Into<vec256_storage>
{
}
pub trait u128x2<M: Machine>:
BitOps128
+ Store<vec256_storage>
+ Vec2<M::u128x1>
+ MultiLane<[M::u128x1; 2]>
+ Swap64
+ Into<vec256_storage>
{
/// A vector composed of two elements, which may be words or themselves vectors.
pub trait Vec2<W> {
fn extract(self, i: u32) -> W;
fn insert(self, w: W, i: u32) -> Self;
}
pub trait u32x4x4<M: Machine>:
BitOps32
+ Store<vec512_storage>
+ Vec4<M::u32x4>
+ MultiLane<[M::u32x4; 4]>
+ ArithOps
+ LaneWords4
+ Into<vec512_storage>
{
}
pub trait u64x2x4<M: Machine>:
BitOps64
+ Store<vec512_storage>
+ Vec4<M::u64x2>
+ MultiLane<[M::u64x2; 4]>
+ ArithOps
+ Into<vec512_storage>
{
}
// TODO: Words4
pub trait u128x4<M: Machine>:
BitOps128
+ Store<vec512_storage>
+ Vec4<M::u128x1>
+ MultiLane<[M::u128x1; 4]>
+ Swap64
+ Into<vec512_storage>
{
/// A vector composed of four elements, which may be words or themselves vectors.
pub trait Vec4<W> {
fn extract(self, i: u32) -> W;
fn insert(self, w: W, i: u32) -> Self;
}
/// A vector composed of multiple 128-bit lanes.
pub trait MultiLane<Lanes> {
/// Split a multi-lane vector into single-lane vectors.
fn to_lanes(self) -> Lanes;
/// Build a multi-lane vector from individual lanes.
fn from_lanes(lanes: Lanes) -> Self;
}
// TODO: multiples of 4 should inherit this
/// A vector composed of four words; depending on their size, operations may cross lanes.
pub trait Words4 {
fn shuffle1230(self) -> Self;
fn shuffle2301(self) -> Self;
fn shuffle3012(self) -> Self;
}
/// Combine single vectors into a multi-lane vector.
pub trait VZip<V> {
fn vzip(self) -> V;
}
/// A vector composed one or more lanes each composed of four words.
pub trait LaneWords4 {
fn shuffle_lane_words1230(self) -> Self;
fn shuffle_lane_words2301(self) -> Self;
fn shuffle_lane_words3012(self) -> Self;
}
impl<V, T> VZip<V> for T
where
V: MultiLane<T>,
{
#[inline(always)]
fn vzip(self) -> V {
V::from_lanes(self)
}
// TODO: make this a part of BitOps
/// Exchange neigboring ranges of bits of the specified size
pub trait Swap64 {
fn swap1(self) -> Self;
fn swap2(self) -> Self;
fn swap4(self) -> Self;
fn swap8(self) -> Self;
fn swap16(self) -> Self;
fn swap32(self) -> Self;
fn swap64(self) -> Self;
}
pub trait u32x4<M: Machine>:
BitOps32
+ Store<vec128_storage>
+ ArithOps
+ Vec4<u32>
+ Words4
+ LaneWords4
+ StoreBytes
+ MultiLane<[u32; 4]>
+ Into<vec128_storage>
{
}
pub trait u64x2<M: Machine>:
BitOps64
+ Store<vec128_storage>
+ ArithOps
+ Vec2<u64>
+ MultiLane<[u64; 2]>
+ Into<vec128_storage>
{
}
pub trait u128x1<M: Machine>:
BitOps128 + Store<vec128_storage> + Swap64 + MultiLane<[u128; 1]> + Into<vec128_storage>
{
}
pub trait u32x4x2<M: Machine>:
BitOps32
+ Store<vec256_storage>
+ Vec2<M::u32x4>
+ MultiLane<[M::u32x4; 2]>
+ ArithOps
+ Into<vec256_storage>
{
}
pub trait u64x2x2<M: Machine>:
BitOps64
+ Store<vec256_storage>
+ Vec2<M::u64x2>
+ MultiLane<[M::u64x2; 2]>
+ ArithOps
+ StoreBytes
+ Into<vec256_storage>
{
}
pub trait u64x4<M: Machine>:
BitOps64
+ Store<vec256_storage>
+ Vec4<u64>
+ MultiLane<[u64; 4]>
+ ArithOps
+ Words4
+ StoreBytes
+ Into<vec256_storage>
{
}
pub trait u128x2<M: Machine>:
BitOps128
+ Store<vec256_storage>
+ Vec2<M::u128x1>
+ MultiLane<[M::u128x1; 2]>
+ Swap64
+ Into<vec256_storage>
{
}
pub trait u32x4x4<M: Machine>:
BitOps32
+ Store<vec512_storage>
+ Vec4<M::u32x4>
+ MultiLane<[M::u32x4; 4]>
+ ArithOps
+ LaneWords4
+ Into<vec512_storage>
{
}
pub trait u64x2x4<M: Machine>:
BitOps64
+ Store<vec512_storage>
+ Vec4<M::u64x2>
+ MultiLane<[M::u64x2; 4]>
+ ArithOps
+ Into<vec512_storage>
{
}
// TODO: Words4
pub trait u128x4<M: Machine>:
BitOps128
+ Store<vec512_storage>
+ Vec4<M::u128x1>
+ MultiLane<[M::u128x1; 4]>
+ Swap64
+ Into<vec512_storage>
{
}
/// A vector composed of multiple 128-bit lanes.
pub trait MultiLane<Lanes> {
/// Split a multi-lane vector into single-lane vectors.
fn to_lanes(self) -> Lanes;
/// Build a multi-lane vector from individual lanes.
fn from_lanes(lanes: Lanes) -> Self;
}
/// Combine single vectors into a multi-lane vector.
pub trait VZip<V> {
fn vzip(self) -> V;
}
impl<V, T> VZip<V> for T
where
V: MultiLane<T>,
{
#[inline(always)]
fn vzip(self) -> V {
V::from_lanes(self)
}
}
pub use self::types::*;
pub trait Machine: Sized + Copy {
type u32x4: u32x4<Self>;
@ -264,15 +261,27 @@ pub trait Machine: Sized + Copy {
unsafe { V::unsafe_read_be(input) }
}
/// # Safety
/// Caller must ensure the type of Self is appropriate for the hardware of the execution
/// environment.
unsafe fn instance() -> Self;
}
pub trait Store<S> {
/// # Safety
/// Caller must ensure the type of Self is appropriate for the hardware of the execution
/// environment.
unsafe fn unpack(p: S) -> Self;
}
pub trait StoreBytes {
/// # Safety
/// Caller must ensure the type of Self is appropriate for the hardware of the execution
/// environment.
unsafe fn unsafe_read_le(input: &[u8]) -> Self;
/// # Safety
/// Caller must ensure the type of Self is appropriate for the hardware of the execution
/// environment.
unsafe fn unsafe_read_be(input: &[u8]) -> Self;
fn write_le(self, out: &mut [u8]);
fn write_be(self, out: &mut [u8]);

View File

@ -1,7 +1,7 @@
// crate minimums: sse2, x86_64
use core::arch::x86_64::{__m128i, __m256i};
use crate::types::*;
use core::arch::x86_64::{__m128i, __m256i};
mod sse2;
@ -137,6 +137,13 @@ impl Default for vec128_storage {
vec128_storage { u128x1: [0] }
}
}
impl Eq for vec128_storage {}
impl PartialEq for vec128_storage {
#[inline(always)]
fn eq(&self, rhs: &Self) -> bool {
unsafe { self.u128x1 == rhs.u128x1 }
}
}
#[allow(non_camel_case_types)]
#[derive(Copy, Clone)]
@ -167,6 +174,13 @@ impl vec256_storage {
unsafe { self.sse2 }
}
}
impl Eq for vec256_storage {}
impl PartialEq for vec256_storage {
#[inline(always)]
fn eq(&self, rhs: &Self) -> bool {
unsafe { self.sse2 == rhs.sse2 }
}
}
#[allow(non_camel_case_types)]
#[derive(Copy, Clone)]
@ -193,6 +207,13 @@ impl vec512_storage {
unsafe { self.sse2 }
}
}
impl Eq for vec512_storage {}
impl PartialEq for vec512_storage {
#[inline(always)]
fn eq(&self, rhs: &Self) -> bool {
unsafe { self.avx == rhs.avx }
}
}
macro_rules! impl_into {
($storage:ident, $array:ty, $name:ident) => {

View File

@ -166,28 +166,23 @@ macro_rules! impl_bitops128 {
macro_rules! rotr_32_s3 {
($name:ident, $k0:expr, $k1:expr) => {
#[inline(always)]
fn $name(self) -> Self {
Self::new(unsafe {
_mm_shuffle_epi8(
self.x,
_mm_set_epi64x($k0, $k1),
)
})
#[inline(always)]
fn $name(self) -> Self {
Self::new(unsafe { _mm_shuffle_epi8(self.x, _mm_set_epi64x($k0, $k1)) })
}
};
}
macro_rules! rotr_32 {
($name:ident, $i:expr) => {
#[inline(always)]
fn $name(self) -> Self {
Self::new(unsafe {
_mm_or_si128(
_mm_srli_epi32(self.x, $i as i32),
_mm_slli_epi32(self.x, 32 - $i as i32),
)
})
}
#[inline(always)]
fn $name(self) -> Self {
Self::new(unsafe {
_mm_or_si128(
_mm_srli_epi32(self.x, $i as i32),
_mm_slli_epi32(self.x, 32 - $i as i32),
)
})
}
};
}
impl<S4: Copy, NI: Copy> RotateEachWord32 for u32x4_sse2<YesS3, S4, NI> {
@ -228,28 +223,23 @@ impl<S4: Copy, NI: Copy> RotateEachWord32 for u32x4_sse2<NoS3, S4, NI> {
macro_rules! rotr_64_s3 {
($name:ident, $k0:expr, $k1:expr) => {
#[inline(always)]
fn $name(self) -> Self {
Self::new(unsafe {
_mm_shuffle_epi8(
self.x,
_mm_set_epi64x($k0, $k1),
)
})
#[inline(always)]
fn $name(self) -> Self {
Self::new(unsafe { _mm_shuffle_epi8(self.x, _mm_set_epi64x($k0, $k1)) })
}
};
}
macro_rules! rotr_64 {
($name:ident, $i:expr) => {
#[inline(always)]
fn $name(self) -> Self {
Self::new(unsafe {
_mm_or_si128(
_mm_srli_epi64(self.x, $i as i32),
_mm_slli_epi64(self.x, 64 - $i as i32),
)
})
}
#[inline(always)]
fn $name(self) -> Self {
Self::new(unsafe {
_mm_or_si128(
_mm_srli_epi64(self.x, $i as i32),
_mm_slli_epi64(self.x, 64 - $i as i32),
)
})
}
};
}
impl<S4: Copy, NI: Copy> RotateEachWord32 for u64x2_sse2<YesS3, S4, NI> {
@ -296,15 +286,15 @@ impl<S3: Copy, S4: Copy, NI: Copy> RotateEachWord64 for u64x2_sse2<S3, S4, NI> {
macro_rules! rotr_128 {
($name:ident, $i:expr) => {
#[inline(always)]
fn $name(self) -> Self {
Self::new(unsafe {
_mm_or_si128(
_mm_srli_si128(self.x, $i as i32),
_mm_slli_si128(self.x, 128 - $i as i32),
)
})
}
#[inline(always)]
fn $name(self) -> Self {
Self::new(unsafe {
_mm_or_si128(
_mm_srli_si128(self.x, $i as i32),
_mm_slli_si128(self.x, 128 - $i as i32),
)
})
}
};
}
// TODO: completely unoptimized
@ -411,7 +401,7 @@ impl<S3, S4, NI> MultiLane<[u128; 1]> for u128x1_sse2<S3, S4, NI> {
}
#[inline(always)]
fn from_lanes(xs: [u128; 1]) -> Self {
unimplemented!()
unimplemented!("{:?}", xs)
}
}
@ -780,7 +770,7 @@ impl<S4, NI> BSwap for u128x1_sse2<YesS3, S4, NI> {
impl<S4, NI> BSwap for u128x1_sse2<NoS3, S4, NI> {
#[inline(always)]
fn bswap(self) -> Self {
Self::new(unsafe { unimplemented!() })
unimplemented!()
}
}
@ -1078,6 +1068,7 @@ impl<W: PartialEq, G> PartialEq for x2<W, G> {
}
}
#[allow(unused)]
#[inline(always)]
unsafe fn eq128_s4(x: __m128i, y: __m128i) -> bool {
let q = _mm_shuffle_epi32(_mm_cmpeq_epi64(x, y), 0b1100_0110);
@ -1136,13 +1127,14 @@ where
}
#[cfg(test)]
#[cfg(target_arch = "x86_64")]
mod test {
use super::*;
use crate::x86_64::{SSE2, SSE41, SSSE3};
use crate::Machine;
#[test]
#[cfg(target_arch = "x86_64")]
#[cfg_attr(not(target_feature = "ssse3"), ignore)]
fn test_bswap32_s2_vs_s3() {
let xs = [0x0f0e_0d0c, 0x0b0a_0908, 0x0706_0504, 0x0302_0100];
let ys = [0x0c0d_0e0f, 0x0809_0a0b, 0x0405_0607, 0x0001_0203];
@ -1165,7 +1157,7 @@ mod test {
}
#[test]
#[cfg(target_arch = "x86_64")]
#[cfg_attr(not(target_feature = "ssse3"), ignore)]
fn test_bswap64_s2_vs_s3() {
let xs = [0x0f0e_0d0c_0b0a_0908, 0x0706_0504_0302_0100];
let ys = [0x0809_0a0b_0c0d_0e0f, 0x0001_0203_0405_0607];
@ -1188,7 +1180,7 @@ mod test {
}
#[test]
#[cfg(target_arch = "x86_64")]
#[cfg_attr(not(target_feature = "ssse3"), ignore)]
fn test_shuffle32_s2_vs_s3() {
let xs = [0x0, 0x1, 0x2, 0x3];
let ys = [0x2, 0x3, 0x0, 0x1];
@ -1226,7 +1218,7 @@ mod test {
}
#[test]
#[cfg(target_arch = "x86_64")]
#[cfg_attr(not(target_feature = "ssse3"), ignore)]
fn test_shuffle64_s2_vs_s3() {
let xs = [0x0, 0x1, 0x2, 0x3];
let ys = [0x2, 0x3, 0x0, 0x1];
@ -1263,8 +1255,8 @@ mod test {
assert_eq!(x_s3, unsafe { core::mem::transmute(x_s3) });
}
#[cfg_attr(not(all(target_feature = "ssse3", target_feature = "sse4.1")), ignore)]
#[test]
#[cfg(target_arch = "x86_64")]
fn test_lanes_u32x4() {
let xs = [0x1, 0x2, 0x3, 0x4];
@ -1295,7 +1287,7 @@ mod test {
}
#[test]
#[cfg(target_arch = "x86_64")]
#[cfg_attr(not(all(target_feature = "ssse3", target_feature = "sse4.1")), ignore)]
fn test_lanes_u64x2() {
let xs = [0x1, 0x2];
@ -1326,7 +1318,6 @@ mod test {
}
#[test]
#[cfg(target_arch = "x86_64")]
fn test_vec4_u32x4_s2() {
let xs = [1, 2, 3, 4];
let s2 = unsafe { SSE2::instance() };
@ -1342,7 +1333,7 @@ mod test {
}
#[test]
#[cfg(target_arch = "x86_64")]
#[cfg_attr(not(all(target_feature = "ssse3", target_feature = "sse4.1")), ignore)]
fn test_vec4_u32x4_s4() {
let xs = [1, 2, 3, 4];
let s4 = unsafe { SSE41::instance() };
@ -1358,7 +1349,6 @@ mod test {
}
#[test]
#[cfg(target_arch = "x86_64")]
fn test_vec2_u64x2_s2() {
let xs = [0x1, 0x2];
let s2 = unsafe { SSE2::instance() };
@ -1370,7 +1360,7 @@ mod test {
}
#[test]
#[cfg(target_arch = "x86_64")]
#[cfg_attr(not(all(target_feature = "ssse3", target_feature = "sse4.1")), ignore)]
fn test_vec4_u64x2_s4() {
let xs = [0x1, 0x2];
let s4 = unsafe { SSE41::instance() };
@ -1493,19 +1483,13 @@ pub mod avx2 {
impl<NI> ArithOps for u32x4x4_avx2<NI> where NI: Copy {}
macro_rules! shuf_lane_bytes {
($name:ident, $k0:expr, $k1:expr) => {
#[inline(always)]
fn $name(self) -> Self {
Self::new(unsafe {
[
_mm256_shuffle_epi8(
self.x[0],
_mm256_set_epi64x($k0, $k1, $k0, $k1),
),
_mm256_shuffle_epi8(
self.x[1],
_mm256_set_epi64x($k0, $k1, $k0, $k1),
)
]
#[inline(always)]
fn $name(self) -> Self {
Self::new(unsafe {
[
_mm256_shuffle_epi8(self.x[0], _mm256_set_epi64x($k0, $k1, $k0, $k1)),
_mm256_shuffle_epi8(self.x[1], _mm256_set_epi64x($k0, $k1, $k0, $k1)),
]
})
}
};
@ -1523,7 +1507,7 @@ pub mod avx2 {
_mm256_or_si256(
_mm256_srli_epi32(self.x[1], $i as i32),
_mm256_slli_epi32(self.x[1], 32 - $i as i32),
)
),
]
})
}