Bug 1882334 - Upgrade xsimd to version ce58d62666c315140eb54042498d93114edbaa68 r=padenot

This notably brings in the i8mm NEON extension, to be used by Firefox Translations.

Differential Revision: https://phabricator.services.mozilla.com/D202839
serge-sans-paille 2024-02-28 08:12:37 +00:00
parent c63526fe1c
commit f51ce4e961
17 changed files with 427 additions and 18 deletions


@@ -26,7 +26,7 @@ namespace xsimd
using namespace types;
// abs
-template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
+template <class A, class T, class>
inline batch<T, A> abs(batch<T, A> const& self, requires_arch<generic>) noexcept
{
if (std::is_unsigned<T>::value)
@@ -45,6 +45,63 @@ namespace xsimd
return hypot(z.real(), z.imag());
}
// avg
namespace detail
{
template <class A, class T>
inline batch<T, A> avg(batch<T, A> const& x, batch<T, A> const& y, std::true_type, std::false_type) noexcept
{
return (x & y) + ((x ^ y) >> 1);
}
template <class A, class T>
inline batch<T, A> avg(batch<T, A> const& x, batch<T, A> const& y, std::true_type, std::true_type) noexcept
{
// Inspired by
// https://stackoverflow.com/questions/5697500/take-the-average-of-two-signed-numbers-in-c
auto t = (x & y) + ((x ^ y) >> 1);
auto t_u = bitwise_cast<typename std::make_unsigned<T>::type>(t);
auto avg = t + (bitwise_cast<T>(t_u >> (8 * sizeof(T) - 1)) & (x ^ y));
return avg;
}
template <class A, class T>
inline batch<T, A> avg(batch<T, A> const& x, batch<T, A> const& y, std::false_type, std::true_type) noexcept
{
return (x + y) / 2;
}
}
template <class A, class T>
inline batch<T, A> avg(batch<T, A> const& x, batch<T, A> const& y, requires_arch<generic>) noexcept
{
return detail::avg(x, y, typename std::is_integral<T>::type {}, typename std::is_signed<T>::type {});
}
// avgr
namespace detail
{
template <class A, class T>
inline batch<T, A> avgr(batch<T, A> const& x, batch<T, A> const& y, std::true_type) noexcept
{
constexpr unsigned shift = 8 * sizeof(T) - 1;
auto adj = std::is_signed<T>::value ? ((x ^ y) & 0x1) : (((x ^ y) << shift) >> shift);
return ::xsimd::kernel::avg(x, y, A {}) + adj;
}
template <class A, class T>
inline batch<T, A> avgr(batch<T, A> const& x, batch<T, A> const& y, std::false_type) noexcept
{
return ::xsimd::kernel::avg(x, y, A {});
}
}
template <class A, class T>
inline batch<T, A> avgr(batch<T, A> const& x, batch<T, A> const& y, requires_arch<generic>) noexcept
{
return detail::avgr(x, y, typename std::is_integral<T>::type {});
}
// batch_cast
template <class A, class T>
inline batch<T, A> batch_cast(batch<T, A> const& self, batch<T, A> const&, requires_arch<generic>) noexcept
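
For reference, the two bit tricks used by the generic avg kernels above can be checked in plain scalar C++. This is an illustrative standalone sketch, not part of the diff; the variable names are mine.

#include <cassert>
#include <cstdint>

int main()
{
    // Unsigned case: (x & y) + ((x ^ y) >> 1) is the floor average,
    // computed without the intermediate x + y ever overflowing.
    uint8_t x = 250, y = 252;
    assert(uint8_t((x & y) + ((x ^ y) >> 1)) == 251); // 250 + 252 would overflow uint8_t

    // Signed case (the StackOverflow reference): the sign-bit correction
    // turns the floor average into C-style truncation toward zero when the
    // result is negative and the sum is odd.
    int8_t sx = -3, sy = 2;
    int8_t t = (sx & sy) + ((sx ^ sy) >> 1); // floor average: -1
    uint8_t t_u = static_cast<uint8_t>(t);
    int8_t avg = t + (static_cast<int8_t>(t_u >> 7) & (sx ^ sy));
    assert(avg == (sx + sy) / 2); // == 0, matching truncating division
    return 0;
}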


@@ -76,6 +76,44 @@ namespace xsimd
}
}
// avgr
template <class A, class T, class = typename std::enable_if<std::is_unsigned<T>::value, void>::type>
inline batch<T, A> avgr(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
{
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
{
return _mm256_avg_epu8(self, other);
}
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
{
return _mm256_avg_epu16(self, other);
}
else
{
return avgr(self, other, generic {});
}
}
// avg
template <class A, class T, class = typename std::enable_if<std::is_unsigned<T>::value, void>::type>
inline batch<T, A> avg(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
{
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
{
auto adj = ((self ^ other) << 7) >> 7;
return avgr(self, other, A {}) - adj;
}
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
{
auto adj = ((self ^ other) << 15) >> 15;
return avgr(self, other, A {}) - adj;
}
else
{
return avg(self, other, generic {});
}
}
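
The pattern above (repeated in the sse2, avx512bw, and wasm hunks later in this diff) derives the floor average from the hardware rounding average, since x86 only provides the latter. A scalar model of the trick, with illustrative names, not part of the diff:

#include <cassert>
#include <cstdint>

// What _mm256_avg_epu8 computes per lane: (x + y + 1) >> 1, the rounding average.
uint8_t avgr_u8(uint8_t x, uint8_t y) { return uint8_t((x + y + 1) >> 1); }

// Floor average: subtract 1 exactly when x + y is odd, i.e. when bit 0 of
// x ^ y is set. The << 7 then >> 7 pair isolates that bit, mirroring the
// batch shifts above (logical shifts, since the lanes are unsigned).
uint8_t avg_u8(uint8_t x, uint8_t y)
{
    uint8_t adj = uint8_t(uint8_t((x ^ y) << 7) >> 7);
    return uint8_t(avgr_u8(x, y) - adj);
}

int main()
{
    assert(avgr_u8(3, 4) == 4); // (3 + 4 + 1) >> 1
    assert(avg_u8(3, 4) == 3);  // (3 + 4) >> 1
    assert(avg_u8(255, 255) == 255); // no overflow in either form
    return 0;
}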
// bitwise_and
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
inline batch<T, A> bitwise_and(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept


@@ -112,6 +112,44 @@ namespace xsimd
}
}
// avgr
template <class A, class T, class = typename std::enable_if<std::is_unsigned<T>::value, void>::type>
inline batch<T, A> avgr(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512bw>) noexcept
{
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
{
return _mm512_avg_epu8(self, other);
}
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
{
return _mm512_avg_epu16(self, other);
}
else
{
return avgr(self, other, generic {});
}
}
// avg
template <class A, class T, class = typename std::enable_if<std::is_unsigned<T>::value, void>::type>
inline batch<T, A> avg(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512bw>) noexcept
{
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
{
auto adj = ((self ^ other) << 7) >> 7;
return avgr(self, other, A {}) - adj;
}
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
{
auto adj = ((self ^ other) << 15) >> 15;
return avgr(self, other, A {}) - adj;
}
else
{
return avg(self, other, generic {});
}
}
// bitwise_lshift
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
inline batch<T, A> bitwise_lshift(batch<T, A> const& self, int32_t other, requires_arch<avx512bw>) noexcept


@@ -0,0 +1,17 @@
/***************************************************************************
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
* Martin Renou *
* Copyright (c) QuantStack *
* Copyright (c) Serge Guelton *
* *
* Distributed under the terms of the BSD 3-Clause License. *
* *
* The full license is in the file LICENSE, distributed with this software. *
****************************************************************************/
#ifndef XSIMD_I8MM_NEON64_HPP
#define XSIMD_I8MM_NEON64_HPP
#include "../types/xsimd_i8mm_neon64_register.hpp"
#endif


@@ -104,6 +104,10 @@
#include "./xsimd_neon64.hpp"
#endif
#if XSIMD_WITH_I8MM_NEON64
#include "./xsimd_i8mm_neon64.hpp"
#endif
#if XSIMD_WITH_SVE
#include "./xsimd_sve.hpp"
#endif


@@ -23,33 +23,39 @@
// Wrap intrinsics so we can pass them as function pointers
// - OP: intrinsics name prefix, e.g., vorrq
// - RT: type traits to deduce intrinsics return types
-#define WRAP_BINARY_INT_EXCLUDING_64(OP, RT) \
+#define WRAP_BINARY_UINT_EXCLUDING_64(OP, RT) \
 namespace wrap \
 { \
 inline RT<uint8x16_t> OP##_u8(uint8x16_t a, uint8x16_t b) noexcept \
 { \
 return ::OP##_u8(a, b); \
 } \
-inline RT<int8x16_t> OP##_s8(int8x16_t a, int8x16_t b) noexcept \
-{ \
-return ::OP##_s8(a, b); \
-} \
 inline RT<uint16x8_t> OP##_u16(uint16x8_t a, uint16x8_t b) noexcept \
 { \
 return ::OP##_u16(a, b); \
 } \
-inline RT<int16x8_t> OP##_s16(int16x8_t a, int16x8_t b) noexcept \
-{ \
-return ::OP##_s16(a, b); \
-} \
 inline RT<uint32x4_t> OP##_u32(uint32x4_t a, uint32x4_t b) noexcept \
 { \
 return ::OP##_u32(a, b); \
 } \
-inline RT<int32x4_t> OP##_s32(int32x4_t a, int32x4_t b) noexcept \
-{ \
-return ::OP##_s32(a, b); \
-} \
 }
+#define WRAP_BINARY_INT_EXCLUDING_64(OP, RT) \
+WRAP_BINARY_UINT_EXCLUDING_64(OP, RT) \
+namespace wrap \
+{ \
+inline RT<int8x16_t> OP##_s8(int8x16_t a, int8x16_t b) noexcept \
+{ \
+return ::OP##_s8(a, b); \
+} \
+inline RT<int16x8_t> OP##_s16(int16x8_t a, int16x8_t b) noexcept \
+{ \
+return ::OP##_s16(a, b); \
+} \
+inline RT<int32x4_t> OP##_s32(int32x4_t a, int32x4_t b) noexcept \
+{ \
+return ::OP##_s32(a, b); \
+} \
+}
#define WRAP_BINARY_INT(OP, RT) \
@@ -204,6 +210,10 @@ namespace xsimd
uint32x4_t, int32x4_t,
float32x4_t>;
using excluding_int64f32_dispatcher = neon_dispatcher_impl<uint8x16_t, int8x16_t,
uint16x8_t, int16x8_t,
uint32x4_t, int32x4_t>;
/**************************
* comparison dispatchers *
**************************/
@@ -744,6 +754,38 @@ namespace xsimd
return dispatcher.apply(register_type(lhs), register_type(rhs));
}
/*******
* avg *
*******/
WRAP_BINARY_UINT_EXCLUDING_64(vhaddq, detail::identity_return_type)
template <class A, class T, class = typename std::enable_if<(std::is_unsigned<T>::value && sizeof(T) != 8), void>::type>
inline batch<T, A> avg(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
{
using register_type = typename batch<T, A>::register_type;
const detail::neon_dispatcher_impl<uint8x16_t, uint16x8_t, uint32x4_t>::binary dispatcher = {
std::make_tuple(wrap::vhaddq_u8, wrap::vhaddq_u16, wrap::vhaddq_u32)
};
return dispatcher.apply(register_type(lhs), register_type(rhs));
}
/********
* avgr *
********/
WRAP_BINARY_UINT_EXCLUDING_64(vrhaddq, detail::identity_return_type)
template <class A, class T, class = typename std::enable_if<(std::is_unsigned<T>::value && sizeof(T) != 8), void>::type>
inline batch<T, A> avgr(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
{
using register_type = typename batch<T, A>::register_type;
const detail::neon_dispatcher_impl<uint8x16_t, uint16x8_t, uint32x4_t>::binary dispatcher = {
std::make_tuple(wrap::vrhaddq_u8, wrap::vrhaddq_u16, wrap::vrhaddq_u32)
};
return dispatcher.apply(register_type(lhs), register_type(rhs));
}
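
Unlike x86, NEON provides both flavors directly, which is what the wrappers above bind: vhaddq_* is a halving add (floor average) and vrhaddq_* its rounding counterpart, so no adjustment step is needed. A minimal check, assuming an AArch64 toolchain with <arm_neon.h>:

#include <arm_neon.h>
#include <cassert>

int main()
{
    uint8x16_t a = vdupq_n_u8(3), b = vdupq_n_u8(4);
    assert(vgetq_lane_u8(vhaddq_u8(a, b), 0) == 3);  // halving add: (3 + 4) >> 1
    assert(vgetq_lane_u8(vrhaddq_u8(a, b), 0) == 4); // rounding halving add: (3 + 4 + 1) >> 1
    return 0;
}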
/********
* sadd *
********/


@@ -92,7 +92,7 @@ namespace xsimd
template <class A, class T>
inline batch<T, A> broadcast(T val, requires_arch<neon64>) noexcept
{
-return broadcast<neon64>(val, neon {});
+return broadcast<A>(val, neon {});
}
template <class A>


@@ -142,6 +142,39 @@ namespace xsimd
return x + y;
}
template <class T, class Tp>
inline typename std::common_type<T, Tp>::type avg(T const& x, Tp const& y) noexcept
{
using common_type = typename std::common_type<T, Tp>::type;
if (std::is_floating_point<common_type>::value)
return (x + y) / 2;
else if (std::is_unsigned<common_type>::value)
{
return (x & y) + ((x ^ y) >> 1);
}
else
{
// Inspired by
// https://stackoverflow.com/questions/5697500/take-the-average-of-two-signed-numbers-in-c
auto t = (x & y) + ((x ^ y) >> 1);
auto t_u = static_cast<typename std::make_unsigned<common_type>::type>(t);
auto avg = t + (static_cast<T>(t_u >> (8 * sizeof(T) - 1)) & (x ^ y));
return avg;
}
}
template <class T, class Tp>
inline typename std::common_type<T, Tp>::type avgr(T const& x, Tp const& y) noexcept
{
using common_type = typename std::common_type<T, Tp>::type;
if (std::is_floating_point<common_type>::value)
return avg(x, y);
else
{
return avg(x, y) + ((x ^ y) & 1);
}
}
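
A hedged usage sketch of these scalar overloads, assuming they are exposed in namespace xsimd via the main header as elsewhere in xsimd_scalar.hpp; integer arguments only, matching the branches exercised above:

#include "xsimd/xsimd.hpp"
#include <cassert>

int main()
{
    assert(xsimd::avg(3, 4) == 3);  // truncated average: (3 + 4) / 2
    assert(xsimd::avgr(3, 4) == 4); // rounded average: +1 because 3 + 4 is odd
    assert(xsimd::avg(-3, 2) == 0); // signed path: matches (-3 + 2) / 2 in C++
    return 0;
}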
template <class T>
inline T incr(T const& x) noexcept
{


@@ -60,6 +60,10 @@ namespace xsimd
inline batch<T, A> insert(batch<T, A> const& self, T val, index<I>, requires_arch<generic>) noexcept;
template <class A, typename T, typename ITy, ITy... Indices>
inline batch<T, A> shuffle(batch<T, A> const& x, batch<T, A> const& y, batch_constant<batch<ITy, A>, Indices...>, requires_arch<generic>) noexcept;
template <class A, class T>
inline batch<T, A> avg(batch<T, A> const&, batch<T, A> const&, requires_arch<generic>) noexcept;
template <class A, class T>
inline batch<T, A> avgr(batch<T, A> const&, batch<T, A> const&, requires_arch<generic>) noexcept;
// abs
template <class A>
@@ -148,6 +152,44 @@ namespace xsimd
return _mm_movemask_epi8(self) != 0;
}
// avgr
template <class A, class T, class = typename std::enable_if<std::is_unsigned<T>::value, void>::type>
inline batch<T, A> avgr(batch<T, A> const& self, batch<T, A> const& other, requires_arch<sse2>) noexcept
{
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
{
return _mm_avg_epu8(self, other);
}
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
{
return _mm_avg_epu16(self, other);
}
else
{
return avgr(self, other, generic {});
}
}
// avg
template <class A, class T, class = typename std::enable_if<std::is_unsigned<T>::value, void>::type>
inline batch<T, A> avg(batch<T, A> const& self, batch<T, A> const& other, requires_arch<sse2>) noexcept
{
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
{
auto adj = ((self ^ other) << 7) >> 7;
return avgr(self, other, A {}) - adj;
}
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
{
auto adj = ((self ^ other) << 15) >> 15;
return avgr(self, other, A {}) - adj;
}
else
{
return avg(self, other, generic {});
}
}
// batch_bool_cast
template <class A, class T_out, class T_in>
inline batch_bool<T_out, A> batch_bool_cast(batch_bool<T_in, A> const& self, batch_bool<T_out, A> const&, requires_arch<sse2>) noexcept


@@ -37,6 +37,8 @@ namespace xsimd
inline batch<T, A> insert(batch<T, A> const& self, T val, index<I>, requires_arch<generic>) noexcept;
template <class A, typename T, typename ITy, ITy... Indices>
inline batch<T, A> shuffle(batch<T, A> const& x, batch<T, A> const& y, batch_constant<batch<ITy, A>, Indices...>, requires_arch<generic>) noexcept;
template <class A, class T>
inline batch<T, A> avg(batch<T, A> const&, batch<T, A> const&, requires_arch<generic>) noexcept;
// abs
template <class A, class T, typename std::enable_if<std::is_integral<T>::value && std::is_signed<T>::value, void>::type>
@@ -116,6 +118,44 @@ namespace xsimd
return wasm_f64x2_add(self, other);
}
// avgr
template <class A, class T, class = typename std::enable_if<std::is_unsigned<T>::value, void>::type>
inline batch<T, A> avgr(batch<T, A> const& self, batch<T, A> const& other, requires_arch<wasm>) noexcept
{
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
{
return wasm_u8x16_avgr(self, other);
}
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
{
return wasm_u16x8_avgr(self, other);
}
else
{
return avgr(self, other, generic {});
}
}
// avg
template <class A, class T, class = typename std::enable_if<std::is_unsigned<T>::value, void>::type>
inline batch<T, A> avg(batch<T, A> const& self, batch<T, A> const& other, requires_arch<wasm>) noexcept
{
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
{
auto adj = ((self ^ other) << 7) >> 7;
return avgr(self, other, A {}) - adj;
}
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
{
auto adj = ((self ^ other) << 15) >> 15;
return avgr(self, other, A {}) - adj;
}
else
{
return avg(self, other, generic {});
}
}
// all
template <class A>
inline bool all(batch_bool<float, A> const& self, requires_arch<wasm>) noexcept


@@ -194,7 +194,7 @@ namespace xsimd
using all_sve_architectures = arch_list<detail::sve<512>, detail::sve<256>, detail::sve<128>>;
using all_rvv_architectures = arch_list<detail::rvv<512>, detail::rvv<256>, detail::rvv<128>>;
-using all_arm_architectures = typename detail::join<all_sve_architectures, arch_list<neon64, neon>>::type;
+using all_arm_architectures = typename detail::join<all_sve_architectures, arch_list<i8mm<neon64>, neon64, neon>>::type;
using all_riscv_architectures = all_rvv_architectures;
using all_wasm_architectures = arch_list<wasm>;
using all_architectures = typename detail::join<all_riscv_architectures, all_wasm_architectures, all_arm_architectures, all_x86_architectures>::type;


@@ -349,6 +349,17 @@
#define XSIMD_WITH_NEON64 0
#endif
/**
* @ingroup xsimd_config_macro
*
* Set to 1 if the i8mm neon64 extension is available at compile-time, to 0 otherwise.
*/
#if defined(__ARM_FEATURE_MATMUL_INT8)
#define XSIMD_WITH_I8MM_NEON64 1
#else
#define XSIMD_WITH_I8MM_NEON64 0
#endif
/**
* @ingroup xsimd_config_macro
*


@@ -18,6 +18,11 @@
#if defined(__linux__) && (defined(__ARM_NEON) || defined(_M_ARM) || defined(__riscv_vector))
#include <asm/hwcap.h>
#include <sys/auxv.h>
#ifndef HWCAP2_I8MM
#define HWCAP2_I8MM (1 << 13)
#endif
#endif
#if defined(_MSC_VER)
@@ -66,6 +71,7 @@ namespace xsimd
ARCH_FIELD_EX(avx512vnni<::xsimd::avx512vbmi>, avx512vnni_vbmi)
ARCH_FIELD(neon)
ARCH_FIELD(neon64)
ARCH_FIELD_EX(i8mm<::xsimd::neon64>, i8mm_neon64)
ARCH_FIELD(sve)
ARCH_FIELD(rvv)
ARCH_FIELD(wasm)
@@ -83,6 +89,9 @@ namespace xsimd
#if defined(__aarch64__) || defined(_M_ARM64)
neon = 1;
neon64 = 1;
#if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18)
i8mm_neon64 = bool(getauxval(AT_HWCAP2) & HWCAP2_I8MM);
#endif
#elif defined(__ARM_NEON) || defined(_M_ARM)
#if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18)
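
The runtime probe added above reduces to a few lines; a standalone sketch for Linux/AArch64 (the has_i8mm name is illustrative):

#include <sys/auxv.h> // getauxval, AT_HWCAP2

// Older kernel headers may lack the constant, hence the fallback above.
#ifndef HWCAP2_I8MM
#define HWCAP2_I8MM (1 << 13)
#endif

bool has_i8mm()
{
    return (getauxval(AT_HWCAP2) & HWCAP2_I8MM) != 0;
}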


@@ -36,6 +36,8 @@
#include "xsimd_avx512dq_register.hpp"
#include "xsimd_avx512f_register.hpp"
#include "xsimd_i8mm_neon64_register.hpp"
#include "xsimd_neon64_register.hpp"
#include "xsimd_neon_register.hpp"


@@ -202,6 +202,36 @@ namespace xsimd
return kernel::atanh<A>(x, A {});
}
/**
* @ingroup batch_math
*
* Computes the average of batches \c x and \c y
* @param x batch of T
* @param y batch of T
* @return the average of elements between \c x and \c y.
*/
template <class T, class A>
inline batch<T, A> avg(batch<T, A> const& x, batch<T, A> const& y) noexcept
{
detail::static_check_supported_config<T, A>();
return kernel::avg<A>(x, y, A {});
}
/**
* @ingroup batch_math
*
* Computes the rounded average of batches \c x and \c y
* @param x batch of T
* @param y batch of T
* @return the rounded average of elements between \c x and \c y.
*/
template <class T, class A>
inline batch<T, A> avgr(batch<T, A> const& x, batch<T, A> const& y) noexcept
{
detail::static_check_supported_config<T, A>();
return kernel::avgr<A>(x, y, A {});
}
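
A hedged usage sketch of the two new public entry points (all names other than avg and avgr are mine):

#include "xsimd/xsimd.hpp"
#include <cstdint>

// Lane-wise averages on a batch of the default architecture; avg floors,
// avgr rounds half up, and neither can overflow the lane type.
xsimd::batch<uint8_t> blend_floor(xsimd::batch<uint8_t> a, xsimd::batch<uint8_t> b)
{
    return xsimd::avg(a, b);
}

xsimd::batch<uint8_t> blend_round(xsimd::batch<uint8_t> a, xsimd::batch<uint8_t> b)
{
    return xsimd::avgr(a, b);
}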
/**
* @ingroup batch_conversion
*

View File

@@ -0,0 +1,46 @@
/***************************************************************************
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
* Martin Renou *
* Copyright (c) QuantStack *
* Copyright (c) Serge Guelton *
* *
* Distributed under the terms of the BSD 3-Clause License. *
* *
* The full license is in the file LICENSE, distributed with this software. *
****************************************************************************/
#ifndef XSIMD_I8MM_NEON64_REGISTER_HPP
#define XSIMD_I8MM_NEON64_REGISTER_HPP
#include "./xsimd_neon64_register.hpp"
namespace xsimd
{
template <typename arch>
struct i8mm;
/**
* @ingroup architectures
*
* Neon64 + i8mm instructions
*/
template <>
struct i8mm<neon64> : neon64
{
static constexpr bool supported() noexcept { return XSIMD_WITH_I8MM_NEON64; }
static constexpr bool available() noexcept { return true; }
static constexpr unsigned version() noexcept { return generic::version(8, 2, 0); }
static constexpr char const* name() noexcept { return "i8mm+neon64"; }
};
#if XSIMD_WITH_I8MM_NEON64
namespace types
{
XSIMD_DECLARE_SIMD_REGISTER_ALIAS(i8mm<neon64>, neon64);
}
#endif
}
#endif
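
A short sketch of how the new architecture type can be queried; the i8mm_neon64 field name comes from the ARCH_FIELD_EX entry added in xsimd_cpuid.hpp above, the rest is illustrative:

#include "xsimd/xsimd.hpp"
#include <cstdio>

int main()
{
    using i8mm64 = xsimd::i8mm<xsimd::neon64>;
    // Compile-time: was the extension enabled at build time (XSIMD_WITH_I8MM_NEON64)?
    std::printf("%s compiled in: %d\n", i8mm64::name(), int(i8mm64::supported()));
    // Run-time: does this CPU report the feature (the getauxval probe above)?
    std::printf("available on this CPU: %d\n", int(xsimd::available_architectures().i8mm_neon64));
    return 0;
}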


@@ -10,8 +10,8 @@ origin:
url: https://github.com/QuantStack/xsimd
-release: ead07427834c82aac105d36b8671abbe915c441c (2024-02-05T07:06:11Z).
-revision: ead07427834c82aac105d36b8671abbe915c441c
+release: ce58d62666c315140eb54042498d93114edbaa68 (2024-02-27T16:05:37Z).
+revision: ce58d62666c315140eb54042498d93114edbaa68
license: BSD-3-Clause