mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-23 12:51:06 +00:00
Bug 1882334 - Upgrade xsimd to version ce58d62666c315140eb54042498d93114edbaa68 r=padenot
This notably brings in the i8mm NEON extension, which is to be used by Firefox Translations. Differential Revision: https://phabricator.services.mozilla.com/D202839
This commit is contained in:
parent
c63526fe1c
commit
f51ce4e961
@ -26,7 +26,7 @@ namespace xsimd
|
||||
|
||||
using namespace types;
|
||||
// abs
|
||||
template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
|
||||
template <class A, class T, class>
|
||||
inline batch<T, A> abs(batch<T, A> const& self, requires_arch<generic>) noexcept
|
||||
{
|
||||
if (std::is_unsigned<T>::value)
|
||||
@ -45,6 +45,63 @@ namespace xsimd
|
||||
return hypot(z.real(), z.imag());
|
||||
}
|
||||
|
||||
// avg
|
||||
namespace detail
|
||||
{
|
||||
template <class A, class T>
|
||||
inline batch<T, A> avg(batch<T, A> const& x, batch<T, A> const& y, std::true_type, std::false_type) noexcept
|
||||
{
|
||||
return (x & y) + ((x ^ y) >> 1);
|
||||
}
|
||||
|
||||
template <class A, class T>
|
||||
inline batch<T, A> avg(batch<T, A> const& x, batch<T, A> const& y, std::true_type, std::true_type) noexcept
|
||||
{
|
||||
// Inspired by
|
||||
// https://stackoverflow.com/questions/5697500/take-the-average-of-two-signed-numbers-in-c
|
||||
auto t = (x & y) + ((x ^ y) >> 1);
|
||||
auto t_u = bitwise_cast<typename std::make_unsigned<T>::type>(t);
|
||||
auto avg = t + (bitwise_cast<T>(t_u >> (8 * sizeof(T) - 1)) & (x ^ y));
|
||||
return avg;
|
||||
}
|
||||
|
||||
template <class A, class T>
|
||||
inline batch<T, A> avg(batch<T, A> const& x, batch<T, A> const& y, std::false_type, std::true_type) noexcept
|
||||
{
|
||||
return (x + y) / 2;
|
||||
}
|
||||
}
|
||||
|
||||
template <class A, class T>
|
||||
inline batch<T, A> avg(batch<T, A> const& x, batch<T, A> const& y, requires_arch<generic>) noexcept
|
||||
{
|
||||
return detail::avg(x, y, typename std::is_integral<T>::type {}, typename std::is_signed<T>::type {});
|
||||
}
|
||||
|
||||
// avgr
|
||||
namespace detail
|
||||
{
|
||||
template <class A, class T>
|
||||
inline batch<T, A> avgr(batch<T, A> const& x, batch<T, A> const& y, std::true_type) noexcept
|
||||
{
|
||||
constexpr unsigned shift = 8 * sizeof(T) - 1;
|
||||
auto adj = std::is_signed<T>::value ? ((x ^ y) & 0x1) : (((x ^ y) << shift) >> shift);
|
||||
return ::xsimd::kernel::avg(x, y, A {}) + adj;
|
||||
}
|
||||
|
||||
template <class A, class T>
|
||||
inline batch<T, A> avgr(batch<T, A> const& x, batch<T, A> const& y, std::false_type) noexcept
|
||||
{
|
||||
return ::xsimd::kernel::avg(x, y, A {});
|
||||
}
|
||||
}
|
||||
|
||||
template <class A, class T>
|
||||
inline batch<T, A> avgr(batch<T, A> const& x, batch<T, A> const& y, requires_arch<generic>) noexcept
|
||||
{
|
||||
return detail::avgr(x, y, typename std::is_integral<T>::type {});
|
||||
}
|
||||
|
||||
// batch_cast
|
||||
template <class A, class T>
|
||||
inline batch<T, A> batch_cast(batch<T, A> const& self, batch<T, A> const&, requires_arch<generic>) noexcept
|
||||
|
@ -76,6 +76,44 @@ namespace xsimd
|
||||
}
|
||||
}
|
||||
|
||||
// avgr
|
||||
template <class A, class T, class = typename std::enable_if<std::is_unsigned<T>::value, void>::type>
|
||||
inline batch<T, A> avgr(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
|
||||
{
|
||||
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
|
||||
{
|
||||
return _mm256_avg_epu8(self, other);
|
||||
}
|
||||
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
|
||||
{
|
||||
return _mm256_avg_epu16(self, other);
|
||||
}
|
||||
else
|
||||
{
|
||||
return avgr(self, other, generic {});
|
||||
}
|
||||
}
|
||||
|
||||
// avg
|
||||
template <class A, class T, class = typename std::enable_if<std::is_unsigned<T>::value, void>::type>
|
||||
inline batch<T, A> avg(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
|
||||
{
|
||||
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
|
||||
{
|
||||
auto adj = ((self ^ other) << 7) >> 7;
|
||||
return avgr(self, other, A {}) - adj;
|
||||
}
|
||||
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
|
||||
{
|
||||
auto adj = ((self ^ other) << 15) >> 15;
|
||||
return avgr(self, other, A {}) - adj;
|
||||
}
|
||||
else
|
||||
{
|
||||
return avg(self, other, generic {});
|
||||
}
|
||||
}
|
||||
|
||||
// bitwise_and
|
||||
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
|
||||
inline batch<T, A> bitwise_and(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
|
||||
|
@ -112,6 +112,44 @@ namespace xsimd
|
||||
}
|
||||
}
|
||||
|
||||
// avgr
|
||||
template <class A, class T, class = typename std::enable_if<std::is_unsigned<T>::value, void>::type>
|
||||
inline batch<T, A> avgr(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512bw>) noexcept
|
||||
{
|
||||
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
|
||||
{
|
||||
return _mm512_avg_epu8(self, other);
|
||||
}
|
||||
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
|
||||
{
|
||||
return _mm512_avg_epu16(self, other);
|
||||
}
|
||||
else
|
||||
{
|
||||
return avgr(self, other, generic {});
|
||||
}
|
||||
}
|
||||
|
||||
// avg
|
||||
template <class A, class T, class = typename std::enable_if<std::is_unsigned<T>::value, void>::type>
|
||||
inline batch<T, A> avg(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512bw>) noexcept
|
||||
{
|
||||
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
|
||||
{
|
||||
auto adj = ((self ^ other) << 7) >> 7;
|
||||
return avgr(self, other, A {}) - adj;
|
||||
}
|
||||
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
|
||||
{
|
||||
auto adj = ((self ^ other) << 15) >> 15;
|
||||
return avgr(self, other, A {}) - adj;
|
||||
}
|
||||
else
|
||||
{
|
||||
return avg(self, other, generic {});
|
||||
}
|
||||
}
|
||||
|
||||
// bitwise_lshift
|
||||
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
|
||||
inline batch<T, A> bitwise_lshift(batch<T, A> const& self, int32_t other, requires_arch<avx512bw>) noexcept
|
||||
|
17
third_party/xsimd/include/xsimd/arch/xsimd_i8mm_neon64.hpp
vendored
Normal file
17
third_party/xsimd/include/xsimd/arch/xsimd_i8mm_neon64.hpp
vendored
Normal file
@ -0,0 +1,17 @@
|
||||
/***************************************************************************
|
||||
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
|
||||
* Martin Renou *
|
||||
* Copyright (c) QuantStack *
|
||||
* Copyright (c) Serge Guelton *
|
||||
* *
|
||||
* Distributed under the terms of the BSD 3-Clause License. *
|
||||
* *
|
||||
* The full license is in the file LICENSE, distributed with this software. *
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef XSIMD_I8MM_NEON64_HPP
|
||||
#define XSIMD_I8MM_NEON64_HPP
|
||||
|
||||
#include "../types/xsimd_i8mm_neon64_register.hpp"
|
||||
|
||||
#endif
|
@ -104,6 +104,10 @@
|
||||
#include "./xsimd_neon64.hpp"
|
||||
#endif
|
||||
|
||||
#if XSIMD_WITH_I8MM_NEON64
|
||||
#include "./xsimd_i8mm_neon64.hpp"
|
||||
#endif
|
||||
|
||||
#if XSIMD_WITH_SVE
|
||||
#include "./xsimd_sve.hpp"
|
||||
#endif
|
||||
|
@ -23,33 +23,39 @@
|
||||
// Wrap intrinsics so we can pass them as function pointers
|
||||
// - OP: intrinsics name prefix, e.g., vorrq
|
||||
// - RT: type traits to deduce intrinsics return types
|
||||
#define WRAP_BINARY_INT_EXCLUDING_64(OP, RT) \
|
||||
#define WRAP_BINARY_UINT_EXCLUDING_64(OP, RT) \
|
||||
namespace wrap \
|
||||
{ \
|
||||
inline RT<uint8x16_t> OP##_u8(uint8x16_t a, uint8x16_t b) noexcept \
|
||||
{ \
|
||||
return ::OP##_u8(a, b); \
|
||||
} \
|
||||
inline RT<int8x16_t> OP##_s8(int8x16_t a, int8x16_t b) noexcept \
|
||||
{ \
|
||||
return ::OP##_s8(a, b); \
|
||||
} \
|
||||
inline RT<uint16x8_t> OP##_u16(uint16x8_t a, uint16x8_t b) noexcept \
|
||||
{ \
|
||||
return ::OP##_u16(a, b); \
|
||||
} \
|
||||
inline RT<int16x8_t> OP##_s16(int16x8_t a, int16x8_t b) noexcept \
|
||||
{ \
|
||||
return ::OP##_s16(a, b); \
|
||||
} \
|
||||
inline RT<uint32x4_t> OP##_u32(uint32x4_t a, uint32x4_t b) noexcept \
|
||||
{ \
|
||||
return ::OP##_u32(a, b); \
|
||||
} \
|
||||
inline RT<int32x4_t> OP##_s32(int32x4_t a, int32x4_t b) noexcept \
|
||||
{ \
|
||||
return ::OP##_s32(a, b); \
|
||||
} \
|
||||
}
|
||||
|
||||
// Same as WRAP_BINARY_UINT_EXCLUDING_64, plus wrappers for the signed 8-, 16- and
// 32-bit variants of the intrinsic.
#define WRAP_BINARY_INT_EXCLUDING_64(OP, RT)                                     \
    WRAP_BINARY_UINT_EXCLUDING_64(OP, RT)                                        \
    namespace wrap                                                               \
    {                                                                            \
        inline RT<int8x16_t> OP##_s8(int8x16_t lhs, int8x16_t rhs) noexcept      \
        {                                                                        \
            return ::OP##_s8(lhs, rhs);                                          \
        }                                                                        \
        inline RT<int16x8_t> OP##_s16(int16x8_t lhs, int16x8_t rhs) noexcept     \
        {                                                                        \
            return ::OP##_s16(lhs, rhs);                                         \
        }                                                                        \
        inline RT<int32x4_t> OP##_s32(int32x4_t lhs, int32x4_t rhs) noexcept     \
        {                                                                        \
            return ::OP##_s32(lhs, rhs);                                         \
        }                                                                        \
    }
|
||||
|
||||
#define WRAP_BINARY_INT(OP, RT) \
|
||||
@ -204,6 +210,10 @@ namespace xsimd
|
||||
uint32x4_t, int32x4_t,
|
||||
float32x4_t>;
|
||||
|
||||
// Dispatcher over the 8-, 16- and 32-bit integer registers only
// (no 64-bit integer and no float32 entries).
using excluding_int64f32_dispatcher = neon_dispatcher_impl<
    uint8x16_t, int8x16_t,
    uint16x8_t, int16x8_t,
    uint32x4_t, int32x4_t>;
|
||||
|
||||
/**************************
|
||||
* comparison dispatchers *
|
||||
**************************/
|
||||
@ -744,6 +754,38 @@ namespace xsimd
|
||||
return dispatcher.apply(register_type(lhs), register_type(rhs));
|
||||
}
|
||||
|
||||
/*******
|
||||
* avg *
|
||||
*******/
|
||||
|
||||
WRAP_BINARY_UINT_EXCLUDING_64(vhaddq, detail::identity_return_type)
|
||||
|
||||
template <class A, class T, class = typename std::enable_if<(std::is_unsigned<T>::value && sizeof(T) != 8), void>::type>
|
||||
inline batch<T, A> avg(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
|
||||
{
|
||||
using register_type = typename batch<T, A>::register_type;
|
||||
const detail::neon_dispatcher_impl<uint8x16_t, uint16x8_t, uint32x4_t>::binary dispatcher = {
|
||||
std::make_tuple(wrap::vhaddq_u8, wrap::vhaddq_u16, wrap::vhaddq_u32)
|
||||
};
|
||||
return dispatcher.apply(register_type(lhs), register_type(rhs));
|
||||
}
|
||||
|
||||
/********
|
||||
* avgr *
|
||||
********/
|
||||
|
||||
WRAP_BINARY_UINT_EXCLUDING_64(vrhaddq, detail::identity_return_type)
|
||||
|
||||
template <class A, class T, class = typename std::enable_if<(std::is_unsigned<T>::value && sizeof(T) != 8), void>::type>
|
||||
inline batch<T, A> avgr(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
|
||||
{
|
||||
using register_type = typename batch<T, A>::register_type;
|
||||
const detail::neon_dispatcher_impl<uint8x16_t, uint16x8_t, uint32x4_t>::binary dispatcher = {
|
||||
std::make_tuple(wrap::vrhaddq_u8, wrap::vrhaddq_u16, wrap::vrhaddq_u32)
|
||||
};
|
||||
return dispatcher.apply(register_type(lhs), register_type(rhs));
|
||||
}
|
||||
|
||||
/********
|
||||
* sadd *
|
||||
********/
|
||||
|
@ -92,7 +92,7 @@ namespace xsimd
|
||||
template <class A, class T>
|
||||
inline batch<T, A> broadcast(T val, requires_arch<neon64>) noexcept
|
||||
{
|
||||
return broadcast<neon64>(val, neon {});
|
||||
return broadcast<A>(val, neon {});
|
||||
}
|
||||
|
||||
template <class A>
|
||||
|
@ -142,6 +142,39 @@ namespace xsimd
|
||||
return x + y;
|
||||
}
|
||||
|
||||
namespace detail
{
    // Scalar avg helpers, selected by (std::is_floating_point<C>, std::is_unsigned<C>).
    // Tag dispatch keeps integer-only operators out of floating-point instantiations.

    // Floating-point common type: plain arithmetic mean.
    template <class C>
    inline C scalar_avg(C x, C y, std::true_type, std::false_type) noexcept
    {
        return (x + y) / 2;
    }

    // Unsigned integral common type: shared bits plus half of the differing bits,
    // so x + y is never formed and cannot wrap.
    template <class C>
    inline C scalar_avg(C x, C y, std::false_type, std::true_type) noexcept
    {
        return static_cast<C>((x & y) + ((x ^ y) >> 1));
    }

    // Signed integral common type, rounding toward zero.
    // Inspired by
    // https://stackoverflow.com/questions/5697500/take-the-average-of-two-signed-numbers-in-c
    template <class C>
    inline C scalar_avg(C x, C y, std::false_type, std::false_type) noexcept
    {
        using unsigned_type = typename std::make_unsigned<C>::type;
        auto const differing_bits = x ^ y;
        // Relies on >> of a negative value being an arithmetic shift (floor division).
        auto const floored = (x & y) + (differing_bits >> 1);
        // Sign bit of the floored result; the shift width must be that of the
        // common type, not of either argument type.
        auto const is_negative = static_cast<unsigned_type>(floored) >> (8 * sizeof(C) - 1);
        // A negative result with an odd sum was rounded down; bump it toward zero.
        return static_cast<C>(floored + (static_cast<C>(is_negative) & differing_bits));
    }

    // Floating-point common type: no rounding step.
    template <class C>
    inline C scalar_avgr(C x, C y, std::true_type) noexcept
    {
        return scalar_avg(x, y, std::true_type {}, std::false_type {});
    }

    // Integral common type: avg plus one when the operands differ in their lowest bit.
    template <class C>
    inline C scalar_avgr(C x, C y, std::false_type) noexcept
    {
        auto const truncated = scalar_avg(x, y, std::false_type {}, typename std::is_unsigned<C>::type {});
        return static_cast<C>(truncated + ((x ^ y) & 1));
    }
}

/**
 * Computes the average of two scalars in their common type.
 *
 * Floating-point values yield (x + y) / 2. Integral values are averaged without
 * forming x + y; signed results are rounded toward zero.
 *
 * @param x first operand
 * @param y second operand
 * @return the average of \c x and \c y, as std::common_type<T, Tp>::type.
 */
template <class T, class Tp>
inline typename std::common_type<T, Tp>::type avg(T const& x, Tp const& y) noexcept
{
    using common_type = typename std::common_type<T, Tp>::type;
    // Convert first so every helper works on a single type and width.
    return detail::scalar_avg(static_cast<common_type>(x), static_cast<common_type>(y),
                              typename std::is_floating_point<common_type>::type {},
                              typename std::is_unsigned<common_type>::type {});
}

/**
 * Computes the rounded average of two scalars in their common type.
 *
 * Floating-point values give the same result as avg. Integral values give
 * avg(x, y) + ((x ^ y) & 1).
 *
 * @param x first operand
 * @param y second operand
 * @return the rounded average of \c x and \c y, as std::common_type<T, Tp>::type.
 */
template <class T, class Tp>
inline typename std::common_type<T, Tp>::type avgr(T const& x, Tp const& y) noexcept
{
    using common_type = typename std::common_type<T, Tp>::type;
    return detail::scalar_avgr(static_cast<common_type>(x), static_cast<common_type>(y),
                               typename std::is_floating_point<common_type>::type {});
}
|
||||
|
||||
template <class T>
|
||||
inline T incr(T const& x) noexcept
|
||||
{
|
||||
|
@ -60,6 +60,10 @@ namespace xsimd
|
||||
inline batch<T, A> insert(batch<T, A> const& self, T val, index<I>, requires_arch<generic>) noexcept;
|
||||
template <class A, typename T, typename ITy, ITy... Indices>
|
||||
inline batch<T, A> shuffle(batch<T, A> const& x, batch<T, A> const& y, batch_constant<batch<ITy, A>, Indices...>, requires_arch<generic>) noexcept;
|
||||
template <class A, class T>
|
||||
inline batch<T, A> avg(batch<T, A> const&, batch<T, A> const&, requires_arch<generic>) noexcept;
|
||||
template <class A, class T>
|
||||
inline batch<T, A> avgr(batch<T, A> const&, batch<T, A> const&, requires_arch<generic>) noexcept;
|
||||
|
||||
// abs
|
||||
template <class A>
|
||||
@ -148,6 +152,44 @@ namespace xsimd
|
||||
return _mm_movemask_epi8(self) != 0;
|
||||
}
|
||||
|
||||
// avgr
|
||||
template <class A, class T, class = typename std::enable_if<std::is_unsigned<T>::value, void>::type>
|
||||
inline batch<T, A> avgr(batch<T, A> const& self, batch<T, A> const& other, requires_arch<sse2>) noexcept
|
||||
{
|
||||
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
|
||||
{
|
||||
return _mm_avg_epu8(self, other);
|
||||
}
|
||||
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
|
||||
{
|
||||
return _mm_avg_epu16(self, other);
|
||||
}
|
||||
else
|
||||
{
|
||||
return avgr(self, other, generic {});
|
||||
}
|
||||
}
|
||||
|
||||
// avg
|
||||
template <class A, class T, class = typename std::enable_if<std::is_unsigned<T>::value, void>::type>
|
||||
inline batch<T, A> avg(batch<T, A> const& self, batch<T, A> const& other, requires_arch<sse2>) noexcept
|
||||
{
|
||||
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
|
||||
{
|
||||
auto adj = ((self ^ other) << 7) >> 7;
|
||||
return avgr(self, other, A {}) - adj;
|
||||
}
|
||||
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
|
||||
{
|
||||
auto adj = ((self ^ other) << 15) >> 15;
|
||||
return avgr(self, other, A {}) - adj;
|
||||
}
|
||||
else
|
||||
{
|
||||
return avg(self, other, generic {});
|
||||
}
|
||||
}
|
||||
|
||||
// batch_bool_cast
|
||||
template <class A, class T_out, class T_in>
|
||||
inline batch_bool<T_out, A> batch_bool_cast(batch_bool<T_in, A> const& self, batch_bool<T_out, A> const&, requires_arch<sse2>) noexcept
|
||||
|
@ -37,6 +37,8 @@ namespace xsimd
|
||||
inline batch<T, A> insert(batch<T, A> const& self, T val, index<I>, requires_arch<generic>) noexcept;
|
||||
template <class A, typename T, typename ITy, ITy... Indices>
|
||||
inline batch<T, A> shuffle(batch<T, A> const& x, batch<T, A> const& y, batch_constant<batch<ITy, A>, Indices...>, requires_arch<generic>) noexcept;
|
||||
template <class A, class T>
|
||||
inline batch<T, A> avg(batch<T, A> const&, batch<T, A> const&, requires_arch<generic>) noexcept;
|
||||
|
||||
// abs
|
||||
template <class A, class T, typename std::enable_if<std::is_integral<T>::value && std::is_signed<T>::value, void>::type>
|
||||
@ -116,6 +118,44 @@ namespace xsimd
|
||||
return wasm_f64x2_add(self, other);
|
||||
}
|
||||
|
||||
// avgr
|
||||
template <class A, class T, class = typename std::enable_if<std::is_unsigned<T>::value, void>::type>
|
||||
inline batch<T, A> avgr(batch<T, A> const& self, batch<T, A> const& other, requires_arch<wasm>) noexcept
|
||||
{
|
||||
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
|
||||
{
|
||||
return wasm_u8x16_avgr(self, other);
|
||||
}
|
||||
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
|
||||
{
|
||||
return wasm_u16x8_avgr(self, other);
|
||||
}
|
||||
else
|
||||
{
|
||||
return avgr(self, other, generic {});
|
||||
}
|
||||
}
|
||||
|
||||
// avg
|
||||
template <class A, class T, class = typename std::enable_if<std::is_unsigned<T>::value, void>::type>
|
||||
inline batch<T, A> avg(batch<T, A> const& self, batch<T, A> const& other, requires_arch<wasm>) noexcept
|
||||
{
|
||||
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
|
||||
{
|
||||
auto adj = ((self ^ other) << 7) >> 7;
|
||||
return avgr(self, other, A {}) - adj;
|
||||
}
|
||||
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
|
||||
{
|
||||
auto adj = ((self ^ other) << 15) >> 15;
|
||||
return avgr(self, other, A {}) - adj;
|
||||
}
|
||||
else
|
||||
{
|
||||
return avg(self, other, generic {});
|
||||
}
|
||||
}
|
||||
|
||||
// all
|
||||
template <class A>
|
||||
inline bool all(batch_bool<float, A> const& self, requires_arch<wasm>) noexcept
|
||||
|
@ -194,7 +194,7 @@ namespace xsimd
|
||||
|
||||
using all_sve_architectures = arch_list<detail::sve<512>, detail::sve<256>, detail::sve<128>>;
|
||||
using all_rvv_architectures = arch_list<detail::rvv<512>, detail::rvv<256>, detail::rvv<128>>;
|
||||
using all_arm_architectures = typename detail::join<all_sve_architectures, arch_list<neon64, neon>>::type;
|
||||
using all_arm_architectures = typename detail::join<all_sve_architectures, arch_list<i8mm<neon64>, neon64, neon>>::type;
|
||||
using all_riscv_architectures = all_rvv_architectures;
|
||||
using all_wasm_architectures = arch_list<wasm>;
|
||||
using all_architectures = typename detail::join<all_riscv_architectures, all_wasm_architectures, all_arm_architectures, all_x86_architectures>::type;
|
||||
|
@ -349,6 +349,17 @@
|
||||
#define XSIMD_WITH_NEON64 0
|
||||
#endif
|
||||
|
||||
/**
 * @ingroup xsimd_config_macro
 *
 * Set to 1 if the i8mm extension is available at compile-time on top of neon64,
 * to 0 otherwise.
 */
// The i8mm<neon64> architecture derives from neon64 and aliases its registers, so
// the compiler feature macro alone is not sufficient: neon64 must be enabled too.
#if defined(__ARM_FEATURE_MATMUL_INT8) && XSIMD_WITH_NEON64
#define XSIMD_WITH_I8MM_NEON64 1
#else
#define XSIMD_WITH_I8MM_NEON64 0
#endif
|
||||
|
||||
/**
|
||||
* @ingroup xsimd_config_macro
|
||||
*
|
||||
|
@ -18,6 +18,11 @@
|
||||
#if defined(__linux__) && (defined(__ARM_NEON) || defined(_M_ARM) || defined(__riscv_vector))
|
||||
#include <asm/hwcap.h>
|
||||
#include <sys/auxv.h>
|
||||
|
||||
#ifndef HWCAP2_I8MM
|
||||
#define HWCAP2_I8MM (1 << 13)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
@ -66,6 +71,7 @@ namespace xsimd
|
||||
ARCH_FIELD_EX(avx512vnni<::xsimd::avx512vbmi>, avx512vnni_vbmi)
|
||||
ARCH_FIELD(neon)
|
||||
ARCH_FIELD(neon64)
|
||||
ARCH_FIELD_EX(i8mm<::xsimd::neon64>, i8mm_neon64)
|
||||
ARCH_FIELD(sve)
|
||||
ARCH_FIELD(rvv)
|
||||
ARCH_FIELD(wasm)
|
||||
@ -83,6 +89,9 @@ namespace xsimd
|
||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||
neon = 1;
|
||||
neon64 = 1;
|
||||
#if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18)
|
||||
i8mm_neon64 = bool(getauxval(AT_HWCAP2) & HWCAP2_I8MM);
|
||||
#endif
|
||||
#elif defined(__ARM_NEON) || defined(_M_ARM)
|
||||
|
||||
#if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18)
|
||||
|
@ -36,6 +36,8 @@
|
||||
#include "xsimd_avx512dq_register.hpp"
|
||||
#include "xsimd_avx512f_register.hpp"
|
||||
|
||||
#include "xsimd_i8mm_neon64_register.hpp"
|
||||
|
||||
#include "xsimd_neon64_register.hpp"
|
||||
#include "xsimd_neon_register.hpp"
|
||||
|
||||
|
@ -202,6 +202,36 @@ namespace xsimd
|
||||
return kernel::atanh<A>(x, A {});
|
||||
}
|
||||
|
||||
/**
|
||||
* @ingroup batch_math
|
||||
*
|
||||
* Computes the average of batches \c x and \c y
|
||||
* @param x batch of T
|
||||
* @param y batch of T
|
||||
* @return the average of elements between \c x and \c y.
|
||||
*/
|
||||
template <class T, class A>
|
||||
inline batch<T, A> avg(batch<T, A> const& x, batch<T, A> const& y) noexcept
|
||||
{
|
||||
detail::static_check_supported_config<T, A>();
|
||||
return kernel::avg<A>(x, y, A {});
|
||||
}
|
||||
|
||||
/**
|
||||
* @ingroup batch_math
|
||||
*
|
||||
* Computes the rounded average of batches \c x and \c y
|
||||
* @param x batch of T
|
||||
* @param y batch of T
|
||||
* @return the rounded average of elements between \c x and \c y.
|
||||
*/
|
||||
template <class T, class A>
|
||||
inline batch<T, A> avgr(batch<T, A> const& x, batch<T, A> const& y) noexcept
|
||||
{
|
||||
detail::static_check_supported_config<T, A>();
|
||||
return kernel::avgr<A>(x, y, A {});
|
||||
}
|
||||
|
||||
/**
|
||||
* @ingroup batch_conversion
|
||||
*
|
||||
|
46
third_party/xsimd/include/xsimd/types/xsimd_i8mm_neon64_register.hpp
vendored
Normal file
46
third_party/xsimd/include/xsimd/types/xsimd_i8mm_neon64_register.hpp
vendored
Normal file
@ -0,0 +1,46 @@
|
||||
/***************************************************************************
|
||||
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
|
||||
* Martin Renou *
|
||||
* Copyright (c) QuantStack *
|
||||
* Copyright (c) Serge Guelton *
|
||||
* *
|
||||
* Distributed under the terms of the BSD 3-Clause License. *
|
||||
* *
|
||||
* The full license is in the file LICENSE, distributed with this software. *
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef XSIMD_I8MM_NEON64_REGISTER_HPP
|
||||
#define XSIMD_I8MM_NEON64_REGISTER_HPP
|
||||
|
||||
#include "./xsimd_neon64_register.hpp"
|
||||
|
||||
namespace xsimd
|
||||
{
|
||||
template <typename arch>
|
||||
struct i8mm;
|
||||
|
||||
/**
|
||||
* @ingroup architectures
|
||||
*
|
||||
* Neon64 + i8mm instructions
|
||||
*/
|
||||
template <>
|
||||
struct i8mm<neon64> : neon64
|
||||
{
|
||||
static constexpr bool supported() noexcept { return XSIMD_WITH_I8MM_NEON64; }
|
||||
static constexpr bool available() noexcept { return true; }
|
||||
static constexpr unsigned version() noexcept { return generic::version(8, 2, 0); }
|
||||
static constexpr char const* name() noexcept { return "i8mm+neon64"; }
|
||||
};
|
||||
|
||||
#if XSIMD_WITH_I8MM_NEON64
|
||||
namespace types
|
||||
{
|
||||
|
||||
XSIMD_DECLARE_SIMD_REGISTER_ALIAS(i8mm<neon64>, neon64);
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
#endif
|
4
third_party/xsimd/moz.yaml
vendored
4
third_party/xsimd/moz.yaml
vendored
@ -10,8 +10,8 @@ origin:
|
||||
|
||||
url: https://github.com/QuantStack/xsimd
|
||||
|
||||
release: ead07427834c82aac105d36b8671abbe915c441c (2024-02-05T07:06:11Z).
|
||||
revision: ead07427834c82aac105d36b8671abbe915c441c
|
||||
release: ce58d62666c315140eb54042498d93114edbaa68 (2024-02-27T16:05:37Z).
|
||||
revision: ce58d62666c315140eb54042498d93114edbaa68
|
||||
|
||||
license: BSD-3-Clause
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user