Bug 1927534 - Update xsimd to e384105a2a3809c319f0740e2ebf6166da895fcb r=padenot

Differential Revision: https://phabricator.services.mozilla.com/D227075
Updatebot 2024-11-19 11:14:43 +00:00
parent 0fffe21e9d
commit b6f870224e
9 changed files with 388 additions and 9 deletions
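This update vendors xsimd's new in-place transpose() kernels (generic, SSE2, AVX, NEON, NEON64, WASM) and the public xsimd::transpose entry point shown below. A minimal usage sketch, not part of the patch itself: the helper name transpose_rows is hypothetical, the default architecture is assumed, and the row count must equal the batch width, as the kernels assert.

#include <array>
#include <xsimd/xsimd.hpp>

// Transpose a square matrix held as one SIMD batch per row, in place.
// The matrix is batch<float>::size rows of batch<float>::size floats.
void transpose_rows(std::array<xsimd::batch<float>, xsimd::batch<float>::size>& rows)
{
    xsimd::transpose(rows.data(), rows.data() + rows.size());
}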


@@ -639,6 +639,32 @@ namespace xsimd
hi.store_unaligned(buffer + real_batch::size);
}
// transpose
template <class A, class T>
XSIMD_INLINE void transpose(batch<T, A>* matrix_begin, batch<T, A>* matrix_end, requires_arch<generic>) noexcept
{
assert((matrix_end - matrix_begin == batch<T, A>::size) && "correctly sized matrix");
(void)matrix_end;
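// Generic fallback: store every row into an N x N scratch matrix, swap the
// scratch matrix's off-diagonal elements, then reload each row (which now
// holds a column of the original matrix).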
alignas(A::alignment()) T scratch_buffer[batch<T, A>::size * batch<T, A>::size];
for (size_t i = 0; i < batch<T, A>::size; ++i)
{
matrix_begin[i].store_aligned(&scratch_buffer[i * batch<T, A>::size]);
}
// FIXME: this is super naive; we can probably do better.
for (size_t i = 0; i < batch<T, A>::size; ++i)
{
for (size_t j = 0; j < i; ++j)
{
std::swap(scratch_buffer[i * batch<T, A>::size + j],
scratch_buffer[j * batch<T, A>::size + i]);
}
}
for (size_t i = 0; i < batch<T, A>::size; ++i)
{
matrix_begin[i] = batch<T, A>::load_aligned(&scratch_buffer[i * batch<T, A>::size]);
}
}
}
}


@@ -1594,6 +1594,87 @@ namespace xsimd
return bitwise_cast<T>(
swizzle(bitwise_cast<double>(self), mask));
}
// transpose
template <class A>
XSIMD_INLINE void transpose(batch<float, A>* matrix_begin, batch<float, A>* matrix_end, requires_arch<avx>) noexcept
{
assert((matrix_end - matrix_begin == batch<float, A>::size) && "correctly sized matrix");
(void)matrix_end;
// See
// https://stackoverflow.com/questions/25622745/transpose-an-8x8-float-using-avx-avx2
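// Three stages: the unpack intrinsics interleave adjacent rows within each
// 128-bit lane, _mm256_shuffle_ps regroups them into 4-element column
// segments per lane, and _mm256_permute2f128_ps finally exchanges the
// 128-bit halves so each output row holds one full column.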
auto r0 = matrix_begin[0], r1 = matrix_begin[1],
r2 = matrix_begin[2], r3 = matrix_begin[3],
r4 = matrix_begin[4], r5 = matrix_begin[5],
r6 = matrix_begin[6], r7 = matrix_begin[7];
auto t0 = _mm256_unpacklo_ps(r0, r1);
auto t1 = _mm256_unpackhi_ps(r0, r1);
auto t2 = _mm256_unpacklo_ps(r2, r3);
auto t3 = _mm256_unpackhi_ps(r2, r3);
auto t4 = _mm256_unpacklo_ps(r4, r5);
auto t5 = _mm256_unpackhi_ps(r4, r5);
auto t6 = _mm256_unpacklo_ps(r6, r7);
auto t7 = _mm256_unpackhi_ps(r6, r7);
r0 = _mm256_shuffle_ps(t0, t2, _MM_SHUFFLE(1, 0, 1, 0));
r1 = _mm256_shuffle_ps(t0, t2, _MM_SHUFFLE(3, 2, 3, 2));
r2 = _mm256_shuffle_ps(t1, t3, _MM_SHUFFLE(1, 0, 1, 0));
r3 = _mm256_shuffle_ps(t1, t3, _MM_SHUFFLE(3, 2, 3, 2));
r4 = _mm256_shuffle_ps(t4, t6, _MM_SHUFFLE(1, 0, 1, 0));
r5 = _mm256_shuffle_ps(t4, t6, _MM_SHUFFLE(3, 2, 3, 2));
r6 = _mm256_shuffle_ps(t5, t7, _MM_SHUFFLE(1, 0, 1, 0));
r7 = _mm256_shuffle_ps(t5, t7, _MM_SHUFFLE(3, 2, 3, 2));
matrix_begin[0] = _mm256_permute2f128_ps(r0, r4, 0x20);
matrix_begin[1] = _mm256_permute2f128_ps(r1, r5, 0x20);
matrix_begin[2] = _mm256_permute2f128_ps(r2, r6, 0x20);
matrix_begin[3] = _mm256_permute2f128_ps(r3, r7, 0x20);
matrix_begin[4] = _mm256_permute2f128_ps(r0, r4, 0x31);
matrix_begin[5] = _mm256_permute2f128_ps(r1, r5, 0x31);
matrix_begin[6] = _mm256_permute2f128_ps(r2, r6, 0x31);
matrix_begin[7] = _mm256_permute2f128_ps(r3, r7, 0x31);
}
template <class A>
XSIMD_INLINE void transpose(batch<uint32_t, A>* matrix_begin, batch<uint32_t, A>* matrix_end, requires_arch<avx>) noexcept
{
return transpose(reinterpret_cast<batch<float, A>*>(matrix_begin), reinterpret_cast<batch<float, A>*>(matrix_end), A {});
}
template <class A>
XSIMD_INLINE void transpose(batch<int32_t, A>* matrix_begin, batch<int32_t, A>* matrix_end, requires_arch<avx>) noexcept
{
return transpose(reinterpret_cast<batch<float, A>*>(matrix_begin), reinterpret_cast<batch<float, A>*>(matrix_end), A {});
}
template <class A>
XSIMD_INLINE void transpose(batch<double, A>* matrix_begin, batch<double, A>* matrix_end, requires_arch<avx>) noexcept
{
assert((matrix_end - matrix_begin == batch<double, A>::size) && "correctly sized matrix");
(void)matrix_end;
auto r0 = matrix_begin[0], r1 = matrix_begin[1],
r2 = matrix_begin[2], r3 = matrix_begin[3];
auto t0 = _mm256_unpacklo_pd(r0, r1); // r00 r10 r02 r12
auto t1 = _mm256_unpackhi_pd(r0, r1); // r01 r11 r03 r13
auto t2 = _mm256_unpacklo_pd(r2, r3); // r20 r30 r22 r32
auto t3 = _mm256_unpackhi_pd(r2, r3); // r21 r31 r23 r33
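// _mm256_permute2f128_pd with 0x20 keeps the low 128-bit halves of both
// operands, 0x31 keeps the high halves.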
matrix_begin[0] = _mm256_permute2f128_pd(t0, t2, 0x20);
matrix_begin[1] = _mm256_permute2f128_pd(t1, t3, 0x20);
matrix_begin[2] = _mm256_permute2f128_pd(t0, t2, 0x31);
matrix_begin[3] = _mm256_permute2f128_pd(t1, t3, 0x31);
}
template <class A>
XSIMD_INLINE void transpose(batch<uint64_t, A>* matrix_begin, batch<uint64_t, A>* matrix_end, requires_arch<avx>) noexcept
{
return transpose(reinterpret_cast<batch<double, A>*>(matrix_begin), reinterpret_cast<batch<double, A>*>(matrix_end), A {});
}
template <class A>
XSIMD_INLINE void transpose(batch<int64_t, A>* matrix_begin, batch<int64_t, A>* matrix_end, requires_arch<avx>) noexcept
{
return transpose(reinterpret_cast<batch<double, A>*>(matrix_begin), reinterpret_cast<batch<double, A>*>(matrix_end), A {});
}
// trunc
template <class A>


@@ -1748,6 +1748,69 @@ namespace xsimd
return select(batch_bool<T, A> { b... }, true_br, false_br, neon {});
}
/*************
* transpose *
*************/
template <class A>
XSIMD_INLINE void transpose(batch<float, A>* matrix_begin, batch<float, A>* matrix_end, requires_arch<neon>) noexcept
{
assert((matrix_end - matrix_begin == batch<float, A>::size) && "correctly sized matrix");
(void)matrix_end;
auto r0 = matrix_begin[0], r1 = matrix_begin[1], r2 = matrix_begin[2], r3 = matrix_begin[3];
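// vtrnq_f32 transposes the 2x2 sub-blocks of each row pair; recombining the
// low and high halves of the two results yields the full 4x4 transpose.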
auto t01 = vtrnq_f32(r0, r1);
auto t23 = vtrnq_f32(r2, r3);
matrix_begin[0] = vcombine_f32(vget_low_f32(t01.val[0]), vget_low_f32(t23.val[0]));
matrix_begin[1] = vcombine_f32(vget_low_f32(t01.val[1]), vget_low_f32(t23.val[1]));
matrix_begin[2] = vcombine_f32(vget_high_f32(t01.val[0]), vget_high_f32(t23.val[0]));
matrix_begin[3] = vcombine_f32(vget_high_f32(t01.val[1]), vget_high_f32(t23.val[1]));
}
template <class A>
XSIMD_INLINE void transpose(batch<uint32_t, A>* matrix_begin, batch<uint32_t, A>* matrix_end, requires_arch<neon>) noexcept
{
assert((matrix_end - matrix_begin == batch<uint32_t, A>::size) && "correctly sized matrix");
(void)matrix_end;
auto r0 = matrix_begin[0], r1 = matrix_begin[1], r2 = matrix_begin[2], r3 = matrix_begin[3];
auto t01 = vtrnq_u32(r0, r1);
auto t23 = vtrnq_u32(r2, r3);
matrix_begin[0] = vcombine_u32(vget_low_u32(t01.val[0]), vget_low_u32(t23.val[0]));
matrix_begin[1] = vcombine_u32(vget_low_u32(t01.val[1]), vget_low_u32(t23.val[1]));
matrix_begin[2] = vcombine_u32(vget_high_u32(t01.val[0]), vget_high_u32(t23.val[0]));
matrix_begin[3] = vcombine_u32(vget_high_u32(t01.val[1]), vget_high_u32(t23.val[1]));
}
template <class A>
XSIMD_INLINE void transpose(batch<int32_t, A>* matrix_begin, batch<int32_t, A>* matrix_end, requires_arch<neon>) noexcept
{
assert((matrix_end - matrix_begin == batch<int32_t, A>::size) && "correctly sized matrix");
(void)matrix_end;
auto r0 = matrix_begin[0], r1 = matrix_begin[1], r2 = matrix_begin[2], r3 = matrix_begin[3];
auto t01 = vtrnq_s32(r0, r1);
auto t23 = vtrnq_s32(r2, r3);
matrix_begin[0] = vcombine_s32(vget_low_s32(t01.val[0]), vget_low_s32(t23.val[0]));
matrix_begin[1] = vcombine_s32(vget_low_s32(t01.val[1]), vget_low_s32(t23.val[1]));
matrix_begin[2] = vcombine_s32(vget_high_s32(t01.val[0]), vget_high_s32(t23.val[0]));
matrix_begin[3] = vcombine_s32(vget_high_s32(t01.val[1]), vget_high_s32(t23.val[1]));
}
template <class A, class T, detail::enable_sized_unsigned_t<T, 8> = 0>
XSIMD_INLINE void transpose(batch<T, A>* matrix_begin, batch<T, A>* matrix_end, requires_arch<neon>) noexcept
{
assert((matrix_end - matrix_begin == batch<T, A>::size) && "correctly sized matrix");
(void)matrix_end;
auto r0 = matrix_begin[0], r1 = matrix_begin[1];
matrix_begin[0] = vcombine_u64(vget_low_u64(r0), vget_low_u64(r1));
matrix_begin[1] = vcombine_u64(vget_high_u64(r0), vget_high_u64(r1));
}
template <class A, class T, detail::enable_sized_signed_t<T, 8> = 0>
XSIMD_INLINE void transpose(batch<T, A>* matrix_begin, batch<T, A>* matrix_end, requires_arch<neon>) noexcept
{
assert((matrix_end - matrix_begin == batch<T, A>::size) && "correctly sized matrix");
(void)matrix_end;
auto r0 = matrix_begin[0], r1 = matrix_begin[1];
matrix_begin[0] = vcombine_s64(vget_low_s64(r0), vget_low_s64(r1));
matrix_begin[1] = vcombine_s64(vget_high_s64(r0), vget_high_s64(r1));
}
/**********
* zip_lo *
**********/
@@ -2737,6 +2800,7 @@ namespace xsimd
return set(batch<T, A>(), A(), data[idx]...);
}
}
}
#undef WRAP_BINARY_INT_EXCLUDING_64


@@ -949,6 +949,37 @@ namespace xsimd
{
return select(batch_bool<double, A> { b... }, true_br, false_br, neon64 {});
}
template <class A>
XSIMD_INLINE void transpose(batch<double, A>* matrix_begin, batch<double, A>* matrix_end, requires_arch<neon64>) noexcept
{
assert((matrix_end - matrix_begin == batch<double, A>::size) && "correctly sized matrix");
(void)matrix_end;
auto r0 = matrix_begin[0], r1 = matrix_begin[1];
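// 2x2 case: vzip1q_f64 collects lane 0 of each row, vzip2q_f64 collects lane 1.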
matrix_begin[0] = vzip1q_f64(r0, r1);
matrix_begin[1] = vzip2q_f64(r0, r1);
}
template <class A, class T, detail::enable_sized_unsigned_t<T, 8> = 0>
XSIMD_INLINE void transpose(batch<T, A>* matrix_begin, batch<T, A>* matrix_end, requires_arch<neon64>) noexcept
{
assert((matrix_end - matrix_begin == batch<T, A>::size) && "correctly sized matrix");
(void)matrix_end;
auto r0 = matrix_begin[0], r1 = matrix_begin[1];
matrix_begin[0] = vzip1q_u64(r0, r1);
matrix_begin[1] = vzip2q_u64(r0, r1);
}
template <class A, class T, detail::enable_sized_signed_t<T, 8> = 0>
XSIMD_INLINE void transpose(batch<T, A>* matrix_begin, batch<T, A>* matrix_end, requires_arch<neon64>) noexcept
{
assert((matrix_end - matrix_begin == batch<T, A>::size) && "correctly sized matrix");
(void)matrix_end;
auto r0 = matrix_begin[0], r1 = matrix_begin[1];
matrix_begin[0] = vzip1q_s64(r0, r1);
matrix_begin[1] = vzip2q_s64(r0, r1);
}
/**********
* zip_lo *
**********/


@@ -1640,6 +1640,50 @@ namespace xsimd
return bitwise_cast<int32_t>(swizzle(bitwise_cast<uint32_t>(self), mask, sse2 {}));
}
// transpose
template <class A>
XSIMD_INLINE void transpose(batch<float, A>* matrix_begin, batch<float, A>* matrix_end, requires_arch<sse2>) noexcept
{
assert((matrix_end - matrix_begin == batch<float, A>::size) && "correctly sized matrix");
(void)matrix_end;
auto r0 = matrix_begin[0], r1 = matrix_begin[1], r2 = matrix_begin[2], r3 = matrix_begin[3];
_MM_TRANSPOSE4_PS(r0, r1, r2, r3);
matrix_begin[0] = r0;
matrix_begin[1] = r1;
matrix_begin[2] = r2;
matrix_begin[3] = r3;
}
template <class A>
XSIMD_INLINE void transpose(batch<uint32_t, A>* matrix_begin, batch<uint32_t, A>* matrix_end, requires_arch<sse2>) noexcept
{
transpose(reinterpret_cast<batch<float, A>*>(matrix_begin), reinterpret_cast<batch<float, A>*>(matrix_end), A {});
}
template <class A>
XSIMD_INLINE void transpose(batch<int32_t, A>* matrix_begin, batch<int32_t, A>* matrix_end, requires_arch<sse2>) noexcept
{
transpose(reinterpret_cast<batch<float, A>*>(matrix_begin), reinterpret_cast<batch<float, A>*>(matrix_end), A {});
}
template <class A>
XSIMD_INLINE void transpose(batch<double, A>* matrix_begin, batch<double, A>* matrix_end, requires_arch<sse2>) noexcept
{
assert((matrix_end - matrix_begin == batch<double, A>::size) && "correctly sized matrix");
(void)matrix_end;
auto r0 = matrix_begin[0], r1 = matrix_begin[1];
matrix_begin[0] = _mm_unpacklo_pd(r0, r1);
matrix_begin[1] = _mm_unpackhi_pd(r0, r1);
}
template <class A>
XSIMD_INLINE void transpose(batch<uint64_t, A>* matrix_begin, batch<uint64_t, A>* matrix_end, requires_arch<sse2>) noexcept
{
transpose(reinterpret_cast<batch<double, A>*>(matrix_begin), reinterpret_cast<batch<double, A>*>(matrix_end), A {});
}
template <class A>
XSIMD_INLINE void transpose(batch<int64_t, A>* matrix_begin, batch<int64_t, A>* matrix_end, requires_arch<sse2>) noexcept
{
transpose(reinterpret_cast<batch<double, A>*>(matrix_begin), reinterpret_cast<batch<double, A>*>(matrix_end), A {});
}
// zip_hi
template <class A>
XSIMD_INLINE batch<float, A> zip_hi(batch<float, A> const& self, batch<float, A> const& other, requires_arch<sse2>) noexcept


@@ -39,6 +39,8 @@ namespace xsimd
XSIMD_INLINE batch<T, A> shuffle(batch<T, A> const& x, batch<T, A> const& y, batch_constant<ITy, A, Indices...>, requires_arch<generic>) noexcept;
template <class A, class T>
XSIMD_INLINE batch<T, A> avg(batch<T, A> const&, batch<T, A> const&, requires_arch<generic>) noexcept;
template <class A, class T>
XSIMD_INLINE void transpose(batch<T, A>* matrix_begin, batch<T, A>* matrix_end, requires_arch<generic>) noexcept;
// abs
template <class A, class T, typename std::enable_if<std::is_integral<T>::value && std::is_signed<T>::value, void>::type>
@@ -1576,6 +1578,40 @@ namespace xsimd
return bitwise_cast<int8_t>(swizzle(bitwise_cast<uint8_t>(self), mask, wasm {}));
}
// transpose
template <class A, class T>
XSIMD_INLINE void transpose(batch<T, A>* matrix_begin, batch<T, A>* matrix_end, requires_arch<wasm>) noexcept
{
assert((matrix_end - matrix_begin == batch<T, A>::size) && "correctly sized matrix");
(void)matrix_end;
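// Dispatch on element width: 32-bit elements use a 4x4 shuffle network,
// 64-bit elements a 2x2 interleave, and anything else falls back to the
// generic scratch-buffer kernel.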
XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
{
auto r0 = matrix_begin[0], r1 = matrix_begin[1], r2 = matrix_begin[2], r3 = matrix_begin[3];
auto t0 = wasm_i32x4_shuffle(r0, r1, 0, 4, 1, 5); // r0[0] r1[0] r0[1] r1[1]
auto t1 = wasm_i32x4_shuffle(r0, r1, 2, 6, 3, 7); // r0[2] r1[2] r0[3] r1[3]
auto t2 = wasm_i32x4_shuffle(r2, r3, 0, 4, 1, 5); // r2[0] r3[0] r2[1] r3[1]
auto t3 = wasm_i32x4_shuffle(r2, r3, 2, 6, 3, 7); // r2[2] r3[2] r2[3] r3[3]
matrix_begin[0] = wasm_i32x4_shuffle(t0, t2, 0, 1, 4, 5); // r0[0] r1[0] r2[0] r3[0]
matrix_begin[1] = wasm_i32x4_shuffle(t0, t2, 2, 3, 6, 7); // r0[1] r1[1] r2[1] r3[1]
matrix_begin[2] = wasm_i32x4_shuffle(t1, t3, 0, 1, 4, 5); // r0[2] r1[2] r2[2] r3[2]
matrix_begin[3] = wasm_i32x4_shuffle(t1, t3, 2, 3, 6, 7); // r0[3] r1[3] r2[3] r3[3]
}
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
{
auto r0 = matrix_begin[0], r1 = matrix_begin[1];
matrix_begin[0] = wasm_i64x2_shuffle(r0, r1, 0, 2);
matrix_begin[1] = wasm_i64x2_shuffle(r0, r1, 1, 3);
}
else
{
transpose(matrix_begin, matrix_end, generic {});
}
}
// trunc
template <class A>
XSIMD_INLINE batch<float, A> trunc(batch<float, A> const& self, requires_arch<wasm>) noexcept


@@ -28,13 +28,14 @@ namespace xsimd
*
* @defgroup batch_arithmetic Arithmetic operators
* @defgroup batch_constant Constant batches
* @defgroup batch_cond Conditional operators
* @defgroup batch_data_transfer Memory operators
* @defgroup batch_math Basic math operators
* @defgroup batch_math_extra Extra math operators
* @defgroup batch_fp Floating point manipulation
* @defgroup batch_rounding Rounding operators
* @defgroup batch_conversion Conversion operators
* @defgroup batch_complex_op Complex operators
* @defgroup batch_complex Complex operators
* @defgroup batch_logical Logical operators
* @defgroup batch_bitwise Bitwise operators
* @defgroup batch_reducers Reducers
@@ -1890,7 +1891,7 @@
}
/**
* @ingroup rotate_left
* @ingroup batch_data_transfer
*
* Slide the whole batch to the left by \c n bytes, and reintroduce the
* elements slid out from the right. This is different from
@@ -1908,7 +1909,7 @@
}
/**
* @ingroup rotate_right
* @ingroup batch_data_transfer
*
* Slide the whole batch to the right by \c n bytes, and reintroduce the
* elements slid out from the left. This is different from
@@ -2021,7 +2022,7 @@
}
/**
* @ingroup batch_miscellaneous
* @ingroup batch_cond
*
* Ternary operator for batches: selects values from the batches \c true_br or \c false_br
* depending on the boolean values in the constant batch \c cond. Equivalent to
@@ -2042,7 +2043,7 @@
}
/**
* @ingroup batch_miscellaneous
* @ingroup batch_cond
*
* Ternary operator for batches: selects values from the batches \c true_br or \c false_br
* depending on the boolean values in the constant batch \c cond. Equivalent to
@@ -2063,7 +2064,7 @@
}
/**
* @ingroup batch_miscellaneous
* @ingroup batch_cond
*
* Ternary operator for batches: selects values from the batches \c true_br or \c false_br
* depending on the boolean values in the constant batch \c cond. Equivalent to
@@ -2515,6 +2516,23 @@
return batch_cast<as_integer_t<T>>(x);
}
/**
* @ingroup batch_data_transfer
*
* Transposes in place the matrix whose rows are the batches passed as
* arguments.
* @param matrix_begin pointer to the first row of the matrix to transpose
* @param matrix_end pointer to one past the last row of the matrix to transpose
*
*/
template <class T, class A>
XSIMD_INLINE void transpose(batch<T, A>* matrix_begin, batch<T, A>* matrix_end) noexcept
{
assert((matrix_end - matrix_begin == batch<T, A>::size) && "correctly sized matrix");
detail::static_check_supported_config<T, A>();
return kernel::transpose(matrix_begin, matrix_end, A {});
}
/**
* @ingroup batch_rounding
*


@@ -95,8 +95,10 @@ namespace xsimd
} \
template <> \
XSIMD_INLINE type bitcast<type>(type x) noexcept { return x; } \
static XSIMD_INLINE byte_type as_bytes(type x) noexcept \
template <class U> \
static XSIMD_INLINE byte_type as_bytes(U x) noexcept \
{ \
static_assert(std::is_same<U, type>::value, "inconsistent conversion types"); \
const auto words = XSIMD_RVV_JOINT5(__riscv_vreinterpret_, u, s, m, vmul)(x); \
return XSIMD_RVV_JOINT5(__riscv_vreinterpret_, u, 8, m, vmul)(words); \
} \
@@ -125,6 +127,83 @@
#undef XSIMD_RVV_MAKE_TYPES
#undef XSIMD_RVV_MAKE_TYPE
// Specialization needed for #1058
template <>
XSIMD_INLINE rvv_type_info<int8_t, rvv_width_m1 * 8>::type
rvv_type_info<int8_t, rvv_width_m1 * 8>::bitcast<__rvv_uint8m8_t>(
__rvv_uint8m8_t x) noexcept
{
return __riscv_vreinterpret_i8m8(x);
}
template <>
XSIMD_INLINE rvv_type_info<int8_t, rvv_width_m1 * 1>::type
rvv_type_info<int8_t, rvv_width_m1 * 1>::bitcast<__rvv_uint8m1_t>(
__rvv_uint8m1_t x) noexcept
{
return __riscv_vreinterpret_i8m1(x);
}
template <>
XSIMD_INLINE rvv_type_info<uint16_t, rvv_width_m1 * 1>::type
rvv_type_info<uint16_t, rvv_width_m1 * 1>::bitcast<__rvv_uint8m1_t>(
__rvv_uint8m1_t x) noexcept
{
return __riscv_vreinterpret_u16m1(x);
}
template <>
XSIMD_INLINE rvv_type_info<uint32_t, rvv_width_m1 * 1>::type
rvv_type_info<uint32_t, rvv_width_m1 * 1>::bitcast<__rvv_uint8m1_t>(
__rvv_uint8m1_t x) noexcept
{
return __riscv_vreinterpret_u32m1(x);
}
template <>
XSIMD_INLINE rvv_type_info<uint64_t, rvv_width_m1 * 1>::type
rvv_type_info<uint64_t, rvv_width_m1 * 1>::bitcast<__rvv_uint8m1_t>(
__rvv_uint8m1_t x) noexcept
{
return __riscv_vreinterpret_u64m1(x);
}
//
template <>
XSIMD_INLINE rvv_type_info<int8_t, rvv_width_m1 * 8>::byte_type
rvv_type_info<int8_t, rvv_width_m1 * 8>::as_bytes<__rvv_int8m8_t>(__rvv_int8m8_t x) noexcept
{
return __riscv_vreinterpret_u8m8(x);
}
template <>
XSIMD_INLINE rvv_type_info<int8_t, rvv_width_m1 * 1>::byte_type
rvv_type_info<int8_t, rvv_width_m1 * 1>::as_bytes<__rvv_int8m1_t>(__rvv_int8m1_t x) noexcept
{
return __riscv_vreinterpret_u8m1(x);
}
template <>
XSIMD_INLINE rvv_type_info<uint8_t, rvv_width_m1 * 1>::byte_type
rvv_type_info<uint8_t, rvv_width_m1 * 1>::as_bytes<__rvv_uint8m1_t>(__rvv_uint8m1_t x) noexcept
{
return x;
}
template <>
XSIMD_INLINE rvv_type_info<uint16_t, rvv_width_m1 * 1>::byte_type
rvv_type_info<uint16_t, rvv_width_m1 * 1>::as_bytes<__rvv_uint16m1_t>(__rvv_uint16m1_t x) noexcept
{
return __riscv_vreinterpret_u8m1(x);
}
template <>
XSIMD_INLINE rvv_type_info<uint32_t, rvv_width_m1 * 1>::byte_type
rvv_type_info<uint32_t, rvv_width_m1 * 1>::as_bytes<__rvv_uint32m1_t>(__rvv_uint32m1_t x) noexcept
{
return __riscv_vreinterpret_u8m1(x);
}
template <>
XSIMD_INLINE rvv_type_info<uint64_t, rvv_width_m1 * 1>::byte_type
rvv_type_info<uint64_t, rvv_width_m1 * 1>::as_bytes<__rvv_uint64m1_t>(__rvv_uint64m1_t x) noexcept
{
return __riscv_vreinterpret_u8m1(x);
}
// rvv_blob is a storage-type abstraction for a vector register.
template <class T, size_t Width>
struct rvv_blob : public rvv_type_info<T, Width>


@@ -10,8 +10,8 @@ origin:
url: https://github.com/QuantStack/xsimd
release: 50a69bf8bc892b854a0490ba6cc9a73031347f01 (2024-09-18T13:09:40Z).
revision: 50a69bf8bc892b854a0490ba6cc9a73031347f01
release: e384105a2a3809c319f0740e2ebf6166da895fcb (2024-10-16T06:11:04Z).
revision: e384105a2a3809c319f0740e2ebf6166da895fcb
license: BSD-3-Clause