mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-26 23:10:38 +00:00
Add NEON impl too, mostly for exercise purposes
This commit is contained in:
parent
e6f0f84a45
commit
d0e8cfa365
@ -52,6 +52,13 @@ inline uint32_t Float4ToUint8x4(const float f[4]) {
|
||||
__m128 value = _mm_mul_ps(_mm_loadu_ps(f), _mm_load_ps(exactly_255_x4));
|
||||
__m128i ivalue = _mm_packus_epi16(_mm_packs_epi32(_mm_cvtps_epi32(value), zero), zero);
|
||||
return _mm_cvtsi128_si32(ivalue);
|
||||
#elif PPSSPP_ARCH(ARM_NEON)
|
||||
const float32x4_t value = vmulq_f32(vld1q_f32(f), vdupq_n_f32(255.0f));
|
||||
uint32x4_t ivalue32 = vcvtq_u32_f32(value);
|
||||
uint16x4_t ivalue16 = vqmovn_u32(ivalue32);
|
||||
uint8x8_t ivalue8 = vqmovn_u16(vcombine_u16(ivalue16, ivalue16)); // Is there no way to avoid the combine here?
|
||||
uint32x2_t outValue32 = vreinterpret_u32_u8(ivalue8);
|
||||
return vget_lane_u32(outValue32, 0);
|
||||
#else
|
||||
int i4[4];
|
||||
for (int i = 0; i < 4; i++) {
|
||||
@ -74,6 +81,13 @@ inline uint32_t Float4ToUint8x4_NoClamp(const float f[4]) {
|
||||
__m128 value = _mm_mul_ps(_mm_loadu_ps(f), _mm_load_ps(exactly_255_x4));
|
||||
__m128i ivalue = _mm_packus_epi16(_mm_packs_epi32(_mm_cvtps_epi32(value), zero), zero);
|
||||
return _mm_cvtsi128_si32(ivalue);
|
||||
#elif PPSSPP_ARCH(ARM_NEON)
|
||||
const float32x4_t value = vmulq_f32(vld1q_f32(f), vdupq_n_f32(255.0f));
|
||||
uint32x4_t ivalue32 = vcvtq_u32_f32(value);
|
||||
uint16x4_t ivalue16 = vqmovn_u32(ivalue32);
|
||||
uint8x8_t ivalue8 = vqmovn_u16(vcombine_u16(ivalue16, ivalue16)); // Is there no way to avoid the combine here?
|
||||
uint32x2_t outValue32 = vreinterpret_u32_u8(ivalue8);
|
||||
return vget_lane_u32(outValue32, 0);
|
||||
#else
|
||||
u32 i4[4];
|
||||
for (int i = 0; i < 4; i++) {
|
||||
|
@ -41,6 +41,7 @@
|
||||
#endif
|
||||
|
||||
#include "Common/Data/Collections/TinySet.h"
|
||||
#include "Common/Data/Convert/SmallDataConvert.h"
|
||||
#include "Common/Data/Text/Parsers.h"
|
||||
#include "Common/Data/Text/WrapText.h"
|
||||
#include "Common/Data/Encoding/Utf8.h"
|
||||
@ -782,6 +783,15 @@ static bool TestWrapText() {
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool TestSmallDataConvert() {
|
||||
float f[4] = { 1.0f / 255.0f, 2.0f / 255.0f, 3.0f / 255.0f, 4.0f / 255.f };
|
||||
uint32_t result = Float4ToUint8x4_NoClamp(f);
|
||||
EXPECT_EQ_HEX(result, 0x04030201);
|
||||
result = Float4ToUint8x4(f);
|
||||
EXPECT_EQ_HEX(result, 0x04030201);
|
||||
return true;
|
||||
}
|
||||
|
||||
typedef bool (*TestFunc)();
|
||||
struct TestItem {
|
||||
const char *name;
|
||||
@ -832,6 +842,7 @@ TestItem availableTests[] = {
|
||||
TEST_ITEM(ThreadManager),
|
||||
TEST_ITEM(WrapText),
|
||||
TEST_ITEM(TinySet),
|
||||
TEST_ITEM(SmallDataConvert),
|
||||
};
|
||||
|
||||
int main(int argc, const char *argv[]) {
|
||||
|
Loading…
Reference in New Issue
Block a user