mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-24 14:00:03 +00:00
Merge pull request #15628 from hrydgard/arm64-neon-flag
Add ARM64_NEON compile arch flag
This commit is contained in:
commit
8ae673572a
@ -258,7 +258,7 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, int count) {
|
||||
{
|
||||
#if defined(_M_SSE)
|
||||
_mm_store_ps(&mips->f[inst->dest], _mm_load_ps(&mips->f[inst->src1]));
|
||||
#elif PPSSPP_ARCH(ARM64)
|
||||
#elif PPSSPP_ARCH(ARM64_NEON)
|
||||
vst1q_f32(&mips->f[inst->dest], vld1q_f32(&mips->f[inst->src1]));
|
||||
#else
|
||||
memcpy(&mips->f[inst->dest], &mips->f[inst->src1], 4 * sizeof(float));
|
||||
@ -270,7 +270,7 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, int count) {
|
||||
{
|
||||
#if defined(_M_SSE)
|
||||
_mm_store_ps(&mips->f[inst->dest], _mm_add_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2])));
|
||||
#elif PPSSPP_ARCH(ARM64)
|
||||
#elif PPSSPP_ARCH(ARM64_NEON)
|
||||
vst1q_f32(&mips->f[inst->dest], vaddq_f32(vld1q_f32(&mips->f[inst->src1]), vld1q_f32(&mips->f[inst->src2])));
|
||||
#else
|
||||
for (int i = 0; i < 4; i++)
|
||||
@ -283,7 +283,7 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, int count) {
|
||||
{
|
||||
#if defined(_M_SSE)
|
||||
_mm_store_ps(&mips->f[inst->dest], _mm_sub_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2])));
|
||||
#elif PPSSPP_ARCH(ARM64)
|
||||
#elif PPSSPP_ARCH(ARM64_NEON)
|
||||
vst1q_f32(&mips->f[inst->dest], vsubq_f32(vld1q_f32(&mips->f[inst->src1]), vld1q_f32(&mips->f[inst->src2])));
|
||||
#else
|
||||
for (int i = 0; i < 4; i++)
|
||||
@ -296,7 +296,7 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, int count) {
|
||||
{
|
||||
#if defined(_M_SSE)
|
||||
_mm_store_ps(&mips->f[inst->dest], _mm_mul_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2])));
|
||||
#elif PPSSPP_ARCH(ARM64)
|
||||
#elif PPSSPP_ARCH(ARM64_NEON)
|
||||
vst1q_f32(&mips->f[inst->dest], vmulq_f32(vld1q_f32(&mips->f[inst->src1]), vld1q_f32(&mips->f[inst->src2])));
|
||||
#else
|
||||
for (int i = 0; i < 4; i++)
|
||||
@ -331,7 +331,7 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, int count) {
|
||||
{
|
||||
#if defined(_M_SSE)
|
||||
_mm_store_ps(&mips->f[inst->dest], _mm_xor_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps((const float *)signBits)));
|
||||
#elif PPSSPP_ARCH(ARM64)
|
||||
#elif PPSSPP_ARCH(ARM64_NEON)
|
||||
vst1q_f32(&mips->f[inst->dest], vnegq_f32(vld1q_f32(&mips->f[inst->src1])));
|
||||
#else
|
||||
for (int i = 0; i < 4; i++)
|
||||
@ -344,7 +344,7 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, int count) {
|
||||
{
|
||||
#if defined(_M_SSE)
|
||||
_mm_store_ps(&mips->f[inst->dest], _mm_and_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps((const float *)noSignMask)));
|
||||
#elif PPSSPP_ARCH(ARM64)
|
||||
#elif PPSSPP_ARCH(ARM64_NEON)
|
||||
vst1q_f32(&mips->f[inst->dest], vabsq_f32(vld1q_f32(&mips->f[inst->src1])));
|
||||
#else
|
||||
for (int i = 0; i < 4; i++)
|
||||
|
28
GPU/Math3D.h
28
GPU/Math3D.h
@ -69,7 +69,7 @@ public:
|
||||
#if defined(_M_SSE)
|
||||
__m128i ivec;
|
||||
__m128 vec;
|
||||
#elif PPSSPP_ARCH(ARM64)
|
||||
#elif PPSSPP_ARCH(ARM64_NEON)
|
||||
int32x4_t ivec;
|
||||
float32x4_t vec;
|
||||
#endif
|
||||
@ -84,7 +84,7 @@ public:
|
||||
#if defined(_M_SSE)
|
||||
Vec2(const __m128 &_vec) : vec(_vec) {}
|
||||
Vec2(const __m128i &_ivec) : ivec(_ivec) {}
|
||||
#elif PPSSPP_ARCH(ARM64)
|
||||
#elif PPSSPP_ARCH(ARM64_NEON)
|
||||
Vec2(const float32x4_t &_vec) : vec(_vec) {}
|
||||
#if !defined(_MSC_VER)
|
||||
Vec2(const int32x4_t &_ivec) : ivec(_ivec) {}
|
||||
@ -217,7 +217,7 @@ public:
|
||||
#if defined(_M_SSE)
|
||||
__m128i ivec;
|
||||
__m128 vec;
|
||||
#elif PPSSPP_ARCH(ARM64)
|
||||
#elif PPSSPP_ARCH(ARM64_NEON)
|
||||
int32x4_t ivec;
|
||||
float32x4_t vec;
|
||||
#endif
|
||||
@ -236,7 +236,7 @@ public:
|
||||
Vec3(const Vec3Packed<T> &_xyz) {
|
||||
vec = _mm_loadu_ps(_xyz.AsArray());
|
||||
}
|
||||
#elif PPSSPP_ARCH(ARM64)
|
||||
#elif PPSSPP_ARCH(ARM64_NEON)
|
||||
Vec3(const float32x4_t &_vec) : vec(_vec) {}
|
||||
#if !defined(_MSC_VER)
|
||||
Vec3(const int32x4_t &_ivec) : ivec(_ivec) {}
|
||||
@ -576,7 +576,7 @@ public:
|
||||
#if defined(_M_SSE)
|
||||
__m128i ivec;
|
||||
__m128 vec;
|
||||
#elif PPSSPP_ARCH(ARM64)
|
||||
#elif PPSSPP_ARCH(ARM64_NEON)
|
||||
int32x4_t ivec;
|
||||
float32x4_t vec;
|
||||
#endif
|
||||
@ -593,7 +593,7 @@ public:
|
||||
#if defined(_M_SSE)
|
||||
Vec4(const __m128 &_vec) : vec(_vec) {}
|
||||
Vec4(const __m128i &_ivec) : ivec(_ivec) {}
|
||||
#elif PPSSPP_ARCH(ARM64)
|
||||
#elif PPSSPP_ARCH(ARM64_NEON)
|
||||
Vec4(const float32x4_t &_vec) : vec(_vec) {}
|
||||
#if !defined(_MSC_VER)
|
||||
Vec4(const int32x4_t &_ivec) : ivec(_ivec) {}
|
||||
@ -918,7 +918,7 @@ inline void Vec3ByMatrix43(float vecOut[3], const float v[3], const float m[12])
|
||||
vecOut[0] = _mm_cvtss_f32(sum);
|
||||
vecOut[1] = vectorGetByIndex<1>(sum);
|
||||
vecOut[2] = vectorGetByIndex<2>(sum);
|
||||
#elif PPSSPP_ARCH(ARM_NEON) && PPSSPP_ARCH(ARM64)
|
||||
#elif PPSSPP_ARCH(ARM64_NEON)
|
||||
float32x4_t sum = Vec3ByMatrix43Internal(vld1q_f32(v), m);
|
||||
vecOut[0] = vgetq_lane_f32(sum, 0);
|
||||
vecOut[1] = vgetq_lane_f32(sum, 1);
|
||||
@ -936,7 +936,7 @@ inline Vec3f MATH3D_CALL Vec3ByMatrix43(const Vec3f v, const float m[12]) {
|
||||
__m128 y = _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
__m128 z = _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
return Vec3ByMatrix43Internal(x, y, z, m);
|
||||
#elif PPSSPP_ARCH(ARM_NEON) && PPSSPP_ARCH(ARM64)
|
||||
#elif PPSSPP_ARCH(ARM64_NEON)
|
||||
return Vec3ByMatrix43Internal(v.vec, m);
|
||||
#else
|
||||
Vec3f vecOut;
|
||||
@ -957,7 +957,7 @@ inline __m128 MATH3D_CALL Vec3ByMatrix44Internal(__m128 x, __m128 y, __m128 z, c
|
||||
_mm_add_ps(_mm_mul_ps(col2, z), col3));
|
||||
return sum;
|
||||
}
|
||||
#elif PPSSPP_ARCH(ARM_NEON) && PPSSPP_ARCH(ARM64)
|
||||
#elif PPSSPP_ARCH(ARM64_NEON)
|
||||
inline float32x4_t Vec3ByMatrix44Internal(float32x4_t vec, const float m[16]) {
|
||||
float32x4_t col0 = vld1q_f32(m);
|
||||
float32x4_t col1 = vld1q_f32(m + 4);
|
||||
@ -977,7 +977,7 @@ inline void Vec3ByMatrix44(float vecOut[4], const float v[3], const float m[16])
|
||||
__m128 z = _mm_set1_ps(v[2]);
|
||||
__m128 sum = Vec3ByMatrix44Internal(x, y, z, m);
|
||||
_mm_storeu_ps(vecOut, sum);
|
||||
#elif PPSSPP_ARCH(ARM_NEON) && PPSSPP_ARCH(ARM64)
|
||||
#elif PPSSPP_ARCH(ARM64_NEON)
|
||||
float32x4_t sum = Vec3ByMatrix44Internal(vld1q_f32(v), m);
|
||||
vst1q_f32(vecOut, sum);
|
||||
#else
|
||||
@ -994,7 +994,7 @@ inline Vec4f MATH3D_CALL Vec3ByMatrix44(const Vec3f v, const float m[16]) {
|
||||
__m128 y = _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
__m128 z = _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
return Vec3ByMatrix44Internal(x, y, z, m);
|
||||
#elif PPSSPP_ARCH(ARM_NEON) && PPSSPP_ARCH(ARM64)
|
||||
#elif PPSSPP_ARCH(ARM64_NEON)
|
||||
return Vec3ByMatrix44Internal(v.vec, m);
|
||||
#else
|
||||
Vec4f vecOut;
|
||||
@ -1014,7 +1014,7 @@ inline __m128 MATH3D_CALL Norm3ByMatrix43Internal(__m128 x, __m128 y, __m128 z,
|
||||
_mm_mul_ps(col2, z));
|
||||
return sum;
|
||||
}
|
||||
#elif PPSSPP_ARCH(ARM_NEON) && PPSSPP_ARCH(ARM64)
|
||||
#elif PPSSPP_ARCH(ARM64_NEON)
|
||||
inline float32x4_t Norm3ByMatrix43Internal(float32x4_t vec, const float m[16]) {
|
||||
float32x4_t col0 = vld1q_f32(m);
|
||||
float32x4_t col1 = vld1q_f32(m + 3);
|
||||
@ -1035,7 +1035,7 @@ inline void Norm3ByMatrix43(float vecOut[3], const float v[3], const float m[12]
|
||||
vecOut[0] = _mm_cvtss_f32(sum);
|
||||
vecOut[1] = vectorGetByIndex<1>(sum);
|
||||
vecOut[2] = vectorGetByIndex<2>(sum);
|
||||
#elif PPSSPP_ARCH(ARM_NEON) && PPSSPP_ARCH(ARM64)
|
||||
#elif PPSSPP_ARCH(ARM64_NEON)
|
||||
float32x4_t sum = Norm3ByMatrix43Internal(vld1q_f32(v), m);
|
||||
vecOut[0] = vgetq_lane_f32(sum, 0);
|
||||
vecOut[1] = vgetq_lane_f32(sum, 1);
|
||||
@ -1053,7 +1053,7 @@ inline Vec3f MATH3D_CALL Norm3ByMatrix43(const Vec3f v, const float m[12]) {
|
||||
__m128 y = _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
__m128 z = _mm_shuffle_ps(v.vec, v.vec, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
return Norm3ByMatrix43Internal(x, y, z, m);
|
||||
#elif PPSSPP_ARCH(ARM_NEON) && PPSSPP_ARCH(ARM64)
|
||||
#elif PPSSPP_ARCH(ARM64_NEON)
|
||||
return Norm3ByMatrix43Internal(v.vec, m);
|
||||
#else
|
||||
Vec3f vecOut;
|
||||
|
@ -24,7 +24,7 @@ namespace Rasterizer {
|
||||
void RegCache::SetupABI(const std::vector<Purpose> &args, bool forceRetain) {
|
||||
#if PPSSPP_ARCH(ARM)
|
||||
_assert_msg_(false, "Not yet implemented");
|
||||
#elif PPSSPP_ARCH(ARM64)
|
||||
#elif PPSSPP_ARCH(ARM64_NEON)
|
||||
using namespace Arm64Gen;
|
||||
|
||||
// ARM64 has a generous allotment of registers.
|
||||
@ -389,7 +389,7 @@ RegCache::RegStatus *RegCache::FindReg(Reg r, Purpose p) {
|
||||
}
|
||||
|
||||
CodeBlock::CodeBlock(int size)
|
||||
#if PPSSPP_ARCH(ARM64)
|
||||
#if PPSSPP_ARCH(ARM64_NEON)
|
||||
: fp(this)
|
||||
#endif
|
||||
{
|
||||
|
@ -28,7 +28,7 @@
|
||||
#if defined(_M_SSE)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
#if PPSSPP_ARCH(ARM_NEON) && PPSSPP_ARCH(ARM64)
|
||||
#if PPSSPP_ARCH(ARM64_NEON)
|
||||
#if defined(_MSC_VER) && PPSSPP_ARCH(ARM64)
|
||||
#include <arm64_neon.h>
|
||||
#else
|
||||
@ -38,7 +38,7 @@
|
||||
|
||||
#if PPSSPP_ARCH(ARM)
|
||||
#include "Common/ArmEmitter.h"
|
||||
#elif PPSSPP_ARCH(ARM64)
|
||||
#elif PPSSPP_ARCH(ARM64_NEON)
|
||||
#include "Common/Arm64Emitter.h"
|
||||
#elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
|
||||
#include "Common/x64Emitter.h"
|
||||
@ -54,7 +54,7 @@ namespace Rasterizer {
|
||||
// While not part of the reg cache proper, this is the type it is built for.
|
||||
#if PPSSPP_ARCH(ARM)
|
||||
typedef ArmGen::ARMXCodeBlock BaseCodeBlock;
|
||||
#elif PPSSPP_ARCH(ARM64)
|
||||
#elif PPSSPP_ARCH(ARM64_NEON)
|
||||
typedef Arm64Gen::ARM64CodeBlock BaseCodeBlock;
|
||||
#elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
|
||||
typedef Gen::XCodeBlock BaseCodeBlock;
|
||||
@ -65,7 +65,7 @@ typedef FakeGen::FakeXCodeBlock BaseCodeBlock;
|
||||
#endif
|
||||
|
||||
// We also have the types of things that end up in regs.
|
||||
#if PPSSPP_ARCH(ARM64)
|
||||
#if PPSSPP_ARCH(ARM64_NEON)
|
||||
typedef int32x4_t Vec4IntArg;
|
||||
typedef int32x4_t Vec4IntResult;
|
||||
typedef float32x4_t Vec4FloatArg;
|
||||
@ -160,7 +160,7 @@ struct RegCache {
|
||||
#if PPSSPP_ARCH(ARM)
|
||||
typedef ArmGen::ARMReg Reg;
|
||||
static constexpr Reg REG_INVALID_VALUE = ArmGen::INVALID_REG;
|
||||
#elif PPSSPP_ARCH(ARM64)
|
||||
#elif PPSSPP_ARCH(ARM64_NEON)
|
||||
typedef Arm64Gen::ARM64Reg Reg;
|
||||
static constexpr Reg REG_INVALID_VALUE = Arm64Gen::INVALID_REG;
|
||||
#elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
|
||||
@ -242,7 +242,7 @@ protected:
|
||||
void WriteDynamicConst8x16(const u8 *&ptr, uint16_t value);
|
||||
void WriteDynamicConst4x32(const u8 *&ptr, uint32_t value);
|
||||
|
||||
#if PPSSPP_ARCH(ARM64)
|
||||
#if PPSSPP_ARCH(ARM64_NEON)
|
||||
Arm64Gen::ARM64FloatEmitter fp;
|
||||
#endif
|
||||
|
||||
|
@ -54,6 +54,7 @@
|
||||
#define PPSSPP_ARCH_ARM64 1
|
||||
#define PPSSPP_ARCH_64BIT 1
|
||||
#define PPSSPP_ARCH_ARM_NEON 1
|
||||
#define PPSSPP_ARCH_ARM64_NEON 1
|
||||
#endif
|
||||
|
||||
#if defined(__mips64__)
|
||||
|
Loading…
Reference in New Issue
Block a user