mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-26 23:10:38 +00:00
Remove the assembly implementation of 4x4 matrix mul, in favor of intrinsics.
This commit is contained in:
parent
0126bc181c
commit
ed8c4e8758
@ -626,10 +626,7 @@ add_library(Common STATIC
|
|||||||
Common/Input/KeyCodes.h
|
Common/Input/KeyCodes.h
|
||||||
Common/Input/InputState.cpp
|
Common/Input/InputState.cpp
|
||||||
Common/Input/InputState.h
|
Common/Input/InputState.h
|
||||||
Common/Math/fast/fast_math.c
|
|
||||||
Common/Math/fast/fast_matrix.c
|
Common/Math/fast/fast_matrix.c
|
||||||
Common/Math/fast/fast_matrix_neon.S
|
|
||||||
Common/Math/fast/fast_matrix_sse.c
|
|
||||||
Common/Math/curves.cpp
|
Common/Math/curves.cpp
|
||||||
Common/Math/curves.h
|
Common/Math/curves.h
|
||||||
Common/Math/expression_parser.cpp
|
Common/Math/expression_parser.cpp
|
||||||
|
@ -452,7 +452,6 @@
|
|||||||
<ClInclude Include="Input\KeyCodes.h" />
|
<ClInclude Include="Input\KeyCodes.h" />
|
||||||
<ClInclude Include="Math\curves.h" />
|
<ClInclude Include="Math\curves.h" />
|
||||||
<ClInclude Include="Math\expression_parser.h" />
|
<ClInclude Include="Math\expression_parser.h" />
|
||||||
<ClInclude Include="Math\fast\fast_math.h" />
|
|
||||||
<ClInclude Include="Math\fast\fast_matrix.h" />
|
<ClInclude Include="Math\fast\fast_matrix.h" />
|
||||||
<ClInclude Include="Math\geom2d.h" />
|
<ClInclude Include="Math\geom2d.h" />
|
||||||
<ClInclude Include="Math\lin\matrix4x4.h" />
|
<ClInclude Include="Math\lin\matrix4x4.h" />
|
||||||
@ -873,9 +872,7 @@
|
|||||||
<ClCompile Include="Log.cpp" />
|
<ClCompile Include="Log.cpp" />
|
||||||
<ClCompile Include="Math\curves.cpp" />
|
<ClCompile Include="Math\curves.cpp" />
|
||||||
<ClCompile Include="Math\expression_parser.cpp" />
|
<ClCompile Include="Math\expression_parser.cpp" />
|
||||||
<ClCompile Include="Math\fast\fast_math.c" />
|
|
||||||
<ClCompile Include="Math\fast\fast_matrix.c" />
|
<ClCompile Include="Math\fast\fast_matrix.c" />
|
||||||
<ClCompile Include="Math\fast\fast_matrix_sse.c" />
|
|
||||||
<ClCompile Include="Math\lin\matrix4x4.cpp" />
|
<ClCompile Include="Math\lin\matrix4x4.cpp" />
|
||||||
<ClCompile Include="Math\lin\vec3.cpp" />
|
<ClCompile Include="Math\lin\vec3.cpp" />
|
||||||
<ClCompile Include="Math\math_util.cpp" />
|
<ClCompile Include="Math\math_util.cpp" />
|
||||||
@ -992,7 +989,6 @@
|
|||||||
</ProjectReference>
|
</ProjectReference>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<None Include="Math\fast\fast_matrix_neon.S" />
|
|
||||||
<None Include="Math\lin\matrix_neon.s" />
|
<None Include="Math\lin\matrix_neon.s" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
|
@ -161,9 +161,6 @@
|
|||||||
<ClInclude Include="Math\lin\vec3.h">
|
<ClInclude Include="Math\lin\vec3.h">
|
||||||
<Filter>Math\lin</Filter>
|
<Filter>Math\lin</Filter>
|
||||||
</ClInclude>
|
</ClInclude>
|
||||||
<ClInclude Include="Math\fast\fast_math.h">
|
|
||||||
<Filter>Math\fast</Filter>
|
|
||||||
</ClInclude>
|
|
||||||
<ClInclude Include="Math\fast\fast_matrix.h">
|
<ClInclude Include="Math\fast\fast_matrix.h">
|
||||||
<Filter>Math\fast</Filter>
|
<Filter>Math\fast</Filter>
|
||||||
</ClInclude>
|
</ClInclude>
|
||||||
@ -568,15 +565,9 @@
|
|||||||
<ClCompile Include="Math\lin\vec3.cpp">
|
<ClCompile Include="Math\lin\vec3.cpp">
|
||||||
<Filter>Math\lin</Filter>
|
<Filter>Math\lin</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
<ClCompile Include="Math\fast\fast_math.c">
|
|
||||||
<Filter>Math\fast</Filter>
|
|
||||||
</ClCompile>
|
|
||||||
<ClCompile Include="Math\fast\fast_matrix.c">
|
<ClCompile Include="Math\fast\fast_matrix.c">
|
||||||
<Filter>Math\fast</Filter>
|
<Filter>Math\fast</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
<ClCompile Include="Math\fast\fast_matrix_sse.c">
|
|
||||||
<Filter>Math\fast</Filter>
|
|
||||||
</ClCompile>
|
|
||||||
<ClCompile Include="Data\Format\RIFF.cpp">
|
<ClCompile Include="Data\Format\RIFF.cpp">
|
||||||
<Filter>Data\Format</Filter>
|
<Filter>Data\Format</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
@ -914,8 +905,5 @@
|
|||||||
<None Include="Math\lin\matrix_neon.s">
|
<None Include="Math\lin\matrix_neon.s">
|
||||||
<Filter>Math\lin</Filter>
|
<Filter>Math\lin</Filter>
|
||||||
</None>
|
</None>
|
||||||
<None Include="Math\fast\fast_matrix_neon.S">
|
|
||||||
<Filter>Math\fast</Filter>
|
|
||||||
</None>
|
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
</Project>
|
</Project>
|
@ -1,12 +0,0 @@
|
|||||||
#include "ppsspp_config.h"
|
|
||||||
|
|
||||||
#include "fast_math.h"
|
|
||||||
#include "fast_matrix.h"
|
|
||||||
|
|
||||||
void InitFastMath() {
|
|
||||||
#ifndef _MSC_VER
|
|
||||||
#if PPSSPP_ARCH(ARM_NEON) && !PPSSPP_ARCH(ARM64)
|
|
||||||
fast_matrix_mul_4x4 = &fast_matrix_mul_4x4_neon;
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
}
|
|
@ -1,21 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Fast Math
|
|
||||||
// A mini library of math kernels. These should be large enough to be worth calling
|
|
||||||
// as functions, and generic enough to fit in the "native" library (not PSP specific stuff).
|
|
||||||
|
|
||||||
// NEON versions are dynamically selected at runtime, when you call InitFastMath.
|
|
||||||
|
|
||||||
// SSE versions are hard linked at compile time.
|
|
||||||
|
|
||||||
// See fast_matrix.h for the first set of functions.
|
|
||||||
|
|
||||||
void InitFastMath();
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
@ -1,6 +1,100 @@
|
|||||||
#include "fast_math.h"
|
#include "ppsspp_config.h"
|
||||||
|
|
||||||
#include "fast_matrix.h"
|
#include "fast_matrix.h"
|
||||||
|
|
||||||
|
#if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
|
||||||
|
|
||||||
|
#include <emmintrin.h>
|
||||||
|
|
||||||
|
#include "fast_matrix.h"
|
||||||
|
|
||||||
|
void fast_matrix_mul_4x4_sse(float *dest, const float *a, const float *b) {
|
||||||
|
int i;
|
||||||
|
__m128 a_col_1 = _mm_loadu_ps(a);
|
||||||
|
__m128 a_col_2 = _mm_loadu_ps(&a[4]);
|
||||||
|
__m128 a_col_3 = _mm_loadu_ps(&a[8]);
|
||||||
|
__m128 a_col_4 = _mm_loadu_ps(&a[12]);
|
||||||
|
|
||||||
|
for (i = 0; i < 16; i += 4) {
|
||||||
|
__m128 r_col = _mm_mul_ps(a_col_1, _mm_set1_ps(b[i]));
|
||||||
|
r_col = _mm_add_ps(r_col, _mm_mul_ps(a_col_2, _mm_set1_ps(b[i + 1])));
|
||||||
|
r_col = _mm_add_ps(r_col, _mm_mul_ps(a_col_3, _mm_set1_ps(b[i + 2])));
|
||||||
|
r_col = _mm_add_ps(r_col, _mm_mul_ps(a_col_4, _mm_set1_ps(b[i + 3])));
|
||||||
|
_mm_storeu_ps(&dest[i], r_col);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#elif PPSSPP_ARCH(ARM_NEON)
|
||||||
|
|
||||||
|
#if defined(_MSC_VER) && PPSSPP_ARCH(ARM64)
|
||||||
|
#include <arm64_neon.h>
|
||||||
|
#else
|
||||||
|
#include <arm_neon.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// From https://developer.arm.com/documentation/102467/0100/Matrix-multiplication-example
|
||||||
|
void fast_matrix_mul_4x4_neon(float *C, const float *A, const float *B) {
|
||||||
|
// these are the columns A
|
||||||
|
float32x4_t A0;
|
||||||
|
float32x4_t A1;
|
||||||
|
float32x4_t A2;
|
||||||
|
float32x4_t A3;
|
||||||
|
|
||||||
|
// these are the columns B
|
||||||
|
float32x4_t B0;
|
||||||
|
float32x4_t B1;
|
||||||
|
float32x4_t B2;
|
||||||
|
float32x4_t B3;
|
||||||
|
|
||||||
|
// these are the columns C
|
||||||
|
float32x4_t C0;
|
||||||
|
float32x4_t C1;
|
||||||
|
float32x4_t C2;
|
||||||
|
float32x4_t C3;
|
||||||
|
|
||||||
|
A0 = vld1q_f32(A);
|
||||||
|
A1 = vld1q_f32(A + 4);
|
||||||
|
A2 = vld1q_f32(A + 8);
|
||||||
|
A3 = vld1q_f32(A + 12);
|
||||||
|
|
||||||
|
// Zero accumulators for C values
|
||||||
|
C0 = vmovq_n_f32(0);
|
||||||
|
C1 = vmovq_n_f32(0);
|
||||||
|
C2 = vmovq_n_f32(0);
|
||||||
|
C3 = vmovq_n_f32(0);
|
||||||
|
|
||||||
|
// Multiply accumulate in 4x1 blocks, i.e. each column in C
|
||||||
|
B0 = vld1q_f32(B);
|
||||||
|
C0 = vfmaq_laneq_f32(C0, A0, B0, 0);
|
||||||
|
C0 = vfmaq_laneq_f32(C0, A1, B0, 1);
|
||||||
|
C0 = vfmaq_laneq_f32(C0, A2, B0, 2);
|
||||||
|
C0 = vfmaq_laneq_f32(C0, A3, B0, 3);
|
||||||
|
vst1q_f32(C, C0);
|
||||||
|
|
||||||
|
B1 = vld1q_f32(B + 4);
|
||||||
|
C1 = vfmaq_laneq_f32(C1, A0, B1, 0);
|
||||||
|
C1 = vfmaq_laneq_f32(C1, A1, B1, 1);
|
||||||
|
C1 = vfmaq_laneq_f32(C1, A2, B1, 2);
|
||||||
|
C1 = vfmaq_laneq_f32(C1, A3, B1, 3);
|
||||||
|
vst1q_f32(C + 4, C1);
|
||||||
|
|
||||||
|
B2 = vld1q_f32(B + 8);
|
||||||
|
C2 = vfmaq_laneq_f32(C2, A0, B2, 0);
|
||||||
|
C2 = vfmaq_laneq_f32(C2, A1, B2, 1);
|
||||||
|
C2 = vfmaq_laneq_f32(C2, A2, B2, 2);
|
||||||
|
C2 = vfmaq_laneq_f32(C2, A3, B2, 3);
|
||||||
|
vst1q_f32(C + 8, C2);
|
||||||
|
|
||||||
|
B3 = vld1q_f32(B + 12);
|
||||||
|
C3 = vfmaq_laneq_f32(C3, A0, B3, 0);
|
||||||
|
C3 = vfmaq_laneq_f32(C3, A1, B3, 1);
|
||||||
|
C3 = vfmaq_laneq_f32(C3, A2, B3, 2);
|
||||||
|
C3 = vfmaq_laneq_f32(C3, A3, B3, 3);
|
||||||
|
vst1q_f32(C + 12, C3);
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
#define xx 0
|
#define xx 0
|
||||||
#define xy 1
|
#define xy 1
|
||||||
#define xz 2
|
#define xz 2
|
||||||
@ -40,6 +134,4 @@ void fast_matrix_mul_4x4_c(float *dest, const float *a, const float *b) {
|
|||||||
dest[ww] = b[wx] * a[xw] + b[wy] * a[yw] + b[wz] * a[zw] + b[ww] * a[ww];
|
dest[ww] = b[wx] * a[xw] + b[wy] * a[yw] + b[wz] * a[zw] + b[ww] * a[ww];
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef fast_matrix_mul_4x4
|
#endif
|
||||||
fptr_fast_matrix_mul_4x4 fast_matrix_mul_4x4 = &fast_matrix_mul_4x4_c;
|
|
||||||
#endif
|
|
||||||
|
@ -6,11 +6,8 @@
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// A mini library of matrix math kernels.
|
// A mini library of 4x4 matrix muls.
|
||||||
|
|
||||||
// TODO: Really need to wrap this block in a macro or something, will get repetitive.
|
|
||||||
|
|
||||||
typedef void(*fptr_fast_matrix_mul_4x4)(float *dest, const float *a, const float *b);
|
|
||||||
extern void fast_matrix_mul_4x4_c(float *dest, const float *a, const float *b);
|
extern void fast_matrix_mul_4x4_c(float *dest, const float *a, const float *b);
|
||||||
extern void fast_matrix_mul_4x4_neon(float *dest, const float *a, const float *b);
|
extern void fast_matrix_mul_4x4_neon(float *dest, const float *a, const float *b);
|
||||||
extern void fast_matrix_mul_4x4_sse(float *dest, const float *a, const float *b);
|
extern void fast_matrix_mul_4x4_sse(float *dest, const float *a, const float *b);
|
||||||
@ -18,13 +15,12 @@ extern void fast_matrix_mul_4x4_sse(float *dest, const float *a, const float *b)
|
|||||||
#if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
|
#if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
|
||||||
// Hard link to SSE implementations on x86/amd64
|
// Hard link to SSE implementations on x86/amd64
|
||||||
#define fast_matrix_mul_4x4 fast_matrix_mul_4x4_sse
|
#define fast_matrix_mul_4x4 fast_matrix_mul_4x4_sse
|
||||||
#elif PPSSPP_ARCH(ARM64)
|
#elif PPSSPP_ARCH(ARM_NEON)
|
||||||
#define fast_matrix_mul_4x4 fast_matrix_mul_4x4_c
|
#define fast_matrix_mul_4x4 fast_matrix_mul_4x4_neon
|
||||||
#else
|
#else
|
||||||
extern fptr_fast_matrix_mul_4x4 fast_matrix_mul_4x4;
|
#define fast_matrix_mul_4x4 fast_matrix_mul_4x4_c
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} // extern "C"
|
} // extern "C"
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,54 +0,0 @@
|
|||||||
#include "ppsspp_config.h"
|
|
||||||
#if PPSSPP_ARCH(ARM_NEON) && !PPSSPP_ARCH(ARM64)
|
|
||||||
|
|
||||||
.syntax unified // Allow both ARM and Thumb-2 instructions
|
|
||||||
.text
|
|
||||||
.align 2 // Align the function code to a 4-byte (2^n) word boundary.
|
|
||||||
.arm // Use ARM instructions instead of Thumb.
|
|
||||||
|
|
||||||
@ From ARM samples
|
|
||||||
@
|
|
||||||
@ matrix_mul_float:
|
|
||||||
@ Calculate 4x4 (matrix 0) * (matrix 1) and store to result 4x4 matrix.
|
|
||||||
@ matrix 0, matrix 1 and result pointers can be the same,
|
|
||||||
@ ie. my_matrix = my_matrix * my_matrix is possible.
|
|
||||||
@
|
|
||||||
@ r0 = pointer to 4x4 result matrix, single precision floats, column major order
|
|
||||||
@ r1 = pointer to 4x4 matrix 0, single precision floats, column major order
|
|
||||||
@ r2 = pointer to 4x4 matrix 1, single precision floats, column major order
|
|
||||||
@
|
|
||||||
|
|
||||||
.globl _fast_matrix_mul_4x4_neon
|
|
||||||
_fast_matrix_mul_4x4_neon:
|
|
||||||
.globl fast_matrix_mul_4x4_neon
|
|
||||||
fast_matrix_mul_4x4_neon:
|
|
||||||
vld1.32 {d16-d19}, [r1]! @ load first eight elements of matrix 0
|
|
||||||
vld1.32 {d20-d23}, [r1]! @ load second eight elements of matrix 0
|
|
||||||
vld1.32 {d0-d3}, [r2]! @ load first eight elements of matrix 1
|
|
||||||
vld1.32 {d4-d7}, [r2]! @ load second eight elements of matrix 1
|
|
||||||
|
|
||||||
vmul.f32 q12, q8, d0[0] @ rslt col0 = (mat0 col0) * (mat1 col0 elt0)
|
|
||||||
vmul.f32 q13, q8, d2[0] @ rslt col1 = (mat0 col0) * (mat1 col1 elt0)
|
|
||||||
vmul.f32 q14, q8, d4[0] @ rslt col2 = (mat0 col0) * (mat1 col2 elt0)
|
|
||||||
vmul.f32 q15, q8, d6[0] @ rslt col3 = (mat0 col0) * (mat1 col3 elt0)
|
|
||||||
|
|
||||||
vmla.f32 q12, q9, d0[1] @ rslt col0 += (mat0 col1) * (mat1 col0 elt1)
|
|
||||||
vmla.f32 q13, q9, d2[1] @ rslt col1 += (mat0 col1) * (mat1 col1 elt1)
|
|
||||||
vmla.f32 q14, q9, d4[1] @ rslt col2 += (mat0 col1) * (mat1 col2 elt1)
|
|
||||||
vmla.f32 q15, q9, d6[1] @ rslt col3 += (mat0 col1) * (mat1 col3 elt1)
|
|
||||||
|
|
||||||
vmla.f32 q12, q10, d1[0] @ rslt col0 += (mat0 col2) * (mat1 col0 elt2)
|
|
||||||
vmla.f32 q13, q10, d3[0] @ rslt col1 += (mat0 col2) * (mat1 col1 elt2)
|
|
||||||
vmla.f32 q14, q10, d5[0] @ rslt col2 += (mat0 col2) * (mat1 col2 elt2)
|
|
||||||
vmla.f32 q15, q10, d7[0] @ rslt col3 += (mat0 col2) * (mat1 col2 elt2)
|
|
||||||
|
|
||||||
vmla.f32 q12, q11, d1[1] @ rslt col0 += (mat0 col3) * (mat1 col0 elt3)
|
|
||||||
vmla.f32 q13, q11, d3[1] @ rslt col1 += (mat0 col3) * (mat1 col1 elt3)
|
|
||||||
vmla.f32 q14, q11, d5[1] @ rslt col2 += (mat0 col3) * (mat1 col2 elt3)
|
|
||||||
vmla.f32 q15, q11, d7[1] @ rslt col3 += (mat0 col3) * (mat1 col3 elt3)
|
|
||||||
|
|
||||||
vst1.32 {d24-d27}, [r0]! @ store first eight elements of result
|
|
||||||
vst1.32 {d28-d31}, [r0]! @ store second eight elements of result
|
|
||||||
bx lr
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,25 +0,0 @@
|
|||||||
#include "ppsspp_config.h"
|
|
||||||
|
|
||||||
#if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
|
|
||||||
|
|
||||||
#include <emmintrin.h>
|
|
||||||
|
|
||||||
#include "fast_matrix.h"
|
|
||||||
|
|
||||||
void fast_matrix_mul_4x4_sse(float *dest, const float *a, const float *b) {
|
|
||||||
int i;
|
|
||||||
__m128 a_col_1 = _mm_loadu_ps(a);
|
|
||||||
__m128 a_col_2 = _mm_loadu_ps(&a[4]);
|
|
||||||
__m128 a_col_3 = _mm_loadu_ps(&a[8]);
|
|
||||||
__m128 a_col_4 = _mm_loadu_ps(&a[12]);
|
|
||||||
|
|
||||||
for (i = 0; i < 16; i += 4) {
|
|
||||||
__m128 r_col = _mm_mul_ps(a_col_1, _mm_set1_ps(b[i]));
|
|
||||||
r_col = _mm_add_ps(r_col, _mm_mul_ps(a_col_2, _mm_set1_ps(b[i + 1])));
|
|
||||||
r_col = _mm_add_ps(r_col, _mm_mul_ps(a_col_3, _mm_set1_ps(b[i + 2])));
|
|
||||||
r_col = _mm_add_ps(r_col, _mm_mul_ps(a_col_4, _mm_set1_ps(b[i + 3])));
|
|
||||||
_mm_storeu_ps(&dest[i], r_col);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
@ -234,6 +234,7 @@ bool DrawEngineCommon::TestBoundingBox(const void* control_points, int vertexCou
|
|||||||
float worldviewproj[16];
|
float worldviewproj[16];
|
||||||
ConvertMatrix4x3To4x4(world, gstate.worldMatrix);
|
ConvertMatrix4x3To4x4(world, gstate.worldMatrix);
|
||||||
ConvertMatrix4x3To4x4(view, gstate.viewMatrix);
|
ConvertMatrix4x3To4x4(view, gstate.viewMatrix);
|
||||||
|
// TODO: Create a Matrix4x3ByMatrix4x3, and Matrix4x4ByMatrix4x3?
|
||||||
Matrix4ByMatrix4(worldview, world, view);
|
Matrix4ByMatrix4(worldview, world, view);
|
||||||
Matrix4ByMatrix4(worldviewproj, worldview, gstate.projMatrix);
|
Matrix4ByMatrix4(worldviewproj, worldview, gstate.projMatrix);
|
||||||
PlanesFromMatrix(worldviewproj, planes);
|
PlanesFromMatrix(worldviewproj, planes);
|
||||||
|
@ -1559,7 +1559,7 @@ void ConvertStencilFuncState(GenericStencilFuncState &state) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void GenericMaskState::Log() {
|
void GenericMaskState::Log() {
|
||||||
WARN_LOG(G3D, "Mask: %01X readfb=%d", uniformMask, channelMask, applyFramebufferRead);
|
WARN_LOG(G3D, "Mask: %08x %01X readfb=%d", uniformMask, channelMask, applyFramebufferRead);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GenericBlendState::Log() {
|
void GenericBlendState::Log() {
|
||||||
|
@ -62,7 +62,6 @@
|
|||||||
#include "Common/System/NativeApp.h"
|
#include "Common/System/NativeApp.h"
|
||||||
#include "Common/Data/Text/I18n.h"
|
#include "Common/Data/Text/I18n.h"
|
||||||
#include "Common/Input/InputState.h"
|
#include "Common/Input/InputState.h"
|
||||||
#include "Common/Math/fast/fast_math.h"
|
|
||||||
#include "Common/Math/math_util.h"
|
#include "Common/Math/math_util.h"
|
||||||
#include "Common/Math/lin/matrix4x4.h"
|
#include "Common/Math/lin/matrix4x4.h"
|
||||||
#include "Common/Profiler/Profiler.h"
|
#include "Common/Profiler/Profiler.h"
|
||||||
@ -463,7 +462,6 @@ void NativeInit(int argc, const char *argv[], const char *savegame_dir, const ch
|
|||||||
|
|
||||||
ShaderTranslationInit();
|
ShaderTranslationInit();
|
||||||
|
|
||||||
InitFastMath();
|
|
||||||
g_threadManager.Init(cpu_info.num_cores, cpu_info.logical_cpu_count);
|
g_threadManager.Init(cpu_info.num_cores, cpu_info.logical_cpu_count);
|
||||||
|
|
||||||
g_Discord.SetPresenceMenu();
|
g_Discord.SetPresenceMenu();
|
||||||
|
@ -432,7 +432,6 @@
|
|||||||
<ClInclude Include="..\..\Common\Input\KeyCodes.h" />
|
<ClInclude Include="..\..\Common\Input\KeyCodes.h" />
|
||||||
<ClInclude Include="..\..\Common\Math\curves.h" />
|
<ClInclude Include="..\..\Common\Math\curves.h" />
|
||||||
<ClInclude Include="..\..\Common\Math\expression_parser.h" />
|
<ClInclude Include="..\..\Common\Math\expression_parser.h" />
|
||||||
<ClInclude Include="..\..\Common\Math\fast\fast_math.h" />
|
|
||||||
<ClInclude Include="..\..\Common\Math\fast\fast_matrix.h" />
|
<ClInclude Include="..\..\Common\Math\fast\fast_matrix.h" />
|
||||||
<ClInclude Include="..\..\Common\Math\geom2d.h" />
|
<ClInclude Include="..\..\Common\Math\geom2d.h" />
|
||||||
<ClInclude Include="..\..\Common\Math\lin\matrix4x4.h" />
|
<ClInclude Include="..\..\Common\Math\lin\matrix4x4.h" />
|
||||||
@ -555,9 +554,7 @@
|
|||||||
<ClCompile Include="..\..\Common\Input\InputState.cpp" />
|
<ClCompile Include="..\..\Common\Input\InputState.cpp" />
|
||||||
<ClCompile Include="..\..\Common\Math\curves.cpp" />
|
<ClCompile Include="..\..\Common\Math\curves.cpp" />
|
||||||
<ClCompile Include="..\..\Common\Math\expression_parser.cpp" />
|
<ClCompile Include="..\..\Common\Math\expression_parser.cpp" />
|
||||||
<ClCompile Include="..\..\Common\Math\fast\fast_math.c" />
|
|
||||||
<ClCompile Include="..\..\Common\Math\fast\fast_matrix.c" />
|
<ClCompile Include="..\..\Common\Math\fast\fast_matrix.c" />
|
||||||
<ClCompile Include="..\..\Common\Math\fast\fast_matrix_sse.c" />
|
|
||||||
<ClCompile Include="..\..\Common\Math\lin\matrix4x4.cpp" />
|
<ClCompile Include="..\..\Common\Math\lin\matrix4x4.cpp" />
|
||||||
<ClCompile Include="..\..\Common\Math\lin\vec3.cpp" />
|
<ClCompile Include="..\..\Common\Math\lin\vec3.cpp" />
|
||||||
<ClCompile Include="..\..\Common\Math\math_util.cpp" />
|
<ClCompile Include="..\..\Common\Math\math_util.cpp" />
|
||||||
|
@ -225,15 +225,9 @@
|
|||||||
<ClCompile Include="..\..\Common\Math\math_util.cpp">
|
<ClCompile Include="..\..\Common\Math\math_util.cpp">
|
||||||
<Filter>Math</Filter>
|
<Filter>Math</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
<ClCompile Include="..\..\Common\Math\fast\fast_math.c">
|
|
||||||
<Filter>Math\fast</Filter>
|
|
||||||
</ClCompile>
|
|
||||||
<ClCompile Include="..\..\Common\Math\fast\fast_matrix.c">
|
<ClCompile Include="..\..\Common\Math\fast\fast_matrix.c">
|
||||||
<Filter>Math\fast</Filter>
|
<Filter>Math\fast</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
<ClCompile Include="..\..\Common\Math\fast\fast_matrix_sse.c">
|
|
||||||
<Filter>Math\fast</Filter>
|
|
||||||
</ClCompile>
|
|
||||||
<ClCompile Include="..\..\Common\Math\lin\matrix4x4.cpp">
|
<ClCompile Include="..\..\Common\Math\lin\matrix4x4.cpp">
|
||||||
<Filter>Math\lin</Filter>
|
<Filter>Math\lin</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
@ -529,9 +523,6 @@
|
|||||||
<ClInclude Include="..\..\Common\Math\math_util.h">
|
<ClInclude Include="..\..\Common\Math\math_util.h">
|
||||||
<Filter>Math</Filter>
|
<Filter>Math</Filter>
|
||||||
</ClInclude>
|
</ClInclude>
|
||||||
<ClInclude Include="..\..\Common\Math\fast\fast_math.h">
|
|
||||||
<Filter>Math\fast</Filter>
|
|
||||||
</ClInclude>
|
|
||||||
<ClInclude Include="..\..\Common\Math\fast\fast_matrix.h">
|
<ClInclude Include="..\..\Common\Math\fast\fast_matrix.h">
|
||||||
<Filter>Math\fast</Filter>
|
<Filter>Math\fast</Filter>
|
||||||
</ClInclude>
|
</ClInclude>
|
||||||
|
@ -11,19 +11,16 @@ ARCH_FILES := \
|
|||||||
$(SRC)/Common/ABI.cpp \
|
$(SRC)/Common/ABI.cpp \
|
||||||
$(SRC)/Common/x64Emitter.cpp \
|
$(SRC)/Common/x64Emitter.cpp \
|
||||||
$(SRC)/Common/x64Analyzer.cpp \
|
$(SRC)/Common/x64Analyzer.cpp \
|
||||||
$(SRC)/Common/Math/fast/fast_matrix_sse.c \
|
|
||||||
$(SRC)/Common/Thunk.cpp
|
$(SRC)/Common/Thunk.cpp
|
||||||
else ifeq ($(TARGET_ARCH_ABI),x86_64)
|
else ifeq ($(TARGET_ARCH_ABI),x86_64)
|
||||||
ARCH_FILES := \
|
ARCH_FILES := \
|
||||||
$(SRC)/Common/ABI.cpp \
|
$(SRC)/Common/ABI.cpp \
|
||||||
$(SRC)/Common/x64Emitter.cpp \
|
$(SRC)/Common/x64Emitter.cpp \
|
||||||
$(SRC)/Common/x64Analyzer.cpp \
|
$(SRC)/Common/x64Analyzer.cpp \
|
||||||
$(SRC)/Common/Math/fast/fast_matrix_sse.c \
|
|
||||||
$(SRC)/Common/Thunk.cpp
|
$(SRC)/Common/Thunk.cpp
|
||||||
else ifeq ($(findstring armeabi-v7a,$(TARGET_ARCH_ABI)),armeabi-v7a)
|
else ifeq ($(findstring armeabi-v7a,$(TARGET_ARCH_ABI)),armeabi-v7a)
|
||||||
ARCH_FILES := \
|
ARCH_FILES := \
|
||||||
$(SRC)/Common/ArmEmitter.cpp \
|
$(SRC)/Common/ArmEmitter.cpp \
|
||||||
$(SRC)/Common/Math/fast/fast_matrix_neon.S.neon \
|
|
||||||
$(SRC)/ext/disarm.cpp \
|
$(SRC)/ext/disarm.cpp \
|
||||||
$(SRC)/ext/libpng17/arm/arm_init.c \
|
$(SRC)/ext/libpng17/arm/arm_init.c \
|
||||||
$(SRC)/ext/libpng17/arm/filter_neon_intrinsics.c \
|
$(SRC)/ext/libpng17/arm/filter_neon_intrinsics.c \
|
||||||
@ -164,7 +161,6 @@ EXEC_AND_LIB_FILES := \
|
|||||||
$(SRC)/Common/Render/Text/draw_text_android.cpp \
|
$(SRC)/Common/Render/Text/draw_text_android.cpp \
|
||||||
$(SRC)/Common/Input/GestureDetector.cpp \
|
$(SRC)/Common/Input/GestureDetector.cpp \
|
||||||
$(SRC)/Common/Input/InputState.cpp \
|
$(SRC)/Common/Input/InputState.cpp \
|
||||||
$(SRC)/Common/Math/fast/fast_math.c \
|
|
||||||
$(SRC)/Common/Math/fast/fast_matrix.c \
|
$(SRC)/Common/Math/fast/fast_matrix.c \
|
||||||
$(SRC)/Common/Math/math_util.cpp \
|
$(SRC)/Common/Math/math_util.cpp \
|
||||||
$(SRC)/Common/Math/curves.cpp \
|
$(SRC)/Common/Math/curves.cpp \
|
||||||
|
@ -163,7 +163,6 @@ ifeq ($(PLATFORM_EXT), android)
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
SOURCES_C +=\
|
SOURCES_C +=\
|
||||||
$(COMMONDIR)/Math/fast/fast_math.c \
|
|
||||||
$(COMMONDIR)/Math/fast/fast_matrix.c
|
$(COMMONDIR)/Math/fast/fast_matrix.c
|
||||||
endif
|
endif
|
||||||
|
|
||||||
@ -633,8 +632,7 @@ ifeq ($(WITH_DYNAREC),1)
|
|||||||
|
|
||||||
SOURCES_C += $(EXTDIR)/libpng17/arm/arm_init.c \
|
SOURCES_C += $(EXTDIR)/libpng17/arm/arm_init.c \
|
||||||
$(EXTDIR)/libpng17/arm/filter_neon_intrinsics.c
|
$(EXTDIR)/libpng17/arm/filter_neon_intrinsics.c
|
||||||
ASMFILES += $(COMMONDIR)/Math/fast/fast_matrix_neon.S \
|
ASMFILES += $(EXTDIR)/libpng17/arm/filter_neon.S
|
||||||
$(EXTDIR)/libpng17/arm/filter_neon.S
|
|
||||||
endif
|
endif
|
||||||
else ifeq ($(TARGET_ARCH),arm64)
|
else ifeq ($(TARGET_ARCH),arm64)
|
||||||
DYNAFLAGS += -D_ARCH_64
|
DYNAFLAGS += -D_ARCH_64
|
||||||
|
Loading…
Reference in New Issue
Block a user