mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-26 23:10:38 +00:00
ARM: Use PLD (cache preload) in vertex decoder loop.
This commit is contained in:
parent
f650b23c90
commit
dfea160491
@ -801,6 +801,17 @@ void ARMXEmitter::CLZ(ARMReg rd, ARMReg rm)
|
||||
Write32(condition | (0x16F << 16) | (rd << 12) | (0xF1 << 4) | rm);
|
||||
}
|
||||
|
||||
// Emits a PLD (immediate) cache preload hint for the address rn +/- offset.
//   rn       - base address register.
//   offset   - signed byte offset. The instruction stores a 12-bit magnitude
//              (imm12) plus a separate add/subtract bit, so -4095..4095 is encodable.
//   forWrite - clears the R bit, selecting the preload-for-write form (PLDW).
// The instruction is unconditional; the emitter's current condition is not applied.
void ARMXEmitter::PLD(ARMReg rn, int offset, bool forWrite) {
	// imm12 holds the magnitude only, so the full +/-0xfff range is valid.
	_dbg_assert_msg_(JIT, offset <= 0xfff && offset >= -0xfff, "PLD: Max 12 bits of offset allowed");

	// U=1 adds the offset to rn, U=0 subtracts it.
	const bool U = offset >= 0;
	// Negate in unsigned arithmetic so that even INT_MIN is well defined, and mask to
	// the imm12 field so an out-of-range offset can never spill into the Rn/opcode
	// bits when the debug assert is compiled out.
	const u32 magnitude = U ? (u32)offset : (0u - (u32)offset);
	const u32 imm12 = magnitude & 0xFFF;
	const bool R = !forWrite;
	// Conditions not allowed
	Write32((0xF5u << 24) | ((u32)U << 23) | ((u32)R << 22) | (1u << 20) | ((u32)rn << 16) | (0xFu << 12) | imm12);
}
|
||||
|
||||
|
||||
void ARMXEmitter::BFI(ARMReg rd, ARMReg rn, u8 lsb, u8 width)
|
||||
{
|
||||
u32 msb = (lsb + width - 1);
|
||||
|
@ -541,6 +541,7 @@ public:
|
||||
void BFI(ARMReg rd, ARMReg rn, u8 lsb, u8 width);
|
||||
void UBFX(ARMReg dest, ARMReg op2, u8 lsb, u8 width);
|
||||
void CLZ(ARMReg rd, ARMReg rm);
|
||||
// Cache preload hint for the address rn + offset (rn is the base address register;
// the instruction has no destination). forWrite requests the preload-for-write form.
void PLD(ARMReg rn, int offset, bool forWrite = false);
|
||||
|
||||
// Using just MSR here messes with our defines on the PPC side of stuff (when this code was in dolphin...)
|
||||
// Just need to put an underscore here, bit annoying.
|
||||
|
@ -1058,6 +1058,8 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec) {
|
||||
}
|
||||
|
||||
JumpTarget loopStart = GetCodePtr();
|
||||
// Preload data cache ahead of reading. TODO: Experiment with the offset.
|
||||
PLD(srcReg, 64);
|
||||
for (int i = 0; i < dec.numSteps_; i++) {
|
||||
if (!CompileStep(dec, i)) {
|
||||
// Reset the code ptr and return zero to indicate that we failed.
|
||||
@ -1265,13 +1267,14 @@ void VertexDecoderJitCache::Jit_TcU16ThroughDouble() {
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_TcU8Prescale() {
|
||||
if (false && cpu_info.bNEON) {
|
||||
if (cpu_info.bNEON) {
|
||||
// TODO: Needs testing
|
||||
ADD(scratchReg, srcReg, dec_->tcoff);
|
||||
VLD1_lane(I_16, neonScratchReg, scratchReg, 0, false);
|
||||
VMOVL(I_8 | I_UNSIGNED, neonScratchRegQ, neonScratchReg); // Widen to 32-bit
|
||||
VMOVL(I_8 | I_UNSIGNED, neonScratchRegQ, neonScratchReg); // Widen to 16-bit
|
||||
VMOVL(I_16 | I_UNSIGNED, neonScratchRegQ, neonScratchReg); // Widen to 32-bit
|
||||
VCVT(F_32 | I_UNSIGNED, neonScratchRegQ, neonScratchRegQ);
|
||||
ADD(scratchReg2, dstReg, dec_->decFmt.uvoff);
|
||||
VMUL(F_32, neonScratchReg, neonScratchReg, neonUVScaleReg);
|
||||
VADD(F_32, neonScratchReg, neonScratchReg, neonUVOffsetReg);
|
||||
VST1(F_32, neonScratchReg, scratchReg2, 1, ALIGN_NONE);
|
||||
@ -1294,12 +1297,13 @@ void VertexDecoderJitCache::Jit_TcU8Prescale() {
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_TcU16Prescale() {
|
||||
if (false && cpu_info.bNEON) {
|
||||
if (cpu_info.bNEON) {
|
||||
// TODO: Needs testing
|
||||
ADD(scratchReg, srcReg, dec_->tcoff);
|
||||
VLD1_lane(I_32, neonScratchReg, scratchReg, 0, false);
|
||||
VMOVL(I_16 | I_UNSIGNED, neonScratchRegQ, neonScratchReg); // Widen to 32-bit
|
||||
VCVT(F_32 | I_UNSIGNED, neonScratchRegQ, neonScratchRegQ);
|
||||
ADD(scratchReg2, dstReg, dec_->decFmt.uvoff);
|
||||
VMUL(F_32, neonScratchReg, neonScratchReg, neonUVScaleReg);
|
||||
VADD(F_32, neonScratchReg, neonScratchReg, neonUVOffsetReg);
|
||||
VST1(F_32, neonScratchReg, scratchReg2, 1, ALIGN_NONE);
|
||||
|
@ -52,8 +52,6 @@ void TestCode::Generate()
|
||||
VLD1_all_lanes(F_32, Q2, R1, true);
|
||||
ADD(R0, R0, 12);
|
||||
VLD1_lane(F_32, D4, R0, 1, true);
|
||||
u32 word = *(u32 *)(GetCodePtr() - 4);
|
||||
ILOG("Instruction Word: %08x", word);
|
||||
// VMUL(F_32, Q2, Q0, Q1);
|
||||
VST1(F_32, D4, R2, 2);
|
||||
*/
|
||||
@ -70,6 +68,10 @@ void TestCode::Generate()
|
||||
VST1(I_32, D2, R1, 2);
|
||||
VST1(I_32, D4, R2, 2);
|
||||
VST1(I_32, D6, R3, 2);
|
||||
PLD(R1, 32);
|
||||
u32 word = *(u32 *)(GetCodePtr() - 4);
|
||||
ILOG("Instruction Word: %08x", word);
|
||||
|
||||
|
||||
// This works!
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user