mirror of
https://github.com/RPCSX/llvm.git
synced 2025-04-12 04:56:49 +00:00
Fix merging base-updates for VLDM/VSTM: Before I switched these instructions to use AddrMode4, there was a count of the registers stored in one of the operands. I changed that to just count the operands but forgot to adjust for the size of D registers. This was noticed by Evan as a performance problem, but it is also a potential correctness bug, since it could merge a base update with a non-matching immediate.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@113576 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
3ef1c8759a
commit
efe7d9a12f
@@ -458,9 +458,10 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
|
|||||||
case ARM::t2STM:
|
case ARM::t2STM:
|
||||||
case ARM::VLDMS:
|
case ARM::VLDMS:
|
||||||
case ARM::VSTMS:
|
case ARM::VSTMS:
|
||||||
|
return (MI->getNumOperands() - 4) * 4;
|
||||||
case ARM::VLDMD:
|
case ARM::VLDMD:
|
||||||
case ARM::VSTMD:
|
case ARM::VSTMD:
|
||||||
return (MI->getNumOperands() - 4) * 4;
|
return (MI->getNumOperands() - 4) * 8;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -1,11 +1,15 @@
|
|||||||
; RUN: llc < %s -march=arm -mattr=+v6,+vfp2
|
; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | FileCheck %s
|
||||||
|
|
||||||
@quant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1]
|
@quant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1]
|
||||||
@dequant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1]
|
@dequant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1]
|
||||||
@A = external global [4 x [4 x i32]] ; <[4 x [4 x i32]]*> [#uses=1]
|
@A = external global [4 x [4 x i32]] ; <[4 x [4 x i32]]*> [#uses=1]
|
||||||
|
|
||||||
|
; CHECK: dct_luma_sp:
|
||||||
define fastcc i32 @dct_luma_sp(i32 %block_x, i32 %block_y, i32* %coeff_cost) {
|
define fastcc i32 @dct_luma_sp(i32 %block_x, i32 %block_y, i32* %coeff_cost) {
|
||||||
entry:
|
entry:
|
||||||
|
; Make sure to use base-updating stores for saving callee-saved registers.
|
||||||
|
; CHECK-NOT: sub sp
|
||||||
|
; CHECK: vstmdb sp!
|
||||||
%predicted_block = alloca [4 x [4 x i32]], align 4 ; <[4 x [4 x i32]]*> [#uses=1]
|
%predicted_block = alloca [4 x [4 x i32]], align 4 ; <[4 x [4 x i32]]*> [#uses=1]
|
||||||
br label %cond_next489
|
br label %cond_next489
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user