mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-29 22:30:33 +00:00
PR 18466: Fix ARM Pseudo Expansion
When expanding NEON pseudo stores, the expansion may miss the implicit uses of sub-registers, which may cause the post-RA scheduler to reorder instructions in a way that breaks anti-dependencies. For example: VST1d64QPseudo %R0<kill>, 16, %Q9_Q10, pred:14, pred:%noreg will be expanded to VST1d64Q %R0<kill>, 16, %D18, pred:14, pred:%noreg; An instruction that defines %D20 may be scheduled before the store by mistake. This patch adds implicit uses for such cases. For the example above, it emits: VST1d64Q %R0<kill>, 8, %D18, pred:14, pred:%noreg, %Q9_Q10<imp-use> llvm-svn: 199282
This commit is contained in:
parent
d818632103
commit
7e22e3a1ea
@ -479,6 +479,8 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
|
||||
|
||||
if (SrcIsKill && !SrcIsUndef) // Add an implicit kill for the super-reg.
|
||||
MIB->addRegisterKilled(SrcReg, TRI, true);
|
||||
else if (!SrcIsUndef)
|
||||
MIB.addReg(SrcReg, RegState::Implicit); // Add implicit uses for src reg.
|
||||
TransferImpOps(MI, MIB, MIB);
|
||||
|
||||
// Transfer memoperands.
|
||||
@ -604,8 +606,8 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
|
||||
MIB.addOperand(MI.getOperand(OpIdx++));
|
||||
MIB.addOperand(MI.getOperand(OpIdx++));
|
||||
|
||||
if (SrcIsKill) // Add an implicit kill for the super-reg.
|
||||
MIB->addRegisterKilled(SrcReg, TRI, true);
|
||||
// Add an implicit kill and use for the super-reg.
|
||||
MIB.addReg(SrcReg, RegState::Implicit | getKillRegState(SrcIsKill));
|
||||
TransferImpOps(MI, MIB, MIB);
|
||||
MI.eraseFromParent();
|
||||
}
|
||||
|
55
test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll
Normal file
55
test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll
Normal file
@ -0,0 +1,55 @@
|
||||
; RUN: llc < %s -march=arm -mattr=+neon -print-before=post-RA-sched > %t 2>&1 && FileCheck < %t %s
|
||||
|
||||
define void @vst(i8* %m, [4 x i64] %v) {
|
||||
entry:
|
||||
; CHECK: vst:
|
||||
; CHECK: VST1d64Q %R{{[0-9]+}}<kill>, 8, %D{{[0-9]+}}, pred:14, pred:%noreg, %Q{{[0-9]+}}_Q{{[0-9]+}}<imp-use>
|
||||
|
||||
%v0 = extractvalue [4 x i64] %v, 0
|
||||
%v1 = extractvalue [4 x i64] %v, 1
|
||||
%v2 = extractvalue [4 x i64] %v, 2
|
||||
%v3 = extractvalue [4 x i64] %v, 3
|
||||
|
||||
%t0 = bitcast i64 %v0 to <8 x i8>
|
||||
%t1 = bitcast i64 %v1 to <8 x i8>
|
||||
%t2 = bitcast i64 %v2 to <8 x i8>
|
||||
%t3 = bitcast i64 %v3 to <8 x i8>
|
||||
|
||||
%s0 = bitcast <8 x i8> %t0 to <1 x i64>
|
||||
%s1 = bitcast <8 x i8> %t1 to <1 x i64>
|
||||
%s2 = bitcast <8 x i8> %t2 to <1 x i64>
|
||||
%s3 = bitcast <8 x i8> %t3 to <1 x i64>
|
||||
|
||||
%tmp0 = bitcast <1 x i64> %s2 to i64
|
||||
%tmp1 = bitcast <1 x i64> %s3 to i64
|
||||
|
||||
%n0 = insertelement <2 x i64> undef, i64 %tmp0, i32 0
|
||||
%n1 = insertelement <2 x i64> %n0, i64 %tmp1, i32 1
|
||||
|
||||
call void @llvm.arm.neon.vst4.v1i64(i8* %m, <1 x i64> %s0, <1 x i64> %s1, <1 x i64> %s2, <1 x i64> %s3, i32 8)
|
||||
|
||||
call void @bar(<2 x i64> %n1)
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
|
||||
define <8 x i8> @vtbx4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B, <8 x i8>* %C) nounwind {
|
||||
; CHECK: vtbx4:
|
||||
; CHECK: VTBX4 {{.*}}, pred:14, pred:%noreg, %Q{{[0-9]+}}_Q{{[0-9]+}}<imp-use>
|
||||
%tmp1 = load <8 x i8>* %A
|
||||
%tmp2 = load %struct.__neon_int8x8x4_t* %B
|
||||
%tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
|
||||
%tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
|
||||
%tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
|
||||
%tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
|
||||
%tmp7 = load <8 x i8>* %C
|
||||
%tmp8 = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6, <8 x i8> %tmp7)
|
||||
call void @bar2(%struct.__neon_int8x8x4_t %tmp2, <8 x i8> %tmp8)
|
||||
ret <8 x i8> %tmp8
|
||||
}
|
||||
|
||||
declare void @llvm.arm.neon.vst4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32)
|
||||
declare <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
|
||||
declare void @bar2(%struct.__neon_int8x8x4_t, <8 x i8>)
|
||||
declare void @bar(<2 x i64> %arg)
|
Loading…
Reference in New Issue
Block a user