mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-04 01:11:44 +00:00
MachineCopyPropagation: Remove the copies instead of using KILL instructions.
For some history here, see the commit messages of r199797 and r169060.

The original intent was to fix cases like:

    %EAX<def> = COPY %ECX<kill>, %RAX<imp-def>
    %RCX<def> = COPY %RAX<kill>

where simply removing the copies would leave RCX undefined, because in terms of machine operands only the ECX part of it is defined. The machine verifier would complain about this, so r169060 changed such COPY instructions into KILL instructions so that some super-register imp-defs would be preserved. In r199797 it was finally decided to always do this, regardless of super-register defs.

But this is wrong. Consider:

    R1 = COPY R0
    ...
    R0 = COPY R1

getting changed to:

    R1 = KILL R0
    ...
    R0 = KILL R1

It now looks like R0 dies at the first KILL and won't be alive until the second KILL, while in reality R0 is alive and must not change in this part of the program.

As this only happens after register allocation, there is not much code still performing liveness queries, so the issue was not noticed. In fact, I didn't manage to create a testcase for this without unrelated changes I am working on at the moment.

The fix is simple: as of r223896 the MachineVerifier allows reads from partially defined registers, so the whole business of transforming COPY into KILL is not necessary anymore.

This patch also changes a similar case in the VirtRegRewriter (a more benign one, as the def and src are the same register).

Differential Revision: http://reviews.llvm.org/D10117

llvm-svn: 238588
This commit is contained in:
parent
bb8e58f100
commit
f3c791538e
@ -54,7 +54,6 @@ namespace {
|
||||
SourceMap &SrcMap,
|
||||
DenseMap<unsigned, MachineInstr*> &AvailCopyMap);
|
||||
bool CopyPropagateBlock(MachineBasicBlock &MBB);
|
||||
void removeCopy(MachineInstr *MI);
|
||||
};
|
||||
}
|
||||
char MachineCopyPropagation::ID = 0;
|
||||
@ -127,13 +126,6 @@ static bool isNopCopy(MachineInstr *CopyMI, unsigned Def, unsigned Src,
|
||||
return false;
|
||||
}
|
||||
|
||||
// Remove MI from the function because it has been determined it is dead.
|
||||
// Turn it into a noop KILL instruction as opposed to removing it to
|
||||
// maintain imp-use/imp-def chains.
|
||||
void MachineCopyPropagation::removeCopy(MachineInstr *MI) {
|
||||
MI->setDesc(TII->get(TargetOpcode::KILL));
|
||||
}
|
||||
|
||||
bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
|
||||
SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; // Candidates for deletion
|
||||
DenseMap<unsigned, MachineInstr*> AvailCopyMap; // Def -> available copies map
|
||||
@ -183,7 +175,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
|
||||
for (MachineBasicBlock::iterator I = CopyMI, E = MI; I != E; ++I)
|
||||
I->clearRegisterKills(Def, TRI);
|
||||
|
||||
removeCopy(MI);
|
||||
MI->eraseFromParent();
|
||||
Changed = true;
|
||||
++NumDeletes;
|
||||
continue;
|
||||
@ -291,7 +283,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
|
||||
continue;
|
||||
DEBUG(dbgs() << "MCP: Removing copy due to regmask clobbering: ";
|
||||
(*DI)->dump());
|
||||
removeCopy(*DI);
|
||||
(*DI)->eraseFromParent();
|
||||
Changed = true;
|
||||
++NumDeletes;
|
||||
}
|
||||
@ -327,7 +319,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
|
||||
DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
|
||||
DI != DE; ++DI) {
|
||||
if (!MRI->isReserved((*DI)->getOperand(0).getReg())) {
|
||||
removeCopy(*DI);
|
||||
(*DI)->eraseFromParent();
|
||||
Changed = true;
|
||||
++NumDeletes;
|
||||
}
|
||||
|
@ -417,17 +417,11 @@ void VirtRegRewriter::rewrite() {
|
||||
// Finally, remove any identity copies.
|
||||
if (MI->isIdentityCopy()) {
|
||||
++NumIdCopies;
|
||||
if (MI->getNumOperands() == 2) {
|
||||
DEBUG(dbgs() << "Deleting identity copy.\n");
|
||||
if (Indexes)
|
||||
Indexes->removeMachineInstrFromMaps(MI);
|
||||
// It's safe to erase MI because MII has already been incremented.
|
||||
MI->eraseFromParent();
|
||||
} else {
|
||||
// Transform identity copy to a KILL to deal with subregisters.
|
||||
MI->setDesc(TII->get(TargetOpcode::KILL));
|
||||
DEBUG(dbgs() << "Identity copy: " << *MI);
|
||||
}
|
||||
DEBUG(dbgs() << "Deleting identity copy.\n");
|
||||
if (Indexes)
|
||||
Indexes->removeMachineInstrFromMaps(MI);
|
||||
// It's safe to erase MI because MII has already been incremented.
|
||||
MI->eraseFromParent();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -116,11 +116,8 @@ define <2 x double> @test8(<2 x double> %a, <2 x double> %b) {
|
||||
define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
|
||||
; KNL-LABEL: test9:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: ## kill: YMM1<def> YMM1<kill> ZMM1<def>
|
||||
; KNL-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<def>
|
||||
; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
|
||||
; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
|
||||
; KNL-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<kill>
|
||||
; KNL-NEXT: retq
|
||||
%mask = icmp eq <8 x i32> %x, %y
|
||||
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
|
||||
@ -130,11 +127,8 @@ define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
|
||||
define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
|
||||
; KNL-LABEL: test10:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: ## kill: YMM1<def> YMM1<kill> ZMM1<def>
|
||||
; KNL-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<def>
|
||||
; KNL-NEXT: vcmpeqps %zmm1, %zmm0, %k1
|
||||
; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
|
||||
; KNL-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<kill>
|
||||
; KNL-NEXT: retq
|
||||
; SKX-LABEL: test10:
|
||||
; SKX: ## BB#0:
|
||||
@ -166,7 +160,6 @@ define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
|
||||
; KNL-NEXT: vpcmpeqq %zmm3, %zmm1, %k1
|
||||
; KNL-NEXT: kunpckbw %k0, %k1, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: ## kill: AX<def> AX<kill> EAX<kill>
|
||||
; KNL-NEXT: retq
|
||||
%res = icmp eq <16 x i64> %a, %b
|
||||
%res1 = bitcast <16 x i1> %res to i16
|
||||
|
@ -11,8 +11,7 @@
|
||||
@NullToken = external global i64
|
||||
|
||||
; CHECK-LABEL: Part_Create:
|
||||
; CHECK-DAG: # kill: RDI<def>
|
||||
; CHECK-DAG: movq PartClass@GOTPCREL(%rip), %r10
|
||||
; CHECK: movq PartClass@GOTPCREL(%rip), %r10
|
||||
define i32 @Part_Create(i64* %Anchor, i32 %TypeNum, i32 %F, i32 %Z, i32* %Status, i64* %PartTkn) {
|
||||
%PartObj = alloca i64*, align 8
|
||||
%Vchunk = alloca i64, align 8
|
||||
|
@ -239,7 +239,6 @@ define <4 x i64> @fptoui_4vf64(<4 x double> %a) {
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movapd %xmm0, %xmm2
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
|
||||
; SSE2-NEXT: {{.*#+}} kill: XMM0<def> XMM2<kill>
|
||||
; SSE2-NEXT: subsd %xmm3, %xmm0
|
||||
; SSE2-NEXT: cvttsd2si %xmm0, %rcx
|
||||
; SSE2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
|
||||
@ -589,7 +588,6 @@ define <8 x i32> @fptoui_8vf32(<8 x float> %a) {
|
||||
; SSE2-LABEL: fptoui_8vf32:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movaps %xmm0, %xmm2
|
||||
; SSE2-NEXT: {{.*#+}} kill: XMM0<def> XMM2<kill>
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
|
||||
; SSE2-NEXT: cvttss2si %xmm0, %rax
|
||||
; SSE2-NEXT: movd %eax, %xmm0
|
||||
|
@ -843,7 +843,6 @@ define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) {
|
||||
define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
|
||||
; ALL-LABEL: insert_reg_and_zero_v4f64:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: # kill: XMM0<def> XMM0<kill> YMM0<def>
|
||||
; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
|
||||
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
|
||||
; ALL-NEXT: retq
|
||||
|
@ -9,7 +9,6 @@ define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp
|
||||
; SSE2: # BB#0: # %entry
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE2-NEXT: pxor %xmm2, %xmm2
|
||||
; SSE2-NEXT: # kill
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
|
||||
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
|
||||
; SSE2-NEXT: pand .LCPI0_0(%rip), %xmm1
|
||||
@ -19,7 +18,6 @@ define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp
|
||||
; SSSE3: # BB#0: # %entry
|
||||
; SSSE3-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSSE3-NEXT: pxor %xmm2, %xmm2
|
||||
; SSSE3-NEXT: # kill
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
|
||||
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
|
||||
; SSSE3-NEXT: pand .LCPI0_0(%rip), %xmm1
|
||||
@ -156,7 +154,6 @@ define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %z) {
|
||||
; SSE2: # BB#0: # %entry
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE2-NEXT: pxor %xmm2, %xmm2
|
||||
; SSE2-NEXT: # kill
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
|
||||
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; SSE2-NEXT: pand .LCPI3_0(%rip), %xmm1
|
||||
@ -166,7 +163,6 @@ define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %z) {
|
||||
; SSSE3: # BB#0: # %entry
|
||||
; SSSE3-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSSE3-NEXT: pxor %xmm2, %xmm2
|
||||
; SSSE3-NEXT: # kill
|
||||
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
|
||||
; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
|
||||
; SSSE3-NEXT: pand .LCPI3_0(%rip), %xmm1
|
||||
@ -334,7 +330,6 @@ define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone
|
||||
; SSE2: # BB#0: # %entry
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE2-NEXT: pxor %xmm2, %xmm2
|
||||
; SSE2-NEXT: # kill
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
|
||||
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
|
||||
; SSE2-NEXT: retq
|
||||
@ -343,7 +338,6 @@ define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone
|
||||
; SSSE3: # BB#0: # %entry
|
||||
; SSSE3-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSSE3-NEXT: pxor %xmm2, %xmm2
|
||||
; SSSE3-NEXT: # kill
|
||||
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
|
||||
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
|
||||
; SSSE3-NEXT: retq
|
||||
@ -366,7 +360,6 @@ define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone
|
||||
;
|
||||
; AVX2-LABEL: shuf_zext_8i16_to_8i32:
|
||||
; AVX2: # BB#0: # %entry
|
||||
; AVX2-NEXT: # kill
|
||||
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX2-NEXT: retq
|
||||
entry:
|
||||
@ -380,7 +373,6 @@ define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone
|
||||
; SSE2: # BB#0: # %entry
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE2-NEXT: pxor %xmm2, %xmm2
|
||||
; SSE2-NEXT: # kill
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
|
||||
; SSE2-NEXT: retq
|
||||
@ -389,7 +381,6 @@ define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone
|
||||
; SSSE3: # BB#0: # %entry
|
||||
; SSSE3-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSSE3-NEXT: pxor %xmm2, %xmm2
|
||||
; SSSE3-NEXT: # kill
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
|
||||
; SSSE3-NEXT: retq
|
||||
@ -413,7 +404,6 @@ define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone
|
||||
;
|
||||
; AVX2-LABEL: shuf_zext_4i32_to_4i64:
|
||||
; AVX2: # BB#0: # %entry
|
||||
; AVX2-NEXT: # kill
|
||||
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX2-NEXT: retq
|
||||
entry:
|
||||
|
Loading…
Reference in New Issue
Block a user