From f3c791538ed7fefa78969f9ac3789158aadf6df9 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Fri, 29 May 2015 18:19:25 +0000 Subject: [PATCH] MachineCopyPropagation: Remove the copies instead of using KILL instructions. For some history here see the commit messages of r199797 and r169060. The original intent was to fix cases like: %EAX = COPY %ECX, %RAX %RCX = COPY %RAX where simply removing the copies would have RCX undefined as in terms of machine operands only the ECX part of it is defined. The machine verifier would complain about this so 169060 changed such COPY instructions into KILL instructions so some super-register imp-defs would be preserved. In r199797 it was finally decided to always do this regardless of super-register defs. But this is wrong, consider: R1 = COPY R0 ... R0 = COPY R1 getting changed to: R1 = KILL R0 ... R0 = KILL R1 It now looks like R0 dies at the first KILL and won't be alive until the second KILL, while in reality R0 is alive and must not change in this part of the program. As this only happens after register allocation there is not much code still performing liveness queries so the issue was not noticed. In fact I didn't manage to create a testcase for this, without unrelated changes I am working on at the moment. The fix is simple: As of r223896 the MachineVerifier allows reads from partially defined registers, so the whole transforming COPY->KILL thing is not necessary anymore. This patch also changes a similar (but more benign case as the def and src are the same register) case in the VirtRegRewriter. Differential Revision: http://reviews.llvm.org/D10117 llvm-svn: 238588 --- lib/CodeGen/MachineCopyPropagation.cpp | 14 +++----------- lib/CodeGen/VirtRegMap.cpp | 16 +++++----------- test/CodeGen/X86/avx512-vec-cmp.ll | 7 ------- test/CodeGen/X86/critical-anti-dep-breaker.ll | 3 +-- test/CodeGen/X86/vec_fp_to_int.ll | 2 -- test/CodeGen/X86/vector-shuffle-256-v4.ll | 1 - test/CodeGen/X86/vector-zext.ll | 10 ---------- 7 files changed, 9 insertions(+), 44 deletions(-) diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp index 602c1fa8d00..a6863412132 100644 --- a/lib/CodeGen/MachineCopyPropagation.cpp +++ b/lib/CodeGen/MachineCopyPropagation.cpp @@ -54,7 +54,6 @@ namespace { SourceMap &SrcMap, DenseMap &AvailCopyMap); bool CopyPropagateBlock(MachineBasicBlock &MBB); - void removeCopy(MachineInstr *MI); }; } char MachineCopyPropagation::ID = 0; @@ -127,13 +126,6 @@ static bool isNopCopy(MachineInstr *CopyMI, unsigned Def, unsigned Src, return false; } -// Remove MI from the function because it has been determined it is dead. -// Turn it into a noop KILL instruction as opposed to removing it to -// maintain imp-use/imp-def chains. -void MachineCopyPropagation::removeCopy(MachineInstr *MI) { - MI->setDesc(TII->get(TargetOpcode::KILL)); -} - bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { SmallSetVector MaybeDeadCopies; // Candidates for deletion DenseMap AvailCopyMap; // Def -> available copies map @@ -183,7 +175,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { for (MachineBasicBlock::iterator I = CopyMI, E = MI; I != E; ++I) I->clearRegisterKills(Def, TRI); - removeCopy(MI); + MI->eraseFromParent(); Changed = true; ++NumDeletes; continue; @@ -291,7 +283,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { continue; DEBUG(dbgs() << "MCP: Removing copy due to regmask clobbering: "; (*DI)->dump()); - removeCopy(*DI); + (*DI)->eraseFromParent(); Changed = true; ++NumDeletes; } @@ -327,7 +319,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end(); DI != DE; ++DI) { if (!MRI->isReserved((*DI)->getOperand(0).getReg())) { - removeCopy(*DI); + (*DI)->eraseFromParent(); Changed = true; ++NumDeletes; } diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 9fb1b5b65fb..32d5100f849 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -417,17 +417,11 @@ void VirtRegRewriter::rewrite() { // Finally, remove any identity copies. if (MI->isIdentityCopy()) { ++NumIdCopies; - if (MI->getNumOperands() == 2) { - DEBUG(dbgs() << "Deleting identity copy.\n"); - if (Indexes) - Indexes->removeMachineInstrFromMaps(MI); - // It's safe to erase MI because MII has already been incremented. - MI->eraseFromParent(); - } else { - // Transform identity copy to a KILL to deal with subregisters. - MI->setDesc(TII->get(TargetOpcode::KILL)); - DEBUG(dbgs() << "Identity copy: " << *MI); - } + DEBUG(dbgs() << "Deleting identity copy.\n"); + if (Indexes) + Indexes->removeMachineInstrFromMaps(MI); + // It's safe to erase MI because MII has already been incremented. + MI->eraseFromParent(); } } } diff --git a/test/CodeGen/X86/avx512-vec-cmp.ll b/test/CodeGen/X86/avx512-vec-cmp.ll index e1f6276c6ef..04028a1da51 100644 --- a/test/CodeGen/X86/avx512-vec-cmp.ll +++ b/test/CodeGen/X86/avx512-vec-cmp.ll @@ -116,11 +116,8 @@ define <2 x double> @test8(<2 x double> %a, <2 x double> %b) { define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind { ; KNL-LABEL: test9: ; KNL: ## BB#0: -; KNL-NEXT: ## kill: YMM1 YMM1 ZMM1 -; KNL-NEXT: ## kill: YMM0 YMM0 ZMM0 ; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 ; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} -; KNL-NEXT: ## kill: YMM0 YMM0 ZMM0 ; KNL-NEXT: retq %mask = icmp eq <8 x i32> %x, %y %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y @@ -130,11 +127,8 @@ define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind { define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind { ; KNL-LABEL: test10: ; KNL: ## BB#0: -; KNL-NEXT: ## kill: YMM1 YMM1 ZMM1 -; KNL-NEXT: ## kill: YMM0 YMM0 ZMM0 ; KNL-NEXT: vcmpeqps %zmm1, %zmm0, %k1 ; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} -; KNL-NEXT: ## kill: YMM0 YMM0 ZMM0 ; KNL-NEXT: retq ; SKX-LABEL: test10: ; SKX: ## BB#0: @@ -166,7 +160,6 @@ define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind { ; KNL-NEXT: vpcmpeqq %zmm3, %zmm1, %k1 ; KNL-NEXT: kunpckbw %k0, %k1, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: ## kill: AX AX EAX ; KNL-NEXT: retq %res = icmp eq <16 x i64> %a, %b %res1 = bitcast <16 x i1> %res to i16 diff --git a/test/CodeGen/X86/critical-anti-dep-breaker.ll b/test/CodeGen/X86/critical-anti-dep-breaker.ll index 86afc1f245a..de5744d3a88 100644 --- a/test/CodeGen/X86/critical-anti-dep-breaker.ll +++ b/test/CodeGen/X86/critical-anti-dep-breaker.ll @@ -11,8 +11,7 @@ @NullToken = external global i64 ; CHECK-LABEL: Part_Create: -; CHECK-DAG: # kill: RDI -; CHECK-DAG: movq PartClass@GOTPCREL(%rip), %r10 +; CHECK: movq PartClass@GOTPCREL(%rip), %r10 define i32 @Part_Create(i64* %Anchor, i32 %TypeNum, i32 %F, i32 %Z, i32* %Status, i64* %PartTkn) { %PartObj = alloca i64*, align 8 %Vchunk = alloca i64, align 8 diff --git a/test/CodeGen/X86/vec_fp_to_int.ll b/test/CodeGen/X86/vec_fp_to_int.ll index 9f1c7afa295..3e72212d85d 100644 --- a/test/CodeGen/X86/vec_fp_to_int.ll +++ b/test/CodeGen/X86/vec_fp_to_int.ll @@ -239,7 +239,6 @@ define <4 x i64> @fptoui_4vf64(<4 x double> %a) { ; SSE2: # BB#0: ; SSE2-NEXT: movapd %xmm0, %xmm2 ; SSE2-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero -; SSE2-NEXT: {{.*#+}} kill: XMM0 XMM2 ; SSE2-NEXT: subsd %xmm3, %xmm0 ; SSE2-NEXT: cvttsd2si %xmm0, %rcx ; SSE2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 @@ -589,7 +588,6 @@ define <8 x i32> @fptoui_8vf32(<8 x float> %a) { ; SSE2-LABEL: fptoui_8vf32: ; SSE2: # BB#0: ; SSE2-NEXT: movaps %xmm0, %xmm2 -; SSE2-NEXT: {{.*#+}} kill: XMM0 XMM2 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE2-NEXT: cvttss2si %xmm0, %rax ; SSE2-NEXT: movd %eax, %xmm0 diff --git a/test/CodeGen/X86/vector-shuffle-256-v4.ll b/test/CodeGen/X86/vector-shuffle-256-v4.ll index 1b42a637907..944ec4b8d3a 100644 --- a/test/CodeGen/X86/vector-shuffle-256-v4.ll +++ b/test/CodeGen/X86/vector-shuffle-256-v4.ll @@ -843,7 +843,6 @@ define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) { define <4 x double> @insert_reg_and_zero_v4f64(double %a) { ; ALL-LABEL: insert_reg_and_zero_v4f64: ; ALL: # BB#0: -; ALL-NEXT: # kill: XMM0 XMM0 YMM0 ; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1 ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] ; ALL-NEXT: retq diff --git a/test/CodeGen/X86/vector-zext.ll b/test/CodeGen/X86/vector-zext.ll index 42781830ff2..c64e1744267 100644 --- a/test/CodeGen/X86/vector-zext.ll +++ b/test/CodeGen/X86/vector-zext.ll @@ -9,7 +9,6 @@ define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp ; SSE2: # BB#0: # %entry ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: pxor %xmm2, %xmm2 -; SSE2-NEXT: # kill ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] ; SSE2-NEXT: pand .LCPI0_0(%rip), %xmm1 @@ -19,7 +18,6 @@ define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp ; SSSE3: # BB#0: # %entry ; SSSE3-NEXT: movdqa %xmm0, %xmm1 ; SSSE3-NEXT: pxor %xmm2, %xmm2 -; SSSE3-NEXT: # kill ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] ; SSSE3-NEXT: pand .LCPI0_0(%rip), %xmm1 @@ -156,7 +154,6 @@ define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %z) { ; SSE2: # BB#0: # %entry ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: pxor %xmm2, %xmm2 -; SSE2-NEXT: # kill ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE2-NEXT: pand .LCPI3_0(%rip), %xmm1 @@ -166,7 +163,6 @@ define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %z) { ; SSSE3: # BB#0: # %entry ; SSSE3-NEXT: movdqa %xmm0, %xmm1 ; SSSE3-NEXT: pxor %xmm2, %xmm2 -; SSSE3-NEXT: # kill ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSSE3-NEXT: pand .LCPI3_0(%rip), %xmm1 @@ -334,7 +330,6 @@ define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ; SSE2: # BB#0: # %entry ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: pxor %xmm2, %xmm2 -; SSE2-NEXT: # kill ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] ; SSE2-NEXT: retq @@ -343,7 +338,6 @@ define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ; SSSE3: # BB#0: # %entry ; SSSE3-NEXT: movdqa %xmm0, %xmm1 ; SSSE3-NEXT: pxor %xmm2, %xmm2 -; SSSE3-NEXT: # kill ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] ; SSSE3-NEXT: retq @@ -366,7 +360,6 @@ define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ; ; AVX2-LABEL: shuf_zext_8i16_to_8i32: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: # kill ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX2-NEXT: retq entry: @@ -380,7 +373,6 @@ define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ; SSE2: # BB#0: # %entry ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: pxor %xmm2, %xmm2 -; SSE2-NEXT: # kill ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] ; SSE2-NEXT: retq @@ -389,7 +381,6 @@ define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ; SSSE3: # BB#0: # %entry ; SSSE3-NEXT: movdqa %xmm0, %xmm1 ; SSSE3-NEXT: pxor %xmm2, %xmm2 -; SSSE3-NEXT: # kill ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] ; SSSE3-NEXT: retq @@ -413,7 +404,6 @@ define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ; ; AVX2-LABEL: shuf_zext_4i32_to_4i64: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: # kill ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero ; AVX2-NEXT: retq entry: