From 29faaebae93fa2a3264c97e929ad790c883ad97b Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Tue, 15 Mar 2011 05:13:13 +0000 Subject: [PATCH] Add a peephole optimization to optimize pairs of bitcasts. e.g. v2 = bitcast v1 ... v3 = bitcast v2 ... = v3 => v2 = bitcast v1 ... = v1 if v1 and v3 are of in the same register class. bitcast between i32 and fp (and others) are often not nops since they are in different register classes. These bitcast instructions are often left because they are in different basic blocks and cannot be eliminated by dag combine. rdar://9104514 llvm-svn: 127668 --- lib/CodeGen/PeepholeOptimizer.cpp | 101 +++++++++++++++++++++++++-- test/CodeGen/ARM/peephole-bitcast.ll | 23 ++++++ 2 files changed, 120 insertions(+), 4 deletions(-) create mode 100644 test/CodeGen/ARM/peephole-bitcast.ll diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index 5d7123caa01..c105bb06ebe 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -30,6 +30,15 @@ // If the "sub" instruction all ready sets (or could be modified to set) the // same flag that the "cmp" instruction sets and that "bz" uses, then we can // eliminate the "cmp" instruction. +// +// - Optimize Bitcast pairs: +// +// v1 = bitcast v0 +// v2 = bitcast v1 +// = v2 +// => +// v1 = bitcast v0 +// = v0 // //===----------------------------------------------------------------------===// @@ -57,7 +66,8 @@ DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), cl::desc("Disable the peephole optimizer")); STATISTIC(NumReuse, "Number of extension results reused"); -STATISTIC(NumEliminated, "Number of compares eliminated"); +STATISTIC(NumBitcasts, "Number of bitcasts eliminated"); +STATISTIC(NumCmps, "Number of compares eliminated"); STATISTIC(NumImmFold, "Number of move immediate foled"); namespace { @@ -85,6 +95,7 @@ namespace { } private: + bool OptimizeBitcastInstr(MachineInstr *MI, MachineBasicBlock *MBB); bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSet &LocalMIs); @@ -243,12 +254,85 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, return Changed; } +/// OptimizeBitcastInstr - If the instruction is a bitcast instruction A that +/// cannot be optimized away during isel (e.g. ARM::VMOVSR, which bitcast +/// a value cross register classes), and the source is defined by another +/// bitcast instruction B. And if the register class of source of B matches +/// the register class of instruction A, then it is legal to replace all uses +/// of the def of A with source of B. e.g. +/// %vreg0 = VMOVSR %vreg1 +/// %vreg3 = VMOVRS %vreg0 +/// Replace all uses of vreg3 with vreg1. + +bool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI, + MachineBasicBlock *MBB) { + unsigned NumDefs = MI->getDesc().getNumDefs(); + unsigned NumSrcs = MI->getDesc().getNumOperands() - NumDefs; + if (NumDefs != 1) + return false; + + unsigned Def = 0; + unsigned Src = 0; + for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isDef()) + Def = Reg; + else if (Src) + // Multiple sources? + return false; + else + Src = Reg; + } + + assert(Def && Src && "Malformed bitcast instruction!"); + + MachineInstr *DefMI = MRI->getVRegDef(Src); + if (!DefMI || !DefMI->getDesc().isBitcast()) + return false; + + unsigned SrcDef = 0; + unsigned SrcSrc = 0; + NumDefs = DefMI->getDesc().getNumDefs(); + NumSrcs = DefMI->getDesc().getNumOperands() - NumDefs; + if (NumDefs != 1) + return false; + for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) { + const MachineOperand &MO = DefMI->getOperand(i); + if (!MO.isReg() || MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isDef()) + SrcDef = Reg; + else if (SrcSrc) + // Multiple sources? + return false; + else + SrcSrc = Reg; + } + + if (MRI->getRegClass(SrcSrc) != MRI->getRegClass(Def)) + return false; + + MRI->replaceRegWith(Def, SrcSrc); + MRI->clearKillFlags(SrcSrc); + MI->eraseFromParent(); + ++NumBitcasts; + return true; +} + /// OptimizeCmpInstr - If the instruction is a compare and the previous /// instruction it's comparing against all ready sets (or could be modified to /// set) the same flag as the compare, then we can remove the comparison and use /// the flag from the previous instruction. bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI, - MachineBasicBlock *MBB){ + MachineBasicBlock *MBB) { // If this instruction is a comparison against zero and isn't comparing a // physical register, we can try to optimize it. unsigned SrcReg; @@ -259,7 +343,7 @@ bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI, // Attempt to optimize the comparison instruction. if (TII->OptimizeCompareInstr(MI, SrcReg, CmpMask, CmpValue, MRI)) { - ++NumEliminated; + ++NumCmps; return true; } @@ -345,7 +429,16 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { continue; } - if (MI->getDesc().isCompare()) { + const TargetInstrDesc &TID = MI->getDesc(); + + if (TID.isBitcast()) { + if (OptimizeBitcastInstr(MI, MBB)) { + // MI is deleted. + Changed = true; + MII = First ? I->begin() : llvm::next(PMII); + continue; + } + } else if (TID.isCompare()) { if (OptimizeCmpInstr(MI, MBB)) { // MI is deleted. Changed = true; diff --git a/test/CodeGen/ARM/peephole-bitcast.ll b/test/CodeGen/ARM/peephole-bitcast.ll new file mode 100644 index 00000000000..8d95d75b1d0 --- /dev/null +++ b/test/CodeGen/ARM/peephole-bitcast.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s + +; vmov s0, r0 + vmov r0, s0 should have been optimized away. +; rdar://9104514 + +define void @t(float %x) nounwind ssp { +entry: +; CHECK: t: +; CHECK-NOT: vmov +; CHECK: bl + %0 = bitcast float %x to i32 + %cmp = icmp ult i32 %0, 2139095039 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @doSomething(float %x) nounwind + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +declare void @doSomething(float)