From 5e0692561db36569e4468610fe8370c25599a4b3 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Fri, 5 Aug 2016 15:22:05 +0000 Subject: [PATCH] [PowerPC] Wrong fast-isel codegen for VSX floating-point loads There were two locations where fast-isel would generate a LFD instruction with a target register class VSFRC instead of F8RC when VSX was enabled. This can ccause invalid registers to be used in certain cases, like: lfd 36, ... instead of using a VSX load instruction. The wrong register number gets silently truncated, causing invalid code to be generated. The first place is PPCFastISel::PPCEmitLoad, which had multiple problems: 1.) The IsVSSRC and IsVSFRC flags are not initialized correctly, since they are computed from resultReg, which is still zero at this point in many cases. Fixed by changing the helper routines to operate on a register class instead of a register and passing in UseRC. 2.) Even with this fixed, Is64VSXLoad is still wrong due to a typo: bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS; bool Is64VSXLoad = IsVSSRC && Opc == PPC::LFD; The second line needs to use isVSFRC (like PPCEmitStore does). 3.) Once both the above are fixed, we're now generating a VSX instruction -- but an incorrect one, since generation of an indexed instruction with null index is wrong. Fixed by copying the code handling the same issue in PPCEmitStore. The second place is PPCFastISel::PPCMaterializeFP, where we would emit an LFD to load a constant from the literal pool, and use the wrong result register class. Fixed by hardcoding a F8RC class even on systems supporting VSX. Fixes: https://llvm.org/bugs/show_bug.cgi?id=28630 Differential Revision: https://reviews.llvm.org/D22632 llvm-svn: 277823 --- lib/Target/PowerPC/PPCFastISel.cpp | 36 ++++++++++++++++++++---------- test/CodeGen/PowerPC/pr28630.ll | 13 +++++++++++ 2 files changed, 37 insertions(+), 12 deletions(-) create mode 100644 test/CodeGen/PowerPC/pr28630.ll diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index 7e92042d2f9..b69acc5e5e7 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -146,11 +146,11 @@ class PPCFastISel final : public FastISel { bool isTypeLegal(Type *Ty, MVT &VT); bool isLoadTypeLegal(Type *Ty, MVT &VT); bool isValueAvailable(const Value *V) const; - bool isVSFRCRegister(unsigned Register) const { - return MRI.getRegClass(Register)->getID() == PPC::VSFRCRegClassID; + bool isVSFRCRegClass(const TargetRegisterClass *RC) const { + return RC->getID() == PPC::VSFRCRegClassID; } - bool isVSSRCRegister(unsigned Register) const { - return MRI.getRegClass(Register)->getID() == PPC::VSSRCRegClassID; + bool isVSSRCRegClass(const TargetRegisterClass *RC) const { + return RC->getID() == PPC::VSSRCRegClassID; } bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt, unsigned DestReg); @@ -521,10 +521,10 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, // If this is a potential VSX load with an offset of 0, a VSX indexed load can // be used. - bool IsVSSRC = (ResultReg != 0) && isVSSRCRegister(ResultReg); - bool IsVSFRC = (ResultReg != 0) && isVSFRCRegister(ResultReg); + bool IsVSSRC = isVSSRCRegClass(UseRC); + bool IsVSFRC = isVSFRCRegClass(UseRC); bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS; - bool Is64VSXLoad = IsVSSRC && Opc == PPC::LFD; + bool Is64VSXLoad = IsVSFRC && Opc == PPC::LFD; if ((Is32VSXLoad || Is64VSXLoad) && (Addr.BaseType != Address::FrameIndexBase) && UseOffset && (Addr.Offset == 0)) { @@ -579,8 +579,18 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, case PPC::LFS: Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break; case PPC::LFD: Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break; } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) - .addReg(Addr.Base.Reg).addReg(IndexReg); + + auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(Opc), ResultReg); + + // If we have an index register defined we use it in the store inst, + // otherwise we use X0 as base as it makes the vector instructions to + // use zero in the computation of the effective address regardless the + // content of the register. + if (IndexReg) + MIB.addReg(Addr.Base.Reg).addReg(IndexReg); + else + MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg); } return true; @@ -657,8 +667,8 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) { // If this is a potential VSX store with an offset of 0, a VSX indexed store // can be used. - bool IsVSSRC = isVSSRCRegister(SrcReg); - bool IsVSFRC = isVSFRCRegister(SrcReg); + bool IsVSSRC = isVSSRCRegClass(RC); + bool IsVSFRC = isVSFRCRegClass(RC); bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS; bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD; if ((Is32VSXStore || Is64VSXStore) && @@ -1907,7 +1917,9 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) { unsigned Align = DL.getPrefTypeAlignment(CFP->getType()); assert(Align > 0 && "Unexpectedly missing alignment information!"); unsigned Idx = MCP.getConstantPoolIndex(cast(CFP), Align); - unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); + const TargetRegisterClass *RC = + (VT == MVT::f32) ? &PPC::F4RCRegClass : &PPC::F8RCRegClass; + unsigned DestReg = createResultReg(RC); CodeModel::Model CModel = TM.getCodeModel(); MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( diff --git a/test/CodeGen/PowerPC/pr28630.ll b/test/CodeGen/PowerPC/pr28630.ll new file mode 100644 index 00000000000..a6759fa04b5 --- /dev/null +++ b/test/CodeGen/PowerPC/pr28630.ll @@ -0,0 +1,13 @@ +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs -O0 < %s + +define double @test() { + ret double 1.000000e+00 +} + +@g = common global double 0.000000e+00, align 8 + +define double @testitd() { + %g = load double, double* @g, align 8 + ret double %g +} +