From 42f72e77560141b226a3a1b00fea2b455e3ddaba Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Mon, 18 Mar 2013 23:00:58 +0000 Subject: [PATCH] Fix PPC unaligned 64-bit loads and stores PPC64 supports unaligned loads and stores of 64-bit values, but in order to use the r+i forms, the offset must be a multiple of 4. Unfortunately, this cannot always be determined by examining the immediate itself because it might be available only via a TOC entry. In order to get around this issue, we additionally predicate the selection of the r+i form on the alignment of the load or store (forcing it to be at least 4 in order to select the r+i form). llvm-svn: 177338 --- lib/Target/PowerPC/PPCISelLowering.cpp | 8 ++++++- lib/Target/PowerPC/PPCInstr64Bit.td | 26 +++++++++++++++++---- lib/Target/PowerPC/PPCInstrInfo.td | 32 ++++++++++++++++++++++++++ test/CodeGen/PowerPC/unal4-std.ll | 24 +++++++++++++++++++ 4 files changed, 84 insertions(+), 6 deletions(-) create mode 100644 test/CodeGen/PowerPC/unal4-std.ll diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index a7e9d56fa9d..741e25e2001 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1183,13 +1183,15 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue Ptr; EVT VT; + unsigned Alignment; if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { Ptr = LD->getBasePtr(); VT = LD->getMemoryVT(); - + Alignment = LD->getAlignment(); } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { Ptr = ST->getBasePtr(); VT = ST->getMemoryVT(); + Alignment = ST->getAlignment(); } else return false; @@ -1208,6 +1210,10 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, if (!SelectAddressRegImm(Ptr, Offset, Base, DAG)) return false; } else { + // LDU/STU need an address with at least 4-byte alignment. + if (Alignment < 4) + return false; + // reg + imm * 4. 
if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG)) return false; diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 01201304f74..bca1bd50782 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -555,7 +555,8 @@ def LHA8: DForm_1<42, (outs G8RC:$rD), (ins memri:$src), PPC970_DGroup_Cracked; def LWA : DSForm_1<58, 2, (outs G8RC:$rD), (ins memrix:$src), "lwa $rD, $src", LdStLWA, - [(set G8RC:$rD, (sextloadi32 ixaddr:$src))]>, isPPC64, + [(set G8RC:$rD, + (aligned4sextloadi32 ixaddr:$src))]>, isPPC64, PPC970_DGroup_Cracked; def LHAX8: XForm_1<31, 343, (outs G8RC:$rD), (ins memrr:$src), "lhax $rD, $src", LdStLHA, @@ -648,7 +649,7 @@ def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result), let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LD : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src), "ld $rD, $src", LdStLD, - [(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64; + [(set G8RC:$rD, (aligned4load ixaddr:$src))]>, isPPC64; def LDrs : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrs:$src), "ld $rD, $src", LdStLD, []>, isPPC64; @@ -682,6 +683,10 @@ def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins), def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src), "ldx $rD, $src", LdStLD, [(set G8RC:$rD, (load xaddr:$src))]>, isPPC64; +let isCodeGenOnly = 1 in +def LDXu : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src), + "ldx $rD, $src", LdStLD, + [(set G8RC:$rD, (load xaddr:$src))]>, isPPC64; let mayLoad = 1 in def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr), @@ -798,7 +803,7 @@ def STWX8 : XForm_8<31, 151, (outs), (ins G8RC:$rS, memrr:$dst), // Normal 8-byte stores. 
def STD : DSForm_1<62, 0, (outs), (ins G8RC:$rS, memrix:$dst), "std $rS, $dst", LdStSTD, - [(store G8RC:$rS, ixaddr:$dst)]>, isPPC64; + [(aligned4store G8RC:$rS, ixaddr:$dst)]>, isPPC64; def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst), "stdx $rS, $dst", LdStSTD, [(store G8RC:$rS, xaddr:$dst)]>, isPPC64, @@ -833,8 +838,9 @@ def STWU8 : DForm_1a<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS, def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS, s16immX4:$ptroff, ptr_rc:$ptrreg), "stdu $rS, $ptroff($ptrreg)", LdStSTDU, - [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, + [(set ptr_rc:$ea_res, + (aligned4pre_store G8RC:$rS, ptr_rc:$ptrreg, + iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">, isPPC64; @@ -979,3 +985,13 @@ def : Pat<(add G8RC:$in, (PPChi tjumptable:$g, 0)), (ADDIS8 G8RC:$in, tjumptable:$g)>; def : Pat<(add G8RC:$in, (PPChi tblockaddress:$g, 0)), (ADDIS8 G8RC:$in, tblockaddress:$g)>; + +// Patterns to match r+r indexed loads and stores for +// addresses without at least 4-byte alignment. +def : Pat<(i64 (unaligned4sextloadi32 xoaddr:$src)), + (LWAX xoaddr:$src)>; +def : Pat<(i64 (unaligned4load xoaddr:$src)), + (LDX xoaddr:$src)>; +def : Pat<(unaligned4store G8RC:$rS, xoaddr:$dst), + (STDX G8RC:$rS, xoaddr:$dst)>; + diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 460e94342dc..3f181aadb11 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -278,6 +278,38 @@ def imm16ShiftedSExt : PatLeaf<(imm), [{ return N->getZExtValue() == (uint64_t)(int)N->getZExtValue(); }], HI16>; +// Some r+i load/store instructions (such as LD, STD, LDU, etc.) that require +// restricted memrix (offset/4) constants are alignment sensitive. If these +// offsets are hidden behind TOC entries then the values of the lower-order +// bits cannot be checked directly. 
As a result, we need to also incorporate +// an alignment check into the relevant patterns. + +def aligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() >= 4; +}]>; +def aligned4store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() >= 4; +}]>; +def aligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() >= 4; +}]>; +def aligned4pre_store : PatFrag< + (ops node:$val, node:$base, node:$offset), + (pre_store node:$val, node:$base, node:$offset), [{ + return cast<StoreSDNode>(N)->getAlignment() >= 4; +}]>; + +def unaligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() < 4; +}]>; +def unaligned4store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() < 4; +}]>; +def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() < 4; +}]>; //===----------------------------------------------------------------------===// // PowerPC Flag Definitions. 
diff --git a/test/CodeGen/PowerPC/unal4-std.ll b/test/CodeGen/PowerPC/unal4-std.ll new file mode 100644 index 00000000000..dc4c20cc3fe --- /dev/null +++ b/test/CodeGen/PowerPC/unal4-std.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -mcpu=pwr7 | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define fastcc void @copy_to_conceal() #0 { +entry: + br i1 undef, label %if.then, label %if.end210 + +if.then: ; preds = %entry + br label %vector.body.i + +vector.body.i: ; preds = %vector.body.i, %if.then + %index.i = phi i64 [ 0, %vector.body.i ], [ 0, %if.then ] + store <8 x i16> zeroinitializer, <8 x i16>* undef, align 2 + br label %vector.body.i + +if.end210: ; preds = %entry + ret void + +; CHECK: @copy_to_conceal +; CHECK: stdx {{[0-9]+}}, 0, +} + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }