mirror of
https://github.com/RPCS3/llvm.git
synced 2025-02-03 17:24:24 +00:00
Add the PPC64 ldbrx/stdbrx instructions
These are 64-bit loads/stores with byte-swap, available on the P7 and the A2. Like the similar instructions for 16- and 32-bit words, these are matched in the target DAG-combine phase against load/store-bswap pairs.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178276 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
ce88835110
commit
efdd4673d6
@ -63,6 +63,8 @@ def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true",
|
||||
"Enable the isel instruction">;
|
||||
def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", "true",
|
||||
"Enable the popcnt[dw] instructions">;
|
||||
def FeatureLDBRX : SubtargetFeature<"ldbrx","HasLDBRX", "true",
|
||||
"Enable the ldbrx instruction">;
|
||||
def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true",
|
||||
"Enable Book E instructions">;
|
||||
def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
|
||||
@ -77,7 +79,6 @@ def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
|
||||
// FPRND p5x, p6, p6x, p7 frim, frin, frip, friz
|
||||
// FRE p5 through p7 fre (vs. fres, available since p3)
|
||||
// FRSQRTES p5 through p7 frsqrtes (vs. frsqrte, available since p3)
|
||||
// LDBRX p7 load with byte reversal
|
||||
// LFIWAX p6, p6x, p7 lfiwax
|
||||
// LFIWZX p7 lfiwzx
|
||||
// POPCNTB p5 through p7 popcntb and related instructions
|
||||
@ -129,17 +130,16 @@ def : ProcessorModel<"e500mc", PPCE500mcModel,
|
||||
def : ProcessorModel<"e5500", PPCE5500Model,
|
||||
[DirectiveE5500, FeatureMFOCRF, Feature64Bit,
|
||||
FeatureSTFIWX, FeatureBookE, FeatureISEL]>;
|
||||
def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE,
|
||||
FeatureMFOCRF, FeatureFSqrt,
|
||||
FeatureSTFIWX, FeatureISEL,
|
||||
FeaturePOPCNTD, Feature64Bit
|
||||
/*, Feature64BitRegs */]>;
|
||||
def : Processor<"a2q", PPCA2Itineraries, [DirectiveA2, FeatureBookE,
|
||||
FeatureMFOCRF, FeatureFSqrt,
|
||||
FeatureSTFIWX, FeatureISEL,
|
||||
FeaturePOPCNTD, Feature64Bit
|
||||
/*, Feature64BitRegs */,
|
||||
FeatureQPX]>;
|
||||
def : Processor<"a2", PPCA2Itineraries,
|
||||
[DirectiveA2, FeatureBookE, FeatureMFOCRF,
|
||||
FeatureFSqrt, FeatureSTFIWX, FeatureISEL,
|
||||
FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
|
||||
/*, Feature64BitRegs */]>;
|
||||
def : Processor<"a2q", PPCA2Itineraries,
|
||||
[DirectiveA2, FeatureBookE, FeatureMFOCRF,
|
||||
FeatureFSqrt, FeatureSTFIWX, FeatureISEL,
|
||||
FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
|
||||
/*, Feature64BitRegs */, FeatureQPX]>;
|
||||
def : Processor<"pwr3", G5Itineraries,
|
||||
[DirectivePwr3, FeatureAltivec, FeatureMFOCRF,
|
||||
FeatureSTFIWX, Feature64Bit]>;
|
||||
@ -162,8 +162,8 @@ def : Processor<"pwr6x", G5Itineraries,
|
||||
def : Processor<"pwr7", G5Itineraries,
|
||||
[DirectivePwr7, FeatureAltivec,
|
||||
FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
|
||||
FeatureISEL, FeaturePOPCNTD, Feature64Bit
|
||||
/*, Feature64BitRegs */]>;
|
||||
FeatureISEL, FeaturePOPCNTD, FeatureLDBRX,
|
||||
Feature64Bit /*, Feature64BitRegs */]>;
|
||||
def : Processor<"ppc", G3Itineraries, [Directive32]>;
|
||||
def : Processor<"ppc64", G5Itineraries,
|
||||
[Directive64, FeatureAltivec,
|
||||
|
@ -6604,7 +6604,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
|
||||
N->getOperand(1).getOpcode() == ISD::BSWAP &&
|
||||
N->getOperand(1).getNode()->hasOneUse() &&
|
||||
(N->getOperand(1).getValueType() == MVT::i32 ||
|
||||
N->getOperand(1).getValueType() == MVT::i16)) {
|
||||
N->getOperand(1).getValueType() == MVT::i16 ||
|
||||
(TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
|
||||
N->getOperand(1).getValueType() == MVT::i64))) {
|
||||
SDValue BSwapOp = N->getOperand(1).getOperand(0);
|
||||
// Do an any-extend to 32-bits if this is a half-word input.
|
||||
if (BSwapOp.getValueType() == MVT::i16)
|
||||
@ -6625,7 +6627,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
|
||||
// Turn BSWAP (LOAD) -> lhbrx/lwbrx (or ldbrx for i64 when the subtarget has it).
|
||||
if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
|
||||
N->getOperand(0).hasOneUse() &&
|
||||
(N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) {
|
||||
(N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
|
||||
(TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
|
||||
N->getValueType(0) == MVT::i64))) {
|
||||
SDValue Load = N->getOperand(0);
|
||||
LoadSDNode *LD = cast<LoadSDNode>(Load);
|
||||
// Create the byte-swapping load.
|
||||
@ -6636,7 +6640,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
|
||||
};
|
||||
SDValue BSLoad =
|
||||
DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
|
||||
DAG.getVTList(MVT::i32, MVT::Other), Ops, 3,
|
||||
DAG.getVTList(N->getValueType(0) == MVT::i64 ?
|
||||
MVT::i64 : MVT::i32, MVT::Other),
|
||||
Ops, 3,
|
||||
LD->getMemoryVT(), LD->getMemOperand());
|
||||
|
||||
// If this is an i16 load, insert the truncate.
|
||||
|
@ -658,7 +658,10 @@ def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins),
|
||||
def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src),
|
||||
"ldx $rD, $src", LdStLD,
|
||||
[(set i64:$rD, (load xaddr:$src))]>, isPPC64;
|
||||
|
||||
def LDBRX : XForm_1<31, 532, (outs G8RC:$rD), (ins memrr:$src),
|
||||
"ldbrx $rD, $src", LdStLoad,
|
||||
[(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64;
|
||||
|
||||
let mayLoad = 1 in
|
||||
def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr),
|
||||
"ldu $rD, $addr", LdStLDU,
|
||||
@ -779,6 +782,11 @@ def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst),
|
||||
"stdx $rS, $dst", LdStSTD,
|
||||
[(store i64:$rS, xaddr:$dst)]>, isPPC64,
|
||||
PPC970_DGroup_Cracked;
|
||||
def STDBRX: XForm_8<31, 660, (outs), (ins G8RC:$rS, memrr:$dst),
|
||||
"stdbrx $rS, $dst", LdStStore,
|
||||
[(PPCstbrx i64:$rS, xoaddr:$dst, i64)]>, isPPC64,
|
||||
PPC970_DGroup_Cracked;
|
||||
|
||||
// STD_32/STDX_32 - Just like STD/STDX, but uses a '32-bit' input register.
|
||||
def STD_32 : DSForm_1<62, 0, (outs), (ins GPRC:$rT, memrix:$dst),
|
||||
"std $rT, $dst", LdStSTD,
|
||||
|
@ -36,10 +36,10 @@ def SDT_PPCcondbr : SDTypeProfile<0, 3, [
|
||||
]>;
|
||||
|
||||
def SDT_PPClbrx : SDTypeProfile<1, 2, [
|
||||
SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
|
||||
SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
|
||||
]>;
|
||||
def SDT_PPCstbrx : SDTypeProfile<0, 3, [
|
||||
SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
|
||||
SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
|
||||
]>;
|
||||
|
||||
def SDT_PPClarx : SDTypeProfile<1, 1, [
|
||||
|
@ -41,6 +41,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
|
||||
, HasSTFIWX(false)
|
||||
, HasISEL(false)
|
||||
, HasPOPCNTD(false)
|
||||
, HasLDBRX(false)
|
||||
, IsBookE(false)
|
||||
, HasLazyResolverStubs(false)
|
||||
, IsJITCodeModel(false)
|
||||
|
@ -80,6 +80,7 @@ protected:
|
||||
bool HasSTFIWX;
|
||||
bool HasISEL;
|
||||
bool HasPOPCNTD;
|
||||
bool HasLDBRX;
|
||||
bool IsBookE;
|
||||
bool HasLazyResolverStubs;
|
||||
bool IsJITCodeModel;
|
||||
@ -161,6 +162,7 @@ public:
|
||||
bool hasMFOCRF() const { return HasMFOCRF; }
|
||||
bool hasISEL() const { return HasISEL; }
|
||||
bool hasPOPCNTD() const { return HasPOPCNTD; }
|
||||
bool hasLDBRX() const { return HasLDBRX; }
|
||||
bool isBookE() const { return IsBookE; }
|
||||
|
||||
const Triple &getTargetTriple() const { return TargetTriple; }
|
||||
|
@ -1,5 +1,6 @@
|
||||
; RUN: llc < %s -march=ppc32 | FileCheck %s -check-prefix=X32
|
||||
; RUN: llc < %s -march=ppc64 | FileCheck %s -check-prefix=X64
|
||||
; RUN: llc < %s -march=ppc64 -mcpu=pwr7 | FileCheck %s -check-prefix=PWR7
|
||||
|
||||
|
||||
define void @STWBRX(i32 %i, i8* %ptr, i32 %off) {
|
||||
@ -34,18 +35,47 @@ define i16 @LHBRX(i8* %ptr, i32 %off) {
|
||||
ret i16 %tmp6
|
||||
}
|
||||
|
||||
define void @STDBRX(i64 %i, i8* %ptr, i64 %off) {
|
||||
%tmp1 = getelementptr i8* %ptr, i64 %off ; <i8*> [#uses=1]
|
||||
%tmp1.upgrd.1 = bitcast i8* %tmp1 to i64* ; <i64*> [#uses=1]
|
||||
%tmp13 = tail call i64 @llvm.bswap.i64( i64 %i ) ; <i64> [#uses=1]
|
||||
store i64 %tmp13, i64* %tmp1.upgrd.1
|
||||
ret void
|
||||
}
|
||||
|
||||
define i64 @LDBRX(i8* %ptr, i64 %off) {
|
||||
%tmp1 = getelementptr i8* %ptr, i64 %off ; <i8*> [#uses=1]
|
||||
%tmp1.upgrd.2 = bitcast i8* %tmp1 to i64* ; <i64*> [#uses=1]
|
||||
%tmp = load i64* %tmp1.upgrd.2 ; <i64> [#uses=1]
|
||||
%tmp14 = tail call i64 @llvm.bswap.i64( i64 %tmp ) ; <i64> [#uses=1]
|
||||
ret i64 %tmp14
|
||||
}
|
||||
|
||||
declare i32 @llvm.bswap.i32(i32)
|
||||
|
||||
declare i16 @llvm.bswap.i16(i16)
|
||||
|
||||
declare i64 @llvm.bswap.i64(i64)
|
||||
|
||||
|
||||
; X32: stwbrx
|
||||
; X32: lwbrx
|
||||
; X32: sthbrx
|
||||
; X32: lhbrx
|
||||
; X32-NOT: ldbrx
|
||||
; X32-NOT: stdbrx
|
||||
|
||||
; X64: stwbrx
|
||||
; X64: lwbrx
|
||||
; X64: sthbrx
|
||||
; X64: lhbrx
|
||||
; X64-NOT: ldbrx
|
||||
; X64-NOT: stdbrx
|
||||
|
||||
; PWR7: stwbrx
|
||||
; PWR7: lwbrx
|
||||
; PWR7: sthbrx
|
||||
; PWR7: lhbrx
|
||||
; PWR7: stdbrx
|
||||
; PWR7: ldbrx
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user