mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-28 22:20:37 +00:00
[X86] Use ADD/SUB instead of INC/DEC for Silvermont
According to the Intel Software Optimization Manual, on Silvermont the INC and DEC instructions require an additional uop to merge the flags. As a result, a branch instruction that depends on an INC or a DEC instruction incurs a 1-cycle penalty. Differential Revision: http://reviews.llvm.org/D3990 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@210466 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
a8d18fe946
commit
a2bc6951a0
@ -168,6 +168,8 @@ def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
|
|||||||
"LEA instruction needs inputs at AG stage">;
|
"LEA instruction needs inputs at AG stage">;
|
||||||
def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
|
def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
|
||||||
"LEA instruction with certain arguments is slow">;
|
"LEA instruction with certain arguments is slow">;
|
||||||
|
def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
|
||||||
|
"INC and DEC instructions are slower than ADD and SUB">;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// X86 processors supported.
|
// X86 processors supported.
|
||||||
@ -228,7 +230,7 @@ def : ProcessorModel<"slm", SLMModel, [ProcIntelSLM,
|
|||||||
FeaturePCLMUL, FeatureAES,
|
FeaturePCLMUL, FeatureAES,
|
||||||
FeatureCallRegIndirect,
|
FeatureCallRegIndirect,
|
||||||
FeaturePRFCHW,
|
FeaturePRFCHW,
|
||||||
FeatureSlowLEA,
|
FeatureSlowLEA, FeatureSlowIncDec,
|
||||||
FeatureSlowBTMem, FeatureFastUAMem]>;
|
FeatureSlowBTMem, FeatureFastUAMem]>;
|
||||||
// "Arrandale" along with corei3 and corei5
|
// "Arrandale" along with corei3 and corei5
|
||||||
def : ProcessorModel<"corei7", SandyBridgeModel,
|
def : ProcessorModel<"corei7", SandyBridgeModel,
|
||||||
|
@ -10139,14 +10139,14 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl,
|
|||||||
if (ConstantSDNode *C =
|
if (ConstantSDNode *C =
|
||||||
dyn_cast<ConstantSDNode>(ArithOp.getNode()->getOperand(1))) {
|
dyn_cast<ConstantSDNode>(ArithOp.getNode()->getOperand(1))) {
|
||||||
// An add of one will be selected as an INC.
|
// An add of one will be selected as an INC.
|
||||||
if (C->getAPIntValue() == 1) {
|
if (C->getAPIntValue() == 1 && !Subtarget->slowIncDec()) {
|
||||||
Opcode = X86ISD::INC;
|
Opcode = X86ISD::INC;
|
||||||
NumOperands = 1;
|
NumOperands = 1;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// An add of negative one (subtract of one) will be selected as a DEC.
|
// An add of negative one (subtract of one) will be selected as a DEC.
|
||||||
if (C->getAPIntValue().isAllOnesValue()) {
|
if (C->getAPIntValue().isAllOnesValue() && !Subtarget->slowIncDec()) {
|
||||||
Opcode = X86ISD::DEC;
|
Opcode = X86ISD::DEC;
|
||||||
NumOperands = 1;
|
NumOperands = 1;
|
||||||
break;
|
break;
|
||||||
|
@ -1696,20 +1696,34 @@ def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2),
|
|||||||
(IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;
|
(IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;
|
||||||
|
|
||||||
// Increment reg.
|
// Increment reg.
|
||||||
def : Pat<(add GR8 :$src, 1), (INC8r GR8 :$src)>;
|
// Do not make INC if it is slow
|
||||||
def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>, Requires<[Not64BitMode]>;
|
def : Pat<(add GR8:$src, 1),
|
||||||
def : Pat<(add GR16:$src, 1), (INC64_16r GR16:$src)>, Requires<[In64BitMode]>;
|
(INC8r GR8:$src)>, Requires<[NotSlowIncDec]>;
|
||||||
def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>, Requires<[Not64BitMode]>;
|
def : Pat<(add GR16:$src, 1),
|
||||||
def : Pat<(add GR32:$src, 1), (INC64_32r GR32:$src)>, Requires<[In64BitMode]>;
|
(INC16r GR16:$src)>, Requires<[NotSlowIncDec, Not64BitMode]>;
|
||||||
def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>;
|
def : Pat<(add GR16:$src, 1),
|
||||||
|
(INC64_16r GR16:$src)>, Requires<[NotSlowIncDec, In64BitMode]>;
|
||||||
|
def : Pat<(add GR32:$src, 1),
|
||||||
|
(INC32r GR32:$src)>, Requires<[NotSlowIncDec, Not64BitMode]>;
|
||||||
|
def : Pat<(add GR32:$src, 1),
|
||||||
|
(INC64_32r GR32:$src)>, Requires<[NotSlowIncDec, In64BitMode]>;
|
||||||
|
def : Pat<(add GR64:$src, 1),
|
||||||
|
(INC64r GR64:$src)>, Requires<[NotSlowIncDec]>;
|
||||||
|
|
||||||
// Decrement reg.
|
// Decrement reg.
|
||||||
def : Pat<(add GR8 :$src, -1), (DEC8r GR8 :$src)>;
|
// Do not make DEC if it is slow
|
||||||
def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>, Requires<[Not64BitMode]>;
|
def : Pat<(add GR8:$src, -1),
|
||||||
def : Pat<(add GR16:$src, -1), (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>;
|
(DEC8r GR8:$src)>, Requires<[NotSlowIncDec]>;
|
||||||
def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>, Requires<[Not64BitMode]>;
|
def : Pat<(add GR16:$src, -1),
|
||||||
def : Pat<(add GR32:$src, -1), (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>;
|
(DEC16r GR16:$src)>, Requires<[NotSlowIncDec, Not64BitMode]>;
|
||||||
def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>;
|
def : Pat<(add GR16:$src, -1),
|
||||||
|
(DEC64_16r GR16:$src)>, Requires<[NotSlowIncDec, In64BitMode]>;
|
||||||
|
def : Pat<(add GR32:$src, -1),
|
||||||
|
(DEC32r GR32:$src)>, Requires<[NotSlowIncDec, Not64BitMode]>;
|
||||||
|
def : Pat<(add GR32:$src, -1),
|
||||||
|
(DEC64_32r GR32:$src)>, Requires<[NotSlowIncDec, In64BitMode]>;
|
||||||
|
def : Pat<(add GR64:$src, -1),
|
||||||
|
(DEC64r GR64:$src)>, Requires<[NotSlowIncDec]>;
|
||||||
|
|
||||||
// or reg/reg.
|
// or reg/reg.
|
||||||
def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>;
|
def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>;
|
||||||
|
@ -795,6 +795,7 @@ def OptForSpeed : Predicate<"!OptForSize">;
|
|||||||
def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
|
def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
|
||||||
def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
|
def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
|
||||||
def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">;
|
def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">;
|
||||||
|
def NotSlowIncDec : Predicate<"!Subtarget->slowIncDec()">;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// X86 Instruction Format Definitions.
|
// X86 Instruction Format Definitions.
|
||||||
|
@ -291,6 +291,7 @@ void X86Subtarget::initializeEnvironment() {
|
|||||||
CallRegIndirect = false;
|
CallRegIndirect = false;
|
||||||
LEAUsesAG = false;
|
LEAUsesAG = false;
|
||||||
SlowLEA = false;
|
SlowLEA = false;
|
||||||
|
SlowIncDec = false;
|
||||||
stackAlignment = 4;
|
stackAlignment = 4;
|
||||||
// FIXME: this is a known good value for Yonah. How about others?
|
// FIXME: this is a known good value for Yonah. How about others?
|
||||||
MaxInlineSizeThreshold = 128;
|
MaxInlineSizeThreshold = 128;
|
||||||
|
@ -181,6 +181,9 @@ protected:
|
|||||||
/// SlowLEA - True if the LEA instruction with certain arguments is slow
|
/// SlowLEA - True if the LEA instruction with certain arguments is slow
|
||||||
bool SlowLEA;
|
bool SlowLEA;
|
||||||
|
|
||||||
|
/// SlowIncDec - True if INC and DEC instructions are slow when writing to flags
|
||||||
|
bool SlowIncDec;
|
||||||
|
|
||||||
/// Processor has AVX-512 PreFetch Instructions
|
/// Processor has AVX-512 PreFetch Instructions
|
||||||
bool HasPFI;
|
bool HasPFI;
|
||||||
|
|
||||||
@ -319,6 +322,7 @@ public:
|
|||||||
bool callRegIndirect() const { return CallRegIndirect; }
|
bool callRegIndirect() const { return CallRegIndirect; }
|
||||||
bool LEAusesAG() const { return LEAUsesAG; }
|
bool LEAusesAG() const { return LEAUsesAG; }
|
||||||
bool slowLEA() const { return SlowLEA; }
|
bool slowLEA() const { return SlowLEA; }
|
||||||
|
bool slowIncDec() const { return SlowIncDec; }
|
||||||
bool hasCDI() const { return HasCDI; }
|
bool hasCDI() const { return HasCDI; }
|
||||||
bool hasPFI() const { return HasPFI; }
|
bool hasPFI() const { return HasPFI; }
|
||||||
bool hasERI() const { return HasERI; }
|
bool hasERI() const { return HasERI; }
|
||||||
|
Loading…
Reference in New Issue
Block a user