mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-03 17:32:59 +00:00
[x86-64] allow mfence even with -mno-sse (PR23203)
As shown in https://llvm.org/bugs/show_bug.cgi?id=23203, we currently die because lowering believes that mfence is allowed without SSE2 on x86-64, but the instruction definition doesn't know that. I don't know if allowing mfence without SSE is right, but if not, at least now it's consistently wrong. :) Differential Revision: http://reviews.llvm.org/D17219 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@260828 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
026259738d
commit
5fb1d32848
@ -19717,13 +19717,6 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
|
||||
}
|
||||
}
|
||||
|
||||
static bool hasMFENCE(const X86Subtarget &Subtarget) {
|
||||
// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
|
||||
// no-sse2). There isn't any reason to disable it if the target processor
|
||||
// supports it.
|
||||
return Subtarget.hasSSE2() || Subtarget.is64Bit();
|
||||
}
|
||||
|
||||
LoadInst *
|
||||
X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
|
||||
unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
|
||||
@ -19763,7 +19756,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
|
||||
// the IR level, so we must wrap it in an intrinsic.
|
||||
return nullptr;
|
||||
|
||||
if (!hasMFENCE(Subtarget))
|
||||
if (!Subtarget.hasMFence())
|
||||
// FIXME: it might make sense to use a locked operation here but on a
|
||||
// different cache-line to prevent cache-line bouncing. In practice it
|
||||
// is probably a small win, and x86 processors without mfence are rare
|
||||
@ -19794,7 +19787,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget,
|
||||
// The only fence that needs an instruction is a sequentially-consistent
|
||||
// cross-thread fence.
|
||||
if (FenceOrdering == SequentiallyConsistent && FenceScope == CrossThread) {
|
||||
if (hasMFENCE(Subtarget))
|
||||
if (Subtarget.hasMFence())
|
||||
return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
|
||||
|
||||
SDValue Chain = Op.getOperand(0);
|
||||
|
@ -845,6 +845,7 @@ def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
|
||||
def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">;
|
||||
def NotSlowIncDec : Predicate<"!Subtarget->slowIncDec()">;
|
||||
def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">;
|
||||
def HasMFence : Predicate<"Subtarget->hasMFence()">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// X86 Instruction Format Definitions.
|
||||
|
@ -3762,6 +3762,8 @@ def PAUSE : I<0x90, RawFrm, (outs), (ins),
|
||||
|
||||
let SchedRW = [WriteFence] in {
|
||||
// Load, store, and memory fence
|
||||
// TODO: As with mfence, we may want to ease the availability of sfence/lfence
|
||||
// to include any 64-bit target.
|
||||
def SFENCE : I<0xAE, MRM_F8, (outs), (ins),
|
||||
"sfence", [(int_x86_sse_sfence)], IIC_SSE_SFENCE>,
|
||||
PS, Requires<[HasSSE1]>;
|
||||
@ -3770,7 +3772,7 @@ def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
|
||||
TB, Requires<[HasSSE2]>;
|
||||
def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
|
||||
"mfence", [(int_x86_sse2_mfence)], IIC_SSE_MFENCE>,
|
||||
TB, Requires<[HasSSE2]>;
|
||||
TB, Requires<[HasMFence]>;
|
||||
} // SchedRW
|
||||
|
||||
def : Pat<(X86SFence), (SFENCE)>;
|
||||
|
@ -446,6 +446,11 @@ public:
|
||||
bool isSLM() const { return X86ProcFamily == IntelSLM; }
|
||||
bool useSoftFloat() const { return UseSoftFloat; }
|
||||
|
||||
/// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
|
||||
/// no-sse2). There isn't any reason to disable it if the target processor
|
||||
/// supports it.
|
||||
bool hasMFence() const { return hasSSE2() || is64Bit(); }
|
||||
|
||||
const Triple &getTargetTriple() const { return TargetTriple; }
|
||||
|
||||
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
|
||||
|
@ -1,11 +1,37 @@
|
||||
; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-sse2 | FileCheck %s --check-prefix=X64
|
||||
|
||||
; It doesn't matter if an x86-64 target has specified "no-sse2"; we can still use mfence.
|
||||
|
||||
define void @test() {
|
||||
; CHECK-LABEL: test:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: mfence
|
||||
; CHECK-NEXT: retl
|
||||
; X32-LABEL: test:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: mfence
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: mfence
|
||||
; X64-NEXT: retq
|
||||
fence seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
define i32 @fence(i32* %ptr) {
|
||||
; X32-LABEL: fence:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: mfence
|
||||
; X32-NEXT: movl (%eax), %eax
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: fence:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: mfence
|
||||
; X64-NEXT: movl (%rdi), %eax
|
||||
; X64-NEXT: retq
|
||||
%atomic = atomicrmw add i32* %ptr, i32 0 seq_cst
|
||||
ret i32 %atomic
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user