From 5fb1d3284841472e706ceda0103da24eae7b0542 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sat, 13 Feb 2016 17:26:29 +0000 Subject: [PATCH] [x86-64] allow mfence even with -mno-sse (PR23203) As shown in: https://llvm.org/bugs/show_bug.cgi?id=23203 ...we currently die because lowering believes that mfence is allowed without SSE2 on x86-64, but the instruction def doesn't know that. I don't know if allowing mfence without SSE is right, but if not, at least now it's consistently wrong. :) Differential Revision: http://reviews.llvm.org/D17219 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@260828 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 11 ++------- lib/Target/X86/X86InstrInfo.td | 1 + lib/Target/X86/X86InstrSSE.td | 4 +++- lib/Target/X86/X86Subtarget.h | 5 +++++ test/CodeGen/X86/mfence.ll | 36 +++++++++++++++++++++++++----- 5 files changed, 42 insertions(+), 15 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 277fb815382..a1050381cfd 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -19717,13 +19717,6 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { } } -static bool hasMFENCE(const X86Subtarget &Subtarget) { - // Use mfence if we have SSE2 or we're on x86-64 (even if we asked for - // no-sse2). There isn't any reason to disable it if the target processor - // supports it. - return Subtarget.hasSSE2() || Subtarget.is64Bit(); -} - LoadInst * X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const { unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32; @@ -19763,7 +19756,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const { // the IR level, so we must wrap it in an intrinsic. 
return nullptr; - if (!hasMFENCE(Subtarget)) + if (!Subtarget.hasMFence()) // FIXME: it might make sense to use a locked operation here but on a // different cache-line to prevent cache-line bouncing. In practice it // is probably a small win, and x86 processors without mfence are rare @@ -19794,7 +19787,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget, // The only fence that needs an instruction is a sequentially-consistent // cross-thread fence. if (FenceOrdering == SequentiallyConsistent && FenceScope == CrossThread) { - if (hasMFENCE(Subtarget)) + if (Subtarget.hasMFence()) return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0)); SDValue Chain = Op.getOperand(0); diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 7178f1f6014..712711bea3e 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -845,6 +845,7 @@ def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">; def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">; def NotSlowIncDec : Predicate<"!Subtarget->slowIncDec()">; def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">; +def HasMFence : Predicate<"Subtarget->hasMFence()">; //===----------------------------------------------------------------------===// // X86 Instruction Format Definitions. diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 499c4c11c59..9c127ff5650 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3762,6 +3762,8 @@ def PAUSE : I<0x90, RawFrm, (outs), (ins), let SchedRW = [WriteFence] in { // Load, store, and memory fence +// TODO: As with mfence, we may want to ease the availability of sfence/lfence +// to include any 64-bit target. 
def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)], IIC_SSE_SFENCE>, PS, Requires<[HasSSE1]>; @@ -3770,7 +3772,7 @@ def LFENCE : I<0xAE, MRM_E8, (outs), (ins), TB, Requires<[HasSSE2]>; def MFENCE : I<0xAE, MRM_F0, (outs), (ins), "mfence", [(int_x86_sse2_mfence)], IIC_SSE_MFENCE>, - TB, Requires<[HasSSE2]>; + TB, Requires<[HasMFence]>; } // SchedRW def : Pat<(X86SFence), (SFENCE)>; diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 501770ca0d0..86f25408650 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -446,6 +446,11 @@ public: bool isSLM() const { return X86ProcFamily == IntelSLM; } bool useSoftFloat() const { return UseSoftFloat; } + /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for + /// no-sse2). There isn't any reason to disable it if the target processor + /// supports it. + bool hasMFence() const { return hasSSE2() || is64Bit(); } + const Triple &getTargetTriple() const { return TargetTriple; } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } diff --git a/test/CodeGen/X86/mfence.ll b/test/CodeGen/X86/mfence.ll index e1825f2beb1..b67a5c35504 100644 --- a/test/CodeGen/X86/mfence.ll +++ b/test/CodeGen/X86/mfence.ll @@ -1,11 +1,37 @@ -; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-sse2 | FileCheck %s --check-prefix=X64 + +; It doesn't matter if an x86-64 target has specified "no-sse2"; we still can use mfence. 
define void @test() { -; CHECK-LABEL: test: -; CHECK: # BB#0: -; CHECK-NEXT: mfence -; CHECK-NEXT: retl +; X32-LABEL: test: +; X32: # BB#0: +; X32-NEXT: mfence +; X32-NEXT: retl +; +; X64-LABEL: test: +; X64: # BB#0: +; X64-NEXT: mfence +; X64-NEXT: retq fence seq_cst ret void } +define i32 @fence(i32* %ptr) { +; X32-LABEL: fence: +; X32: # BB#0: +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: mfence +; X32-NEXT: movl (%eax), %eax +; X32-NEXT: retl +; +; X64-LABEL: fence: +; X64: # BB#0: +; X64-NEXT: mfence +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: retq + %atomic = atomicrmw add i32* %ptr, i32 0 seq_cst + ret i32 %atomic +} +