Move the MMX subtarget feature out of the SSE set of features and into its own variable.

This is needed so that we can explicitly turn off MMX without turning
off SSE and also so that we can diagnose feature set incompatibilities
that involve MMX without SSE.

Rationale:

// sse3
// Forwards A and B to the _mm_addsub_pd intrinsic and returns its result.
// This is the SSE3 half of the example: it should keep compiling when MMX
// is explicitly disabled (see the clang invocation below).
__m128d test_mm_addsub_pd(__m128d A, __m128d B) {
  return _mm_addsub_pd(A, B);
}

// mmx
// MMX half of the example: calls a series of __m64 shift intrinsics on `a`
// with the shift count `c`. The results are discarded and `b` is not used;
// the function exists only to reference MMX intrinsics so that compiling it
// with MMX disabled can be diagnosed.
void shift(__m64 a, __m64 b, int c) {
  // Left shifts (slli) on 16-bit, 32-bit and 64-bit forms.
  _mm_slli_pi16(a, c);
  _mm_slli_pi32(a, c);
  _mm_slli_si64(a, c);
  // Right shifts, srli variants.
  _mm_srli_pi16(a, c);
  _mm_srli_pi32(a, c);
  _mm_srli_si64(a, c);
  // Right shifts, srai variants (only the pi16 and pi32 forms are called).
  _mm_srai_pi16(a, c);
  _mm_srai_pi32(a, c);
}

clang -msse3 -mno-mmx file.c -c

For this code, we should be able to explicitly turn off MMX
without affecting the compilation of the SSE3 function, and then
diagnose (with an error) the attempt to compile the MMX function.

This matches the existing GCC behavior and follows the spirit of
the SSE/MMX separation in LLVM, where we can (and do) turn off
MMX code generation except in the presence of intrinsics.

Updated a couple of existing tests, but the change is primarily tested by a
couple of new tests: one that turns on only MMX and one that turns on only SSE.

This is paired with a patch to clang to take advantage of this behavior.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@249731 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Eric Christopher 2015-10-08 20:10:06 +00:00
parent 3bc8dc3685
commit 47f0e3f434
7 changed files with 341 additions and 171 deletions

View File

@ -37,14 +37,17 @@ def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
"Support POPCNT instruction">;
def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX",
// The MMX subtarget feature is separate from the rest of the SSE features
// because it's important (for odd compatibility reasons) to be able to
// turn it off explicitly while allowing SSE+ to be on.
def FeatureMMX : SubtargetFeature<"mmx","HasMMX", "true",
"Enable MMX instructions">;
def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
"Enable SSE instructions",
// SSE codegen depends on cmovs, and all
// SSE1+ processors support them.
[FeatureMMX, FeatureCMOV]>;
[FeatureCMOV]>;
def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
"Enable SSE2 instructions",
[FeatureSSE1]>;
@ -219,36 +222,52 @@ def : Proc<"pentium-mmx", [FeatureSlowUAMem16, FeatureMMX]>;
def : Proc<"i686", [FeatureSlowUAMem16]>;
def : Proc<"pentiumpro", [FeatureSlowUAMem16, FeatureCMOV]>;
def : Proc<"pentium2", [FeatureSlowUAMem16, FeatureMMX, FeatureCMOV]>;
def : Proc<"pentium3", [FeatureSlowUAMem16, FeatureSSE1]>;
def : Proc<"pentium3m", [FeatureSlowUAMem16, FeatureSSE1,
def : Proc<"pentium3", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE1]>;
def : Proc<"pentium3m", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE1,
FeatureSlowBTMem]>;
def : Proc<"pentium-m", [FeatureSlowUAMem16, FeatureSSE2,
def : Proc<"pentium-m", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE2,
FeatureSlowBTMem]>;
def : Proc<"pentium4", [FeatureSlowUAMem16, FeatureSSE2]>;
def : Proc<"pentium4m", [FeatureSlowUAMem16, FeatureSSE2,
def : Proc<"pentium4", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE2]>;
def : Proc<"pentium4m", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE2,
FeatureSlowBTMem]>;
// Intel Core Duo.
def : ProcessorModel<"yonah", SandyBridgeModel,
[FeatureSlowUAMem16, FeatureSSE3, FeatureSlowBTMem]>;
def : ProcessorModel<
"yonah", SandyBridgeModel,
[ FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureSlowBTMem ]>;
// NetBurst.
def : Proc<"prescott", [FeatureSlowUAMem16, FeatureSSE3, FeatureSlowBTMem]>;
def : Proc<"nocona", [FeatureSlowUAMem16, FeatureSSE3, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
def : Proc<"prescott",
[ FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureSlowBTMem ]>;
def : Proc<"nocona", [
FeatureSlowUAMem16,
FeatureMMX,
FeatureSSE3,
FeatureCMPXCHG16B,
FeatureSlowBTMem
]>;
// Intel Core 2 Solo/Duo.
def : ProcessorModel<"core2", SandyBridgeModel,
[FeatureSlowUAMem16, FeatureSSSE3, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
def : ProcessorModel<"penryn", SandyBridgeModel,
[FeatureSlowUAMem16, FeatureSSE41, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
def : ProcessorModel<"core2", SandyBridgeModel, [
FeatureSlowUAMem16,
FeatureMMX,
FeatureSSSE3,
FeatureCMPXCHG16B,
FeatureSlowBTMem
]>;
def : ProcessorModel<"penryn", SandyBridgeModel, [
FeatureSlowUAMem16,
FeatureMMX,
FeatureSSE41,
FeatureCMPXCHG16B,
FeatureSlowBTMem
]>;
// Atom CPUs.
class BonnellProc<string Name> : ProcessorModel<Name, AtomModel, [
ProcIntelAtom,
FeatureSlowUAMem16,
FeatureMMX,
FeatureSSSE3,
FeatureCMPXCHG16B,
FeatureMOVBE,
@ -265,6 +284,7 @@ def : BonnellProc<"atom">; // Pin the generic name to the baseline.
class SilvermontProc<string Name> : ProcessorModel<Name, SLMModel, [
ProcIntelSLM,
FeatureMMX,
FeatureSSE42,
FeatureCMPXCHG16B,
FeatureMOVBE,
@ -283,6 +303,7 @@ def : SilvermontProc<"slm">; // Legacy alias.
// "Arrandale" along with corei3 and corei5
class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureMMX,
FeatureSSE42,
FeatureCMPXCHG16B,
FeatureSlowBTMem,
@ -294,6 +315,7 @@ def : NehalemProc<"corei7">;
// Westmere is a similar machine to nehalem with some additional features.
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
class WestmereProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureMMX,
FeatureSSE42,
FeatureCMPXCHG16B,
FeatureSlowBTMem,
@ -306,6 +328,7 @@ def : WestmereProc<"westmere">;
// SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
// rather than a superset.
class SandyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureMMX,
FeatureAVX,
FeatureCMPXCHG16B,
FeatureSlowBTMem,
@ -318,6 +341,7 @@ def : SandyBridgeProc<"sandybridge">;
def : SandyBridgeProc<"corei7-avx">; // Legacy alias.
class IvyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureMMX,
FeatureAVX,
FeatureCMPXCHG16B,
FeatureSlowBTMem,
@ -333,6 +357,7 @@ def : IvyBridgeProc<"ivybridge">;
def : IvyBridgeProc<"core-avx-i">; // Legacy alias.
class HaswellProc<string Name> : ProcessorModel<Name, HaswellModel, [
FeatureMMX,
FeatureAVX2,
FeatureCMPXCHG16B,
FeatureSlowBTMem,
@ -355,6 +380,7 @@ def : HaswellProc<"haswell">;
def : HaswellProc<"core-avx2">; // Legacy alias.
class BroadwellProc<string Name> : ProcessorModel<Name, HaswellModel, [
FeatureMMX,
FeatureAVX2,
FeatureCMPXCHG16B,
FeatureSlowBTMem,
@ -378,25 +404,59 @@ class BroadwellProc<string Name> : ProcessorModel<Name, HaswellModel, [
def : BroadwellProc<"broadwell">;
// FIXME: define KNL model
class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel,
[FeatureAVX512, FeatureERI, FeatureCDI, FeaturePFI,
FeatureCMPXCHG16B, FeaturePOPCNT,
FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
FeatureSlowIncDec, FeatureMPX]>;
class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel, [
FeatureMMX,
FeatureAVX512,
FeatureERI,
FeatureCDI,
FeaturePFI,
FeatureCMPXCHG16B,
FeaturePOPCNT,
FeatureAES,
FeaturePCLMUL,
FeatureRDRAND,
FeatureF16C,
FeatureFSGSBase,
FeatureMOVBE,
FeatureLZCNT,
FeatureBMI,
FeatureBMI2,
FeatureFMA,
FeatureRTM,
FeatureHLE,
FeatureSlowIncDec,
FeatureMPX
]>;
def : KnightsLandingProc<"knl">;
// FIXME: define SKX model
class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel,
[FeatureAVX512, FeatureCDI,
FeatureDQI, FeatureBWI, FeatureVLX,
FeatureCMPXCHG16B, FeatureSlowBTMem,
FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND,
FeatureF16C, FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT,
FeatureBMI, FeatureBMI2, FeatureFMA, FeatureRTM,
FeatureHLE, FeatureADX, FeatureRDSEED, FeatureSlowIncDec,
FeatureMPX]>;
class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel, [
FeatureMMX,
FeatureAVX512,
FeatureCDI,
FeatureDQI,
FeatureBWI,
FeatureVLX,
FeatureCMPXCHG16B,
FeatureSlowBTMem,
FeaturePOPCNT,
FeatureAES,
FeaturePCLMUL,
FeatureRDRAND,
FeatureF16C,
FeatureFSGSBase,
FeatureMOVBE,
FeatureLZCNT,
FeatureBMI,
FeatureBMI2,
FeatureFMA,
FeatureRTM,
FeatureHLE,
FeatureADX,
FeatureRDSEED,
FeatureSlowIncDec,
FeatureMPX
]>;
def : SkylakeProc<"skylake">;
def : SkylakeProc<"skx">; // Legacy alias.
@ -447,52 +507,117 @@ def : Proc<"barcelona", [FeatureSSE4A,
FeatureSlowSHLD]>;
// Bobcat
def : Proc<"btver1", [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B,
FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT,
FeatureSlowSHLD]>;
def : Proc<"btver1", [
FeatureMMX,
FeatureSSSE3,
FeatureSSE4A,
FeatureCMPXCHG16B,
FeaturePRFCHW,
FeatureLZCNT,
FeaturePOPCNT,
FeatureSlowSHLD
]>;
// Jaguar
def : ProcessorModel<"btver2", BtVer2Model,
[FeatureAVX, FeatureSSE4A, FeatureCMPXCHG16B,
FeaturePRFCHW, FeatureAES, FeaturePCLMUL,
FeatureBMI, FeatureF16C, FeatureMOVBE,
FeatureLZCNT, FeaturePOPCNT,
FeatureSlowSHLD]>;
def : ProcessorModel<"btver2", BtVer2Model, [
FeatureMMX,
FeatureAVX,
FeatureSSE4A,
FeatureCMPXCHG16B,
FeaturePRFCHW,
FeatureAES,
FeaturePCLMUL,
FeatureBMI,
FeatureF16C,
FeatureMOVBE,
FeatureLZCNT,
FeaturePOPCNT,
FeatureSlowSHLD
]>;
// Bulldozer
def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
FeatureAVX, FeatureSSE4A, FeatureLZCNT,
FeaturePOPCNT, FeatureSlowSHLD]>;
def : Proc<"bdver1", [
FeatureXOP,
FeatureFMA4,
FeatureCMPXCHG16B,
FeatureAES,
FeaturePRFCHW,
FeaturePCLMUL,
FeatureMMX,
FeatureAVX,
FeatureSSE4A,
FeatureLZCNT,
FeaturePOPCNT,
FeatureSlowSHLD
]>;
// Piledriver
def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
FeatureAVX, FeatureSSE4A, FeatureF16C,
FeatureLZCNT, FeaturePOPCNT, FeatureBMI,
FeatureTBM, FeatureFMA, FeatureSlowSHLD]>;
def : Proc<"bdver2", [
FeatureXOP,
FeatureFMA4,
FeatureCMPXCHG16B,
FeatureAES,
FeaturePRFCHW,
FeaturePCLMUL,
FeatureMMX,
FeatureAVX,
FeatureSSE4A,
FeatureF16C,
FeatureLZCNT,
FeaturePOPCNT,
FeatureBMI,
FeatureTBM,
FeatureFMA,
FeatureSlowSHLD
]>;
// Steamroller
def : Proc<"bdver3", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
FeatureAVX, FeatureSSE4A, FeatureF16C,
FeatureLZCNT, FeaturePOPCNT, FeatureBMI,
FeatureTBM, FeatureFMA, FeatureSlowSHLD,
FeatureFSGSBase]>;
def : Proc<"bdver3", [
FeatureXOP,
FeatureFMA4,
FeatureCMPXCHG16B,
FeatureAES,
FeaturePRFCHW,
FeaturePCLMUL,
FeatureMMX,
FeatureAVX,
FeatureSSE4A,
FeatureF16C,
FeatureLZCNT,
FeaturePOPCNT,
FeatureBMI,
FeatureTBM,
FeatureFMA,
FeatureSlowSHLD,
FeatureFSGSBase
]>;
// Excavator
def : Proc<"bdver4", [FeatureAVX2, FeatureXOP, FeatureFMA4,
FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW,
FeaturePCLMUL, FeatureF16C, FeatureLZCNT,
FeaturePOPCNT, FeatureBMI, FeatureBMI2,
FeatureTBM, FeatureFMA, FeatureSSE4A,
FeatureFSGSBase]>;
def : Proc<"bdver4", [
FeatureMMX,
FeatureAVX2,
FeatureXOP,
FeatureFMA4,
FeatureCMPXCHG16B,
FeatureAES,
FeaturePRFCHW,
FeaturePCLMUL,
FeatureF16C,
FeatureLZCNT,
FeaturePOPCNT,
FeatureBMI,
FeatureBMI2,
FeatureTBM,
FeatureFMA,
FeatureSSE4A,
FeatureFSGSBase
]>;
def : Proc<"geode", [FeatureSlowUAMem16, Feature3DNowA]>;
def : Proc<"winchip-c6", [FeatureSlowUAMem16, FeatureMMX]>;
def : Proc<"winchip2", [FeatureSlowUAMem16, Feature3DNow]>;
def : Proc<"c3", [FeatureSlowUAMem16, Feature3DNow]>;
def : Proc<"c3-2", [FeatureSlowUAMem16, FeatureSSE1]>;
def : Proc<"c3-2", [ FeatureSlowUAMem16, FeatureMMX, FeatureSSE1 ]>;
// We also provide a generic 64-bit specific x86 processor model which tries to
// be good for modern chips without enabling instruction set encodings past the
@ -504,8 +629,9 @@ def : Proc<"c3-2", [FeatureSlowUAMem16, FeatureSSE1]>;
// covers a huge swath of x86 processors. If there are specific scheduling
// knobs which need to be tuned differently for AMD chips, we might consider
// forming a common base for them.
def : ProcessorModel<"x86-64", SandyBridgeModel,
[FeatureSSE2, Feature64Bit, FeatureSlowBTMem]>;
def : ProcessorModel<
"x86-64", SandyBridgeModel,
[ FeatureMMX, FeatureSSE2, Feature64Bit, FeatureSlowBTMem ]>;
//===----------------------------------------------------------------------===//
// Register File Description

View File

@ -228,9 +228,10 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
}
void X86Subtarget::initializeEnvironment() {
X86SSELevel = NoMMXSSE;
X86SSELevel = NoSSE;
X863DNowLevel = NoThreeDNow;
HasCMov = false;
HasMMX = false;
HasX86_64 = false;
HasPOPCNT = false;
HasSSE4A = false;

View File

@ -47,7 +47,7 @@ class X86Subtarget final : public X86GenSubtargetInfo {
protected:
enum X86SSEEnum {
NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F
NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F
};
enum X863DNowEnum {
@ -64,7 +64,7 @@ protected:
/// Which PIC style to use
PICStyles::Style PICStyle;
/// MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported.
/// SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported.
X86SSEEnum X86SSELevel;
/// 3DNow, 3DNow Athlon, or none supported.
@ -74,6 +74,9 @@ protected:
/// (generally pentium pro+).
bool HasCMov;
/// True if this processor supports MMX instructions.
bool HasMMX;
/// True if the processor supports X86-64 instructions.
bool HasX86_64;
@ -319,7 +322,7 @@ public:
void setPICStyle(PICStyles::Style Style) { PICStyle = Style; }
bool hasCMov() const { return HasCMov; }
bool hasMMX() const { return X86SSELevel >= MMX; }
bool hasMMX() const { return HasMMX; }
bool hasSSE1() const { return X86SSELevel >= SSE1; }
bool hasSSE2() const { return X86SSELevel >= SSE2; }
bool hasSSE3() const { return X86SSELevel >= SSE3; }

View File

@ -1,7 +1,7 @@
; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefix=ALL --check-prefix=X86
; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=X86
; RUN: llc < %s -march=x86 -mattr=+mmx,+avx | FileCheck %s --check-prefix=ALL --check-prefix=X86
; RUN: llc < %s -march=x86-64 -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefix=ALL --check-prefix=X64
; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=X64
; RUN: llc < %s -march=x86-64 -mattr=+mmx,+avx | FileCheck %s --check-prefix=ALL --check-prefix=X64
declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone

View File

@ -0,0 +1,21 @@
; RUN: llc < %s -march=x86 -mattr=+mmx | FileCheck %s
; RUN: llc < %s -march=x86 -mattr=+mmx,-sse | FileCheck %s
; Test that turning off sse doesn't turn off mmx.
declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone
; Applies the MMX pcmpgt.d intrinsic to the two 64-bit vector arguments and
; returns the result as an i64. The FileCheck directives below require a
; pcmpgtd instruction in the generated code for each RUN configuration of
; this file.
define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone {
; CHECK-LABEL: @test88
; CHECK: pcmpgtd
entry:
; Reinterpret both operands as <2 x i32>, then as x86_mmx values.
%0 = bitcast <1 x i64> %b to <2 x i32>
%1 = bitcast <1 x i64> %a to <2 x i32>
%mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
%mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
; The intrinsic call that is expected to lower to pcmpgtd (%a first, %b second).
%2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
; Convert the x86_mmx result back to <1 x i64> and return its only element.
%3 = bitcast x86_mmx %2 to <2 x i32>
%4 = bitcast <2 x i32> %3 to <1 x i64>
%5 = extractelement <1 x i64> %4, i32 0
ret i64 %5
}

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86 -mattr=+sse2 -no-integrated-as
; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 -no-integrated-as
; ModuleID = 'mult-alt-x86.c'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
target triple = "i686-pc-win32"

View File

@ -0,0 +1,19 @@
; RUN: llc < %s -march=x86 -mattr=+sse2,-mmx | FileCheck %s
; Test that turning off mmx doesn't turn off sse
; Loads a <2 x double> from %A, replaces element 0 with the scalar %B, and
; stores the result to %r. The RUN line enables SSE2 while disabling MMX, and
; the FileCheck directives below pin the expected SSE2 instruction sequence
; (movapd / movlpd / movapd).
define void @test1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
; CHECK-LABEL: test1:
; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movapd (%ecx), %xmm0
; CHECK-NEXT: movlpd {{[0-9]+}}(%esp), %xmm0
; CHECK-NEXT: movapd %xmm0, (%eax)
; CHECK-NEXT: retl
%tmp3 = load <2 x double>, <2 x double>* %A, align 16
; Put %B in element 0 of an otherwise undef vector.
%tmp7 = insertelement <2 x double> undef, double %B, i32 0
; Mask <2, 1>: element 0 comes from %tmp7[0] (i.e. %B), element 1 from %tmp3[1].
%tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 >
store <2 x double> %tmp9, <2 x double>* %r, align 16
ret void
}