diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td index 3f116f648ce..e473e75a583 100644 --- a/include/llvm/IntrinsicsX86.td +++ b/include/llvm/IntrinsicsX86.td @@ -536,6 +536,16 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". llvm_i32_ty], [IntrWriteMem]>; } +//===----------------------------------------------------------------------===// +// SSSE3 + +// Integer arithmetic ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_ssse3_pmulhrsw_128 : GCCBuiltin<"__builtin_ia32_pmulhrsw128">, + Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, + llvm_v8i16_ty], [IntrNoMem]>; +} + //===----------------------------------------------------------------------===// // MMX diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 3049a861e26..33de92a6bb7 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -30,6 +30,8 @@ def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2", "Enable SSE2 instructions">; def FeatureSSE3 : SubtargetFeature<"sse3", "X86SSELevel", "SSE3", "Enable SSE3 instructions">; +def FeatureSSSE3 : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3", + "Enable SSSE3 instructions">; def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow", "Enable 3DNow! instructions">; def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA", diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index 899e30f6f49..a791e2a04ab 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -584,6 +584,14 @@ void Emitter::emitInstruction(const MachineInstr &MI) { case X86II::TB: Need0FPrefix = true; // Two-byte opcode prefix break; + case X86II::T8: + MCE.emitByte(0x0F); + MCE.emitByte(0x38); + break; + case X86II::TA: + MCE.emitByte(0x0F); + MCE.emitByte(0x3A); + break; case X86II::REP: break; // already handled. 
case X86II::XS: // F3 0F MCE.emitByte(0xF3); diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index b753a88f1d0..aaf6f15e5ff 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -154,7 +154,10 @@ namespace X86II { // XS, XD - These prefix codes are for single and double precision scalar // floating point operations performed in the SSE registers. - XD = 11 << Op0Shift, XS = 12 << Op0Shift, + XD = 11 << Op0Shift, XS = 12 << Op0Shift, + + // T8, TA - Prefix after the 0x0F prefix. + T8 = 13 << Op0Shift, TA = 14 << Op0Shift, //===------------------------------------------------------------------===// // REX_W - REX prefixes are instruction prefixes used in 64-bit mode. diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index fd08e477014..8e4e7d7ecb4 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -167,6 +167,7 @@ def HasMMX : Predicate<"Subtarget->hasMMX()">; def HasSSE1 : Predicate<"Subtarget->hasSSE1()">; def HasSSE2 : Predicate<"Subtarget->hasSSE2()">; def HasSSE3 : Predicate<"Subtarget->hasSSE3()">; +def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">; def FPStack : Predicate<"!Subtarget->hasSSE2()">; def In32BitMode : Predicate<"!Subtarget->is64Bit()">; def In64BitMode : Predicate<"Subtarget->is64Bit()">; @@ -248,6 +249,8 @@ class DE { bits<4> Prefix = 9; } class DF { bits<4> Prefix = 10; } class XD { bits<4> Prefix = 11; } class XS { bits<4> Prefix = 12; } +class T8 { bits<4> Prefix = 13; } +class TA { bits<4> Prefix = 14; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 82831f7ecf1..61ea9a85a76 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -183,15 +183,17 @@ def PSHUFD_binary_shuffle_mask : PatLeaf<(build_vector), [{ //===----------------------------------------------------------------------===// // 
Instruction templates -// SSI - SSE1 instructions with XS prefix. -// SDI - SSE2 instructions with XD prefix. -// PSI - SSE1 instructions with TB prefix. -// PDI - SSE2 instructions with TB and OpSize prefixes. +// SSI - SSE1 instructions with XS prefix. +// SDI - SSE2 instructions with XD prefix. +// PSI - SSE1 instructions with TB prefix. +// PDI - SSE2 instructions with TB and OpSize prefixes. // PSIi8 - SSE1 instructions with ImmT == Imm8 and TB prefix. // PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes. -// S3I - SSE3 instructions with TB and OpSize prefixes. -// S3SI - SSE3 instructions with XS prefix. -// S3DI - SSE3 instructions with XD prefix. +// S3I - SSE3 instructions with TB and OpSize prefixes. +// S3SI - SSE3 instructions with XS prefix. +// S3DI - SSE3 instructions with XD prefix. +// SS38I - SSSE3 instructions with T8 and OpSize prefixes. +// SS3AI - SSSE3 instructions with TA and OpSize prefixes. class SSI o, Format F, dag ops, string asm, list pattern> : I, XS, Requires<[HasSSE1]>; class SDI o, Format F, dag ops, string asm, list pattern> @@ -212,6 +214,11 @@ class S3DI o, Format F, dag ops, string asm, list pattern> class S3I o, Format F, dag ops, string asm, list pattern> : I, TB, OpSize, Requires<[HasSSE3]>; +class SS38I o, Format F, dag ops, string asm, list pattern> + : I, T8, OpSize, Requires<[HasSSSE3]>; +class SS3AI o, Format F, dag ops, string asm, list pattern> + : I, TA, OpSize, Requires<[HasSSSE3]>; + //===----------------------------------------------------------------------===// // Helpers for defining instructions that directly correspond to intrinsics. @@ -1311,6 +1318,22 @@ multiclass PDI_binop_rm_v2i64 opc, string OpcodeStr, SDNode OpNode, } } +/// SS3I_binop_rm_int - Simple SSSE3 binary operator whose type is v2i64. 
+let isTwoAddress = 1 in { + multiclass SS3I_binop_rm_int opc, string OpcodeStr, Intrinsic IntId, + bit Commutable = 0> { + def rr : SS38I { + let isCommutable = Commutable; + } + def rm : SS38I; + } +} // 128-bit Integer Arithmetic @@ -1340,6 +1363,9 @@ defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, 1>; defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w , 1>; defm PMULUDQ : PDI_binop_rm_int<0xF4, "pmuludq", int_x86_sse2_pmulu_dq, 1>; +defm PMULHRSW128 : SS3I_binop_rm_int<0x0B, "pmulhrsw", + int_x86_ssse3_pmulhrsw_128, 1>; + defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, 1>; defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>; diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index cd6fb034e09..00e34d5d097 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -113,6 +113,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { if ((EDX >> 25) & 0x1) X86SSELevel = SSE1; if ((EDX >> 26) & 0x1) X86SSELevel = SSE2; if (ECX & 0x1) X86SSELevel = SSE3; + if ((ECX >> 9) & 0x1) X86SSELevel = SSSE3; if (memcmp(text.c, "GenuineIntel", 12) == 0) { X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index f3a25433450..2cda9705e31 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -38,7 +38,7 @@ public: }; protected: enum X86SSEEnum { - NoMMXSSE, MMX, SSE1, SSE2, SSE3 + NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3 }; enum X863DNowEnum { @@ -51,7 +51,7 @@ protected: /// PICStyle - Which PIC style to use PICStyle::Style PICStyle; - /// X86SSELevel - MMX, SSE1, SSE2, SSE3, or none supported. + /// X86SSELevel - MMX, SSE1, SSE2, SSE3, SSSE3, or none supported. X86SSEEnum X86SSELevel; /// X863DNowLevel - 3DNow or 3DNow Athlon, or none supported. 
@@ -110,6 +110,7 @@ public: bool hasSSE1() const { return X86SSELevel >= SSE1; } bool hasSSE2() const { return X86SSELevel >= SSE2; } bool hasSSE3() const { return X86SSELevel >= SSE3; } + bool hasSSSE3() const { return X86SSELevel >= SSSE3; } bool has3DNow() const { return X863DNowLevel >= ThreeDNow; } bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }