mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-20 02:58:10 +00:00
Add support for our first SSSE3 instruction "pmulhrsw".
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@35869 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
773fd38553
commit
bb1ee05253
@ -536,6 +536,16 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
llvm_i32_ty], [IntrWriteMem]>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SSSE3
|
||||
|
||||
// Integer arithmetic ops
|
||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_ssse3_pmulhrsw_128 : GCCBuiltin<"__builtin_ia32_pmulhrsw128">,
|
||||
Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
|
||||
llvm_v8i16_ty], [IntrNoMem]>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// MMX
|
||||
|
||||
|
@ -30,6 +30,8 @@ def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
|
||||
"Enable SSE2 instructions">;
|
||||
def FeatureSSE3 : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
|
||||
"Enable SSE3 instructions">;
|
||||
def FeatureSSSE3 : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
|
||||
"Enable SSSE3 instructions">;
|
||||
def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
|
||||
"Enable 3DNow! instructions">;
|
||||
def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
|
||||
|
@ -584,6 +584,14 @@ void Emitter::emitInstruction(const MachineInstr &MI) {
|
||||
case X86II::TB:
|
||||
Need0FPrefix = true; // Two-byte opcode prefix
|
||||
break;
|
||||
case X86II::T8:
|
||||
MCE.emitByte(0x0F);
|
||||
MCE.emitByte(0x38);
|
||||
break;
|
||||
case X86II::TA:
|
||||
MCE.emitByte(0x0F);
|
||||
MCE.emitByte(0x3A);
|
||||
break;
|
||||
case X86II::REP: break; // already handled.
|
||||
case X86II::XS: // F3 0F
|
||||
MCE.emitByte(0xF3);
|
||||
|
@ -154,7 +154,10 @@ namespace X86II {
|
||||
|
||||
// XS, XD - These prefix codes are for single and double precision scalar
|
||||
// floating point operations performed in the SSE registers.
|
||||
XD = 11 << Op0Shift, XS = 12 << Op0Shift,
|
||||
XD = 11 << Op0Shift, XS = 12 << Op0Shift,
|
||||
|
||||
// T8, TA - Three-byte opcode escapes: 0x0F 0x38 (T8) and 0x0F 0x3A (TA).
|
||||
T8 = 13 << Op0Shift, TA = 14 << Op0Shift,
|
||||
|
||||
//===------------------------------------------------------------------===//
|
||||
// REX_W - REX prefixes are instruction prefixes used in 64-bit mode.
|
||||
|
@ -167,6 +167,7 @@ def HasMMX : Predicate<"Subtarget->hasMMX()">;
|
||||
def HasSSE1 : Predicate<"Subtarget->hasSSE1()">;
|
||||
def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
|
||||
def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
|
||||
def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
|
||||
def FPStack : Predicate<"!Subtarget->hasSSE2()">;
|
||||
def In32BitMode : Predicate<"!Subtarget->is64Bit()">;
|
||||
def In64BitMode : Predicate<"Subtarget->is64Bit()">;
|
||||
@ -248,6 +249,8 @@ class DE { bits<4> Prefix = 9; }
|
||||
class DF { bits<4> Prefix = 10; }
|
||||
class XD { bits<4> Prefix = 11; }
|
||||
class XS { bits<4> Prefix = 12; }
|
||||
class T8 { bits<4> Prefix = 13; }
|
||||
class TA { bits<4> Prefix = 14; }
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -183,15 +183,17 @@ def PSHUFD_binary_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Instruction templates
|
||||
// SSI - SSE1 instructions with XS prefix.
|
||||
// SDI - SSE2 instructions with XD prefix.
|
||||
// PSI - SSE1 instructions with TB prefix.
|
||||
// PDI - SSE2 instructions with TB and OpSize prefixes.
|
||||
// SSI - SSE1 instructions with XS prefix.
|
||||
// SDI - SSE2 instructions with XD prefix.
|
||||
// PSI - SSE1 instructions with TB prefix.
|
||||
// PDI - SSE2 instructions with TB and OpSize prefixes.
|
||||
// PSIi8 - SSE1 instructions with ImmT == Imm8 and TB prefix.
|
||||
// PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
|
||||
// S3I - SSE3 instructions with TB and OpSize prefixes.
|
||||
// S3SI - SSE3 instructions with XS prefix.
|
||||
// S3DI - SSE3 instructions with XD prefix.
|
||||
// S3I - SSE3 instructions with TB and OpSize prefixes.
|
||||
// S3SI - SSE3 instructions with XS prefix.
|
||||
// S3DI - SSE3 instructions with XD prefix.
|
||||
// SS38I - SSSE3 instructions with T8 and OpSize prefixes.
|
||||
// SS3AI - SSSE3 instructions with TA and OpSize prefixes.
|
||||
class SSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
|
||||
: I<o, F, ops, asm, pattern>, XS, Requires<[HasSSE1]>;
|
||||
class SDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
|
||||
@ -212,6 +214,11 @@ class S3DI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
|
||||
class S3I<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
|
||||
: I<o, F, ops, asm, pattern>, TB, OpSize, Requires<[HasSSE3]>;
|
||||
|
||||
class SS38I<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
|
||||
: I<o, F, ops, asm, pattern>, T8, OpSize, Requires<[HasSSSE3]>;
|
||||
class SS3AI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
|
||||
: I<o, F, ops, asm, pattern>, TA, OpSize, Requires<[HasSSSE3]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Helpers for defining instructions that directly correspond to intrinsics.
|
||||
|
||||
@ -1311,6 +1318,22 @@ multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
}
|
||||
}
|
||||
|
||||
/// SS3I_binop_rm_int - Simple SSSE3 binary operator matched via an intrinsic; the memory form loads its operand as v2i64 and bitconverts it.
|
||||
let isTwoAddress = 1 in {
|
||||
multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
|
||||
bit Commutable = 0> {
|
||||
def rr : SS38I<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
|
||||
!strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]> {
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
def rm : SS38I<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2),
|
||||
!strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst,
|
||||
(IntId VR128:$src1,
|
||||
(bitconvert (loadv2i64 addr:$src2))))]>;
|
||||
}
|
||||
}
|
||||
|
||||
// 128-bit Integer Arithmetic
|
||||
|
||||
@ -1340,6 +1363,9 @@ defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, 1>;
|
||||
defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w , 1>;
|
||||
defm PMULUDQ : PDI_binop_rm_int<0xF4, "pmuludq", int_x86_sse2_pmulu_dq, 1>;
|
||||
|
||||
defm PMULHRSW128 : SS3I_binop_rm_int<0x0B, "pmulhrsw",
|
||||
int_x86_ssse3_pmulhrsw_128, 1>;
|
||||
|
||||
defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, 1>;
|
||||
|
||||
defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>;
|
||||
|
@ -113,6 +113,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
|
||||
if ((EDX >> 25) & 0x1) X86SSELevel = SSE1;
|
||||
if ((EDX >> 26) & 0x1) X86SSELevel = SSE2;
|
||||
if (ECX & 0x1) X86SSELevel = SSE3;
|
||||
if ((ECX >> 9) & 0x1) X86SSELevel = SSSE3;
|
||||
|
||||
if (memcmp(text.c, "GenuineIntel", 12) == 0) {
|
||||
X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
|
||||
|
@ -38,7 +38,7 @@ public:
|
||||
};
|
||||
protected:
|
||||
enum X86SSEEnum {
|
||||
NoMMXSSE, MMX, SSE1, SSE2, SSE3
|
||||
NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3
|
||||
};
|
||||
|
||||
enum X863DNowEnum {
|
||||
@ -51,7 +51,7 @@ protected:
|
||||
/// PICStyle - Which PIC style to use
|
||||
PICStyle::Style PICStyle;
|
||||
|
||||
/// X86SSELevel - MMX, SSE1, SSE2, SSE3, or none supported.
|
||||
/// X86SSELevel - MMX, SSE1, SSE2, SSE3, SSSE3, or none supported.
|
||||
X86SSEEnum X86SSELevel;
|
||||
|
||||
/// X863DNowLevel - 3DNow or 3DNow Athlon, or none supported.
|
||||
@ -110,6 +110,7 @@ public:
|
||||
bool hasSSE1() const { return X86SSELevel >= SSE1; }
|
||||
bool hasSSE2() const { return X86SSELevel >= SSE2; }
|
||||
bool hasSSE3() const { return X86SSELevel >= SSE3; }
|
||||
bool hasSSSE3() const { return X86SSELevel >= SSSE3; }
|
||||
bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
|
||||
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user