Add 3DNow! intrinsics.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@129551 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Michael J. Spencer 2011-04-15 00:32:41 +00:00
parent de29a52940
commit 4babeeeeed
4 changed files with 451 additions and 51 deletions

View File

@ -17,6 +17,83 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_int : Intrinsic<[], [llvm_i8_ty]>;
}
//===----------------------------------------------------------------------===//
// 3DNow!
let TargetPrefix = "x86" in {
// Intrinsics for the base 3DNow! instructions. Every intrinsic in this block
// is declared IntrNoMem, is tied to the GCC builtin named in its GCCBuiltin<>,
// and both takes and returns x86_mmx values. pf2id, pfrcp, pfrsqrt and pi2fd
// take a single operand; all the others take two.
def int_x86_3dnow_pavgusb : GCCBuiltin<"__builtin_ia32_pavgusb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_3dnow_pf2id : GCCBuiltin<"__builtin_ia32_pf2id">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_3dnow_pfacc : GCCBuiltin<"__builtin_ia32_pfacc">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_3dnow_pfadd : GCCBuiltin<"__builtin_ia32_pfadd">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_3dnow_pfcmpeq : GCCBuiltin<"__builtin_ia32_pfcmpeq">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_3dnow_pfcmpge : GCCBuiltin<"__builtin_ia32_pfcmpge">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_3dnow_pfcmpgt : GCCBuiltin<"__builtin_ia32_pfcmpgt">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_3dnow_pfmax : GCCBuiltin<"__builtin_ia32_pfmax">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_3dnow_pfmin : GCCBuiltin<"__builtin_ia32_pfmin">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_3dnow_pfmul : GCCBuiltin<"__builtin_ia32_pfmul">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_3dnow_pfrcp : GCCBuiltin<"__builtin_ia32_pfrcp">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_3dnow_pfrcpit1 : GCCBuiltin<"__builtin_ia32_pfrcpit1">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_3dnow_pfrcpit2 : GCCBuiltin<"__builtin_ia32_pfrcpit2">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_3dnow_pfrsqrt : GCCBuiltin<"__builtin_ia32_pfrsqrt">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_3dnow_pfrsqit1 : GCCBuiltin<"__builtin_ia32_pfrsqit1">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_3dnow_pfsub : GCCBuiltin<"__builtin_ia32_pfsub">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_3dnow_pfsubr : GCCBuiltin<"__builtin_ia32_pfsubr">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_3dnow_pi2fd : GCCBuiltin<"__builtin_ia32_pi2fd">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_3dnow_pmulhrw : GCCBuiltin<"__builtin_ia32_pmulhrw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
// 3DNow! extensions
let TargetPrefix = "x86" in {
// Intrinsics for the 3DNow! extension instructions; note the "3dnowa" infix
// in the record names. All are IntrNoMem and operate on x86_mmx values.
// pf2iw, pi2fw and pswapd take one operand; pfnacc and pfpnacc take two.
def int_x86_3dnowa_pf2iw : GCCBuiltin<"__builtin_ia32_pf2iw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_3dnowa_pfnacc : GCCBuiltin<"__builtin_ia32_pfnacc">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_3dnowa_pfpnacc : GCCBuiltin<"__builtin_ia32_pfpnacc">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_3dnowa_pi2fw : GCCBuiltin<"__builtin_ia32_pi2fw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
// NOTE(review): pswapd is the only intrinsic here without a GCCBuiltin<>.
// Presumably more than one builtin maps onto it, so no single name can be
// attached -- confirm against the front end.
def int_x86_3dnowa_pswapd :
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
// SSE1

View File

@ -50,7 +50,8 @@ def FeatureSSE42 : SubtargetFeature<"sse42", "X86SSELevel", "SSE42",
"Enable SSE 4.2 instructions",
[FeatureSSE41, FeaturePOPCNT]>;
def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
"Enable 3DNow! instructions">;
"Enable 3DNow! instructions",
[FeatureMMX]>;
def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
"Enable 3DNow! Athlon instructions",
[Feature3DNow]>;
@ -125,10 +126,10 @@ def : Proc<"sandybridge", [FeatureSSE42, Feature64Bit,
FeatureAES, FeatureCLMUL]>;
def : Proc<"k6", [FeatureMMX]>;
def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>;
def : Proc<"k6-3", [FeatureMMX, Feature3DNow]>;
def : Proc<"athlon", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>;
def : Proc<"athlon-tbird", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>;
def : Proc<"k6-2", [Feature3DNow]>;
def : Proc<"k6-3", [Feature3DNow]>;
def : Proc<"athlon", [Feature3DNowA, FeatureSlowBTMem]>;
def : Proc<"athlon-tbird", [Feature3DNowA, FeatureSlowBTMem]>;
def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
@ -156,8 +157,8 @@ def : Proc<"shanghai", [Feature3DNowA, Feature64Bit, FeatureSSE4A,
Feature3DNowA]>;
def : Proc<"winchip-c6", [FeatureMMX]>;
def : Proc<"winchip2", [FeatureMMX, Feature3DNow]>;
def : Proc<"c3", [FeatureMMX, Feature3DNow]>;
def : Proc<"winchip2", [Feature3DNow]>;
def : Proc<"c3", [Feature3DNow]>;
def : Proc<"c3-2", [FeatureSSE1]>;
//===----------------------------------------------------------------------===//

View File

@ -12,66 +12,91 @@
//
//===----------------------------------------------------------------------===//
// FIXME: We don't support any intrinsics for these instructions yet.
class I3DNow<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, TB, Requires<[Has3DNow]> {
class I3DNow<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pat>
: I<o, F, outs, ins, asm, pat>, TB, Requires<[Has3DNow]> {
}
class I3DNow_binop<bits<8> o, Format F, dag ins, string Mnemonic>
: I<o, F, (outs VR64:$dst), ins,
!strconcat(Mnemonic, "\t{$src2, $dst|$dst, $src2}"), []>,
TB, Requires<[Has3DNow]>, Has3DNow0F0FOpcode {
class I3DNow_binop<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat>
: I3DNow<o, F, (outs VR64:$dst), ins,
!strconcat(Mnemonic, "\t{$src2, $dst|$dst, $src2}"), pat>,
Has3DNow0F0FOpcode {
// FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet.
let isAsmParserOnly = 1;
let Constraints = "$src1 = $dst";
}
// I3DNow_conv - A 3DNow! instruction with a single source operand. It defines
// VR64:$dst and prints as Mnemonic followed by $src and $dst, in either order
// via the {...|...} alternation in the asm string. Unlike I3DNow_binop it sets
// no "$src1 = $dst" constraint, so the `ins` dag passed in must name its
// operand $src.
class I3DNow_conv<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat>
: I3DNow<o, F, (outs VR64:$dst), ins,
!strconcat(Mnemonic, "\t{$src, $dst|$dst, $src}"), pat>,
Has3DNow0F0FOpcode {
// FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet.
let isAsmParserOnly = 1;
}
let Constraints = "$src1 = $dst" in {
// MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic.
// When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp.
multiclass I3DNow_binop_rm<bits<8> opc, string Mn> {
def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn>;
def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn>;
}
multiclass I3DNow_binop_rm<bits<8> opc, string Mn> {
def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn, []>;
def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn, []>;
}
defm PAVGUSB : I3DNow_binop_rm<0xBF, "pavgusb">;
defm PF2ID : I3DNow_binop_rm<0x1D, "pf2id">;
defm PFACC : I3DNow_binop_rm<0xAE, "pfacc">;
defm PFADD : I3DNow_binop_rm<0x9E, "pfadd">;
defm PFCMPEQ : I3DNow_binop_rm<0xB0, "pfcmpeq">;
defm PFCMPGE : I3DNow_binop_rm<0x90, "pfcmpge">;
defm PFCMPGT : I3DNow_binop_rm<0xA0, "pfcmpgt">;
defm PFMAX : I3DNow_binop_rm<0xA4, "pfmax">;
defm PFMIN : I3DNow_binop_rm<0x94, "pfmin">;
defm PFMUL : I3DNow_binop_rm<0xB4, "pfmul">;
defm PFRCP : I3DNow_binop_rm<0x96, "pfrcp">;
defm PFRCPIT1 : I3DNow_binop_rm<0xA6, "pfrcpit1">;
defm PFRCPIT2 : I3DNow_binop_rm<0xB6, "pfrcpit2">;
defm PFRSQIT1 : I3DNow_binop_rm<0xA7, "pfrsqit1">;
defm PFRSQRT : I3DNow_binop_rm<0x97, "pfrsqrt">;
defm PFSUB : I3DNow_binop_rm<0x9A, "pfsub">;
defm PFSUBR : I3DNow_binop_rm<0xAA, "pfsubr">;
defm PI2FD : I3DNow_binop_rm<0x0D, "pi2fd">;
defm PMULHRW : I3DNow_binop_rm<0xB7, "pmulhrw">;
// I3DNow_binop_rm_int - Two-operand 3DNow! instruction selected from an
// intrinsic. The intrinsic record is looked up by name with !cast, the name
// being "int_x86_3dnow" # Ver # "_" # Mn; Ver defaults to "" and is "a" for
// the 3DNow! extension intrinsics.
//   rr - both sources are VR64 registers.
//   rm - $src2 is an i64mem operand; the pattern matches a load_mmx of
//        addr:$src2 wrapped in a bitconvert, so the load is folded.
multiclass I3DNow_binop_rm_int<bits<8> opc, string Mn, string Ver = ""> {
def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn,
[(set VR64:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1, VR64:$src2))]>;
def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn,
[(set VR64:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>;
}
// I3DNow_conv_rm - Pattern-less register (rr) and memory (rm) forms of a
// single-source 3DNow! instruction. The source operand must be named $src:
// the asm string built by I3DNow_conv refers to $src, so naming it $src1
// would leave the asm string pointing at an operand that does not exist.
multiclass I3DNow_conv_rm<bits<8> opc, string Mn> {
  def rr : I3DNow_conv<opc, MRMSrcReg, (ins VR64:$src), Mn, []>;
  def rm : I3DNow_conv<opc, MRMSrcMem, (ins i64mem:$src), Mn, []>;
}
// I3DNow_conv_rm_int - Single-source 3DNow! instruction selected from an
// intrinsic. The intrinsic record is looked up by name with !cast, the name
// being "int_x86_3dnow" # Ver # "_" # Mn; Ver defaults to "" and is "a" for
// the 3DNow! extension intrinsics.
//   rr - the source is a VR64 register.
//   rm - the source is an i64mem operand; the pattern matches a load_mmx of
//        addr:$src wrapped in a bitconvert, so the load is folded.
multiclass I3DNow_conv_rm_int<bits<8> opc, string Mn, string Ver = ""> {
def rr : I3DNow_conv<opc, MRMSrcReg, (ins VR64:$src), Mn,
[(set VR64:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src))]>;
def rm : I3DNow_conv<opc, MRMSrcMem, (ins i64mem:$src), Mn,
[(set VR64:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_3dnow", Ver, "_", Mn))
(bitconvert (load_mmx addr:$src))))]>;
}
// Base 3DNow! instructions. Each defm produces an rr and an rm record and
// passes the opcode byte and the mnemonic; the mnemonic also selects the
// matching int_x86_3dnow_<mnemonic> intrinsic. PF2ID, PFRCP, PFRSQRT and PI2FD
// have a single source and use the conv multiclass; the rest use the binop one.
defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb">;
defm PF2ID : I3DNow_conv_rm_int<0x1D, "pf2id">;
defm PFACC : I3DNow_binop_rm_int<0xAE, "pfacc">;
defm PFADD : I3DNow_binop_rm_int<0x9E, "pfadd">;
defm PFCMPEQ : I3DNow_binop_rm_int<0xB0, "pfcmpeq">;
defm PFCMPGE : I3DNow_binop_rm_int<0x90, "pfcmpge">;
defm PFCMPGT : I3DNow_binop_rm_int<0xA0, "pfcmpgt">;
defm PFMAX : I3DNow_binop_rm_int<0xA4, "pfmax">;
defm PFMIN : I3DNow_binop_rm_int<0x94, "pfmin">;
defm PFMUL : I3DNow_binop_rm_int<0xB4, "pfmul">;
defm PFRCP : I3DNow_conv_rm_int<0x96, "pfrcp">;
defm PFRCPIT1 : I3DNow_binop_rm_int<0xA6, "pfrcpit1">;
defm PFRCPIT2 : I3DNow_binop_rm_int<0xB6, "pfrcpit2">;
defm PFRSQIT1 : I3DNow_binop_rm_int<0xA7, "pfrsqit1">;
defm PFRSQRT : I3DNow_conv_rm_int<0x97, "pfrsqrt">;
defm PFSUB : I3DNow_binop_rm_int<0x9A, "pfsub">;
defm PFSUBR : I3DNow_binop_rm_int<0xAA, "pfsubr">;
defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd">;
defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw">;
def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>;
def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i32mem:$addr),
"prefetch $addr", []>;
// FIXME: Disassembler gets a bogus decode conflict.
let isAsmParserOnly = 1 in {
let isAsmParserOnly = 1 in
def PREFETCHW : I3DNow<0x0D, MRM1m, (outs), (ins i16mem:$addr),
"prefetchw $addr", []>;
}
// "3DNowA" instructions
defm PF2IW : I3DNow_binop_rm<0x1C, "pf2iw">;
defm PI2FW : I3DNow_binop_rm<0x0C, "pi2fw">;
defm PFNACC : I3DNow_binop_rm<0x8A, "pfnacc">;
defm PFPNACC : I3DNow_binop_rm<0x8E, "pfpnacc">;
defm PSWAPD : I3DNow_binop_rm<0xBB, "pswapd">;
defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", "a">;
defm PI2FW : I3DNow_conv_rm_int<0x0C, "pi2fw", "a">;
defm PFNACC : I3DNow_binop_rm_int<0x8A, "pfnacc", "a">;
defm PFPNACC : I3DNow_binop_rm_int<0x8E, "pfpnacc", "a">;
defm PSWAPD : I3DNow_conv_rm_int<0xBB, "pswapd", "a">;

View File

@ -0,0 +1,297 @@
; RUN: llc < %s -march=x86 -mattr=+3dnow | FileCheck %s
; Calls llvm.x86.3dnow.pavgusb on two x86_mmx arguments and returns the result
; as <8 x i8>; FileCheck expects "pavgusb" in llc's output. Both arguments are
; round-tripped through <8 x i8> bitcasts before the call.
; NOTE(review): the function name also contains "pavgusb", so presumably the
; emitted label alone could satisfy this CHECK -- confirm and tighten if so.
define <8 x i8> @test_pavgusb(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone {
; CHECK: pavgusb
entry:
%0 = bitcast x86_mmx %a.coerce to <8 x i8>
%1 = bitcast x86_mmx %b.coerce to <8 x i8>
%2 = bitcast <8 x i8> %0 to x86_mmx
%3 = bitcast <8 x i8> %1 to x86_mmx
%4 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %2, x86_mmx %3)
%5 = bitcast x86_mmx %4 to <8 x i8>
ret <8 x i8> %5
}
declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx) nounwind readnone
define <2 x i32> @test_pf2id(<2 x float> %a) nounwind readnone {
; CHECK: pf2id
entry:
%0 = bitcast <2 x float> %a to x86_mmx
%1 = tail call x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx %0)
%2 = bitcast x86_mmx %1 to <2 x i32>
ret <2 x i32> %2
}
declare x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx) nounwind readnone
define <2 x float> @test_pfacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
; CHECK: pfacc
entry:
%0 = bitcast <2 x float> %a to x86_mmx
%1 = bitcast <2 x float> %b to x86_mmx
%2 = tail call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %0, x86_mmx %1)
%3 = bitcast x86_mmx %2 to <2 x float>
ret <2 x float> %3
}
declare x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx, x86_mmx) nounwind readnone
define <2 x float> @test_pfadd(<2 x float> %a, <2 x float> %b) nounwind readnone {
; CHECK: pfadd
entry:
%0 = bitcast <2 x float> %a to x86_mmx
%1 = bitcast <2 x float> %b to x86_mmx
%2 = tail call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %0, x86_mmx %1)
%3 = bitcast x86_mmx %2 to <2 x float>
ret <2 x float> %3
}
declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx) nounwind readnone
define <2 x i32> @test_pfcmpeq(<2 x float> %a, <2 x float> %b) nounwind readnone {
; CHECK: pfcmpeq
entry:
%0 = bitcast <2 x float> %a to x86_mmx
%1 = bitcast <2 x float> %b to x86_mmx
%2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %0, x86_mmx %1)
%3 = bitcast x86_mmx %2 to <2 x i32>
ret <2 x i32> %3
}
declare x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx, x86_mmx) nounwind readnone
define <2 x i32> @test_pfcmpge(<2 x float> %a, <2 x float> %b) nounwind readnone {
; CHECK: pfcmpge
entry:
%0 = bitcast <2 x float> %a to x86_mmx
%1 = bitcast <2 x float> %b to x86_mmx
%2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %0, x86_mmx %1)
%3 = bitcast x86_mmx %2 to <2 x i32>
ret <2 x i32> %3
}
declare x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx, x86_mmx) nounwind readnone
define <2 x i32> @test_pfcmpgt(<2 x float> %a, <2 x float> %b) nounwind readnone {
; CHECK: pfcmpgt
entry:
%0 = bitcast <2 x float> %a to x86_mmx
%1 = bitcast <2 x float> %b to x86_mmx
%2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %0, x86_mmx %1)
%3 = bitcast x86_mmx %2 to <2 x i32>
ret <2 x i32> %3
}
declare x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx, x86_mmx) nounwind readnone
define <2 x float> @test_pfmax(<2 x float> %a, <2 x float> %b) nounwind readnone {
; CHECK: pfmax
entry:
%0 = bitcast <2 x float> %a to x86_mmx
%1 = bitcast <2 x float> %b to x86_mmx
%2 = tail call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %0, x86_mmx %1)
%3 = bitcast x86_mmx %2 to <2 x float>
ret <2 x float> %3
}
declare x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx, x86_mmx) nounwind readnone
define <2 x float> @test_pfmin(<2 x float> %a, <2 x float> %b) nounwind readnone {
; CHECK: pfmin
entry:
%0 = bitcast <2 x float> %a to x86_mmx
%1 = bitcast <2 x float> %b to x86_mmx
%2 = tail call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %0, x86_mmx %1)
%3 = bitcast x86_mmx %2 to <2 x float>
ret <2 x float> %3
}
declare x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx, x86_mmx) nounwind readnone
define <2 x float> @test_pfmul(<2 x float> %a, <2 x float> %b) nounwind readnone {
; CHECK: pfmul
entry:
%0 = bitcast <2 x float> %a to x86_mmx
%1 = bitcast <2 x float> %b to x86_mmx
%2 = tail call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %0, x86_mmx %1)
%3 = bitcast x86_mmx %2 to <2 x float>
ret <2 x float> %3
}
declare x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx, x86_mmx) nounwind readnone
define <2 x float> @test_pfrcp(<2 x float> %a) nounwind readnone {
; CHECK: pfrcp
entry:
%0 = bitcast <2 x float> %a to x86_mmx
%1 = tail call x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx %0)
%2 = bitcast x86_mmx %1 to <2 x float>
ret <2 x float> %2
}
declare x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx) nounwind readnone
define <2 x float> @test_pfrcpit1(<2 x float> %a, <2 x float> %b) nounwind readnone {
; CHECK: pfrcpit1
entry:
%0 = bitcast <2 x float> %a to x86_mmx
%1 = bitcast <2 x float> %b to x86_mmx
%2 = tail call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %0, x86_mmx %1)
%3 = bitcast x86_mmx %2 to <2 x float>
ret <2 x float> %3
}
declare x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx, x86_mmx) nounwind readnone
define <2 x float> @test_pfrcpit2(<2 x float> %a, <2 x float> %b) nounwind readnone {
; CHECK: pfrcpit2
entry:
%0 = bitcast <2 x float> %a to x86_mmx
%1 = bitcast <2 x float> %b to x86_mmx
%2 = tail call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %0, x86_mmx %1)
%3 = bitcast x86_mmx %2 to <2 x float>
ret <2 x float> %3
}
declare x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx, x86_mmx) nounwind readnone
define <2 x float> @test_pfrsqrt(<2 x float> %a) nounwind readnone {
; CHECK: pfrsqrt
entry:
%0 = bitcast <2 x float> %a to x86_mmx
%1 = tail call x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx %0)
%2 = bitcast x86_mmx %1 to <2 x float>
ret <2 x float> %2
}
declare x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx) nounwind readnone
define <2 x float> @test_pfrsqit1(<2 x float> %a, <2 x float> %b) nounwind readnone {
; CHECK: pfrsqit1
entry:
%0 = bitcast <2 x float> %a to x86_mmx
%1 = bitcast <2 x float> %b to x86_mmx
%2 = tail call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %0, x86_mmx %1)
%3 = bitcast x86_mmx %2 to <2 x float>
ret <2 x float> %3
}
declare x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx, x86_mmx) nounwind readnone
define <2 x float> @test_pfsub(<2 x float> %a, <2 x float> %b) nounwind readnone {
; CHECK: pfsub
entry:
%0 = bitcast <2 x float> %a to x86_mmx
%1 = bitcast <2 x float> %b to x86_mmx
%2 = tail call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %0, x86_mmx %1)
%3 = bitcast x86_mmx %2 to <2 x float>
ret <2 x float> %3
}
declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx) nounwind readnone
define <2 x float> @test_pfsubr(<2 x float> %a, <2 x float> %b) nounwind readnone {
; CHECK: pfsubr
entry:
%0 = bitcast <2 x float> %a to x86_mmx
%1 = bitcast <2 x float> %b to x86_mmx
%2 = tail call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %0, x86_mmx %1)
%3 = bitcast x86_mmx %2 to <2 x float>
ret <2 x float> %3
}
declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx) nounwind readnone
define <2 x float> @test_pi2fd(x86_mmx %a.coerce) nounwind readnone {
; CHECK: pi2fd
entry:
%0 = bitcast x86_mmx %a.coerce to <2 x i32>
%1 = bitcast <2 x i32> %0 to x86_mmx
%2 = call x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx %1)
%3 = bitcast x86_mmx %2 to <2 x float>
ret <2 x float> %3
}
declare x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx) nounwind readnone
define <4 x i16> @test_pmulhrw(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone {
; CHECK: pmulhrw
entry:
%0 = bitcast x86_mmx %a.coerce to <4 x i16>
%1 = bitcast x86_mmx %b.coerce to <4 x i16>
%2 = bitcast <4 x i16> %0 to x86_mmx
%3 = bitcast <4 x i16> %1 to x86_mmx
%4 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %2, x86_mmx %3)
%5 = bitcast x86_mmx %4 to <4 x i16>
ret <4 x i16> %5
}
declare x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx, x86_mmx) nounwind readnone
define <2 x i32> @test_pf2iw(<2 x float> %a) nounwind readnone {
; CHECK: pf2iw
entry:
%0 = bitcast <2 x float> %a to x86_mmx
%1 = tail call x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx %0)
%2 = bitcast x86_mmx %1 to <2 x i32>
ret <2 x i32> %2
}
declare x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx) nounwind readnone
define <2 x float> @test_pfnacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
; CHECK: pfnacc
entry:
%0 = bitcast <2 x float> %a to x86_mmx
%1 = bitcast <2 x float> %b to x86_mmx
%2 = tail call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %0, x86_mmx %1)
%3 = bitcast x86_mmx %2 to <2 x float>
ret <2 x float> %3
}
declare x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx, x86_mmx) nounwind readnone
define <2 x float> @test_pfpnacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
; CHECK: pfpnacc
entry:
%0 = bitcast <2 x float> %a to x86_mmx
%1 = bitcast <2 x float> %b to x86_mmx
%2 = tail call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %0, x86_mmx %1)
%3 = bitcast x86_mmx %2 to <2 x float>
ret <2 x float> %3
}
declare x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx, x86_mmx) nounwind readnone
define <2 x float> @test_pi2fw(x86_mmx %a.coerce) nounwind readnone {
; CHECK: pi2fw
entry:
%0 = bitcast x86_mmx %a.coerce to <2 x i32>
%1 = bitcast <2 x i32> %0 to x86_mmx
%2 = call x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx %1)
%3 = bitcast x86_mmx %2 to <2 x float>
ret <2 x float> %3
}
declare x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx) nounwind readnone
; Float flavour of the pswapd test: bitcasts a <2 x float> to x86_mmx, calls
; llvm.x86.3dnowa.pswapd and bitcasts the result back to <2 x float>.
; FileCheck expects "pswapd" in llc's output.
define <2 x float> @test_pswapdsf(<2 x float> %a) nounwind readnone {
; CHECK: pswapd
entry:
%0 = bitcast <2 x float> %a to x86_mmx
%1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0)
%2 = bitcast x86_mmx %1 to <2 x float>
ret <2 x float> %2
}
; Integer flavour of the pswapd test: bitcasts a <2 x i32> to x86_mmx, calls
; the same llvm.x86.3dnowa.pswapd intrinsic and bitcasts the result back to
; <2 x i32>. FileCheck expects "pswapd" in llc's output.
define <2 x i32> @test_pswapdsi(<2 x i32> %a) nounwind readnone {
; CHECK: pswapd
entry:
%0 = bitcast <2 x i32> %a to x86_mmx
%1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0)
%2 = bitcast x86_mmx %1 to <2 x i32>
ret <2 x i32> %2
}
declare x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx) nounwind readnone