mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-01-09 09:32:20 +00:00
[X86] Remove AVX1 vbroadcast intrinsics
The corresponding CFE patch replaces these intrinsics with vector initializers in avxintrin.h. This patch removes the LLVM intrinsics from the backend. We now stop lowering at the X86ISD::VBROADCAST custom node rather than lowering it further to the intrinsics. The patch only changes VBROADCASTS* and leaves VBROADCAST[FI]128 to continue to use intrinsics. As explained in the CFE patch, the reason is that we currently don't generate as good code for them without the intrinsics. CodeGen/X86/avx-vbroadcast.ll already provides coverage for this change: it checks that for a series of insertelements we generate the appropriate vbroadcast instruction. I also verified that there was no assembly change in the test-suite before and after this patch. llvm-svn: 209864
This commit is contained in:
parent
39066800e9
commit
35b80eaef1
@ -1304,15 +1304,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
|
||||
// Vector load with broadcast
|
||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_avx_vbroadcast_ss :
|
||||
GCCBuiltin<"__builtin_ia32_vbroadcastss">,
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
|
||||
def int_x86_avx_vbroadcast_sd_256 :
|
||||
GCCBuiltin<"__builtin_ia32_vbroadcastsd256">,
|
||||
Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
|
||||
def int_x86_avx_vbroadcast_ss_256 :
|
||||
GCCBuiltin<"__builtin_ia32_vbroadcastss256">,
|
||||
Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
|
||||
def int_x86_avx_vbroadcastf128_pd_256 :
|
||||
GCCBuiltin<"__builtin_ia32_vbroadcastf128_pd256">,
|
||||
Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
|
||||
|
@ -7969,6 +7969,16 @@ class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC,
|
||||
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||
[(set RC:$dst, (Int addr:$src))]>, Sched<[Sched]>, VEX;
|
||||
|
||||
class avx_broadcast_no_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
|
||||
X86MemOperand x86memop, ValueType VT,
|
||||
PatFrag ld_frag, SchedWrite Sched> :
|
||||
AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||
[(set RC:$dst, (VT (X86VBroadcast (ld_frag addr:$src))))]>,
|
||||
Sched<[Sched]>, VEX {
|
||||
let mayLoad = 1;
|
||||
}
|
||||
|
||||
// AVX2 adds register forms
|
||||
class avx2_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC,
|
||||
Intrinsic Int, SchedWrite Sched> :
|
||||
@ -7977,16 +7987,15 @@ class avx2_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC,
|
||||
[(set RC:$dst, (Int VR128:$src))]>, Sched<[Sched]>, VEX;
|
||||
|
||||
let ExeDomain = SSEPackedSingle in {
|
||||
def VBROADCASTSSrm : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
|
||||
int_x86_avx_vbroadcast_ss, WriteLoad>;
|
||||
def VBROADCASTSSYrm : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
|
||||
int_x86_avx_vbroadcast_ss_256,
|
||||
WriteFShuffleLd>, VEX_L;
|
||||
def VBROADCASTSSrm : avx_broadcast_no_int<0x18, "vbroadcastss", VR128,
|
||||
f32mem, v4f32, loadf32, WriteLoad>;
|
||||
def VBROADCASTSSYrm : avx_broadcast_no_int<0x18, "vbroadcastss", VR256,
|
||||
f32mem, v8f32, loadf32,
|
||||
WriteFShuffleLd>, VEX_L;
|
||||
}
|
||||
let ExeDomain = SSEPackedDouble in
|
||||
def VBROADCASTSDYrm : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
|
||||
int_x86_avx_vbroadcast_sd_256,
|
||||
WriteFShuffleLd>, VEX_L;
|
||||
def VBROADCASTSDYrm : avx_broadcast_no_int<0x19, "vbroadcastsd", VR256, f64mem,
|
||||
v4f64, loadf64, WriteFShuffleLd>, VEX_L;
|
||||
def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
|
||||
int_x86_avx_vbroadcastf128_pd_256,
|
||||
WriteFShuffleLd>, VEX_L;
|
||||
@ -8543,13 +8552,6 @@ def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))),
|
||||
(VBROADCASTSSYrm addr:$src)>;
|
||||
def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
|
||||
(VBROADCASTSDYrm addr:$src)>;
|
||||
def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
|
||||
(VBROADCASTSSrm addr:$src)>;
|
||||
|
||||
// Provide fallback in case the load node that is used in the patterns above
|
||||
// is used by additional users, which prevents the pattern selection.
|
||||
let AddedComplexity = 20 in {
|
||||
|
@ -2219,14 +2219,6 @@ define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) {
|
||||
declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind
|
||||
|
||||
|
||||
define <4 x double> @test_x86_avx_vbroadcast_sd_256(i8* %a0) {
|
||||
; CHECK: vbroadcastsd
|
||||
%res = call <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8* %a0) ; <<4 x double>> [#uses=1]
|
||||
ret <4 x double> %res
|
||||
}
|
||||
declare <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8*) nounwind readonly
|
||||
|
||||
|
||||
define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) {
|
||||
; CHECK: vbroadcastf128
|
||||
%res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
|
||||
@ -2243,22 +2235,6 @@ define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
|
||||
declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly
|
||||
|
||||
|
||||
define <4 x float> @test_x86_avx_vbroadcast_ss(i8* %a0) {
|
||||
; CHECK: vbroadcastss
|
||||
%res = call <4 x float> @llvm.x86.avx.vbroadcast.ss(i8* %a0) ; <<4 x float>> [#uses=1]
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.avx.vbroadcast.ss(i8*) nounwind readonly
|
||||
|
||||
|
||||
define <8 x float> @test_x86_avx_vbroadcast_ss_256(i8* %a0) {
|
||||
; CHECK: vbroadcastss
|
||||
%res = call <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8* %a0) ; <<8 x float>> [#uses=1]
|
||||
ret <8 x float> %res
|
||||
}
|
||||
declare <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8*) nounwind readonly
|
||||
|
||||
|
||||
define <2 x double> @test_x86_avx_vextractf128_pd_256(<4 x double> %a0) {
|
||||
; CHECK: vextractf128
|
||||
%res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 7) ; <<2 x double>> [#uses=1]
|
||||
|
Loading…
Reference in New Issue
Block a user