Mirror of https://github.com/RPCS3/llvm-mirror.git
Don't use v16i32 for load pattern matching. All 512-bit integer loads are cast to v8i64.
llvm-svn: 188534
commit 79189e25c8
parent 95665b8b8f
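Concretely: 512-bit integer loads are legalized to the single canonical type v8i64, so a v16i32 consumer now folds its memory operand by matching a bc_v16i32 bitconvert wrapped around memopv8i64, rather than a dedicated memopv16i32 fragment. A minimal TableGen sketch of the idiom, assuming a hypothetical MyOp512 node and MYOP512rm instruction (illustration only, not from the patch):

    // Hypothetical sketch: fold a 512-bit integer load into a v16i32 op.
    // The load is matched at its canonical v8i64 type and retyped with a
    // bitconvert, rather than through a v16i32-typed load fragment.
    def : Pat<(v16i32 (MyOp512 VR512:$src1,
                        (bc_v16i32 (memopv8i64 addr:$src2)))),
              (MYOP512rm VR512:$src1, addr:$src2)>;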
lib/Target/X86/X86InstrAVX512.td
@@ -555,7 +555,7 @@ let Constraints = "$src1 = $dst" in {
                      (bitconvert (mem_frag addr:$src3)))))]>, EVEX_4V;
 }
 }
-defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv16i32, i512mem,
+defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv8i64, i512mem,
                   v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
 defm VPERMI2Q : avx512_perm_3src<0x76, "vpermi2q", VR512, memopv8i64, i512mem,
                   v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
@@ -1107,7 +1107,7 @@ def VMOVDQA64rm : AVX512BI<0x6F, MRMSrcMem, (outs VR512:$dst),
 }

 multiclass avx512_mov_int<bits<8> opc, string asm, RegisterClass RC,
-                          RegisterClass KRC,
+                          RegisterClass KRC, PatFrag bc_frag,
                           PatFrag ld_frag, X86MemOperand x86memop> {
 let neverHasSideEffects = 1 in
   def rr : AVX512XSI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
@@ -1116,7 +1116,7 @@ let neverHasSideEffects = 1 in
 let canFoldAsLoad = 1 in
   def rm : AVX512XSI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
-             [(set RC:$dst, (ld_frag addr:$src))]>,
+             [(set RC:$dst, (bc_frag (ld_frag addr:$src)))]>,
              EVEX;
 let Constraints = "$src1 = $dst" in {
   def rrk : AVX512XSI<opc, MRMSrcReg, (outs RC:$dst),
@@ -1132,10 +1132,10 @@ let Constraints = "$src1 = $dst" in {
 }
 }

-defm VMOVDQU32 : avx512_mov_int<0x6F, "vmovdqu32", VR512, VK16WM, memopv16i32, i512mem>,
-                 EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VMOVDQU64 : avx512_mov_int<0x6F, "vmovdqu64", VR512, VK8WM, memopv8i64, i512mem>,
-                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VMOVDQU32 : avx512_mov_int<0x6F, "vmovdqu32", VR512, VK16WM, bc_v16i32,
+                                memopv8i64, i512mem>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VMOVDQU64 : avx512_mov_int<0x6F, "vmovdqu64", VR512, VK8WM, bc_v8i64,
+                                memopv8i64, i512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

 let AddedComplexity = 20 in {
 def : Pat<(v16f32 (vselect VK16WM:$mask, (v16f32 VR512:$src1),
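With the new bc_frag parameter, the rm variant selects on the bitconverted load. After substituting bc_frag = bc_v16i32 and ld_frag = memopv8i64 for VMOVDQU32, the multiclass effectively produces a pattern equivalent to this sketch (an expanded equivalent for illustration, not the literal generated record):

    // The v8i64-typed unaligned load, retyped to v16i32 via bitconvert,
    // selects to the real VMOVDQU32rm instruction.
    def : Pat<(v16i32 (bitconvert (v8i64 (memop addr:$src)))),
              (VMOVDQU32rm addr:$src)>;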
lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -277,7 +277,6 @@ def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
 // 512-bit load pattern fragments
 def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (load node:$ptr))>;
 def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (load node:$ptr))>;
-def loadv16i32 : PatFrag<(ops node:$ptr), (v16i32 (load node:$ptr))>;
 def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (load node:$ptr))>;

 // 128-/256-/512-bit extload pattern fragments
@@ -351,8 +350,6 @@ def alignedloadv16f32 : PatFrag<(ops node:$ptr),
                                 (v16f32 (alignedload512 node:$ptr))>;
 def alignedloadv8f64 : PatFrag<(ops node:$ptr),
                                (v8f64 (alignedload512 node:$ptr))>;
-def alignedloadv16i32 : PatFrag<(ops node:$ptr),
-                                (v16i32 (alignedload512 node:$ptr))>;
 def alignedloadv8i64 : PatFrag<(ops node:$ptr),
                                (v8i64 (alignedload512 node:$ptr))>;

@@ -379,14 +376,12 @@ def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
 // 256-bit memop pattern fragments
 // NOTE: all 256-bit integer vector loads are promoted to v4i64
 def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>;
-def memopv8i32 : PatFrag<(ops node:$ptr), (v8i32 (memop node:$ptr))>;
 def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>;
 def memopv4i64 : PatFrag<(ops node:$ptr), (v4i64 (memop node:$ptr))>;

 // 512-bit memop pattern fragments
 def memopv16f32 : PatFrag<(ops node:$ptr), (v16f32 (memop node:$ptr))>;
 def memopv8f64 : PatFrag<(ops node:$ptr), (v8f64 (memop node:$ptr))>;
-def memopv16i32 : PatFrag<(ops node:$ptr), (v16i32 (memop node:$ptr))>;
 def memopv8i64 : PatFrag<(ops node:$ptr), (v8i64 (memop node:$ptr))>;

 // SSSE3 uses MMX registers for some instructions. They aren't aligned on a
@@ -438,6 +433,11 @@ def bc_v16i16 : PatFrag<(ops node:$in), (v16i16 (bitconvert node:$in))>;
 def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>;
 def bc_v4i64 : PatFrag<(ops node:$in), (v4i64 (bitconvert node:$in))>;

+// 512-bit bitconvert pattern fragments
+def bc_v16i32 : PatFrag<(ops node:$in), (v16i32 (bitconvert node:$in))>;
+def bc_v8i64 : PatFrag<(ops node:$in), (v8i64 (bitconvert node:$in))>;
+
+
 def vzmovl_v2i64 : PatFrag<(ops node:$src),
                            (bitconvert (v2i64 (X86vzmovl
                              (v2i64 (scalar_to_vector (loadi64 node:$src))))))>;
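The fragment changes follow the same canonicalization: the v16i32 load/aligned-load/memop fragments go away (as does memopv8i32, consistent with the note that 256-bit integer loads are promoted to v4i64), and the 512-bit bitconvert fragments bc_v16i32/bc_v8i64 are added in their place. If a single named fragment for the combined match were wanted, it could be written like this (a hypothetical helper, not part of the patch; the patch composes the two fragments inline instead):

    // Hypothetical helper: the canonical v8i64 memop plus the bitconvert
    // back to v16i32, packaged as one PatFrag.
    def memop_bc_v16i32 : PatFrag<(ops node:$ptr),
                                  (v16i32 (bitconvert (memopv8i64 node:$ptr)))>;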
test/CodeGen/X86/avx512-shuffle.ll
@@ -64,3 +64,44 @@ define <8 x i64> @test6(<8 x i64> %a) nounwind {
   ret <8 x i64> %c
 }

+; CHECK: test7:
+; CHECK: vpermi2q
+; CHECK: ret
+define <8 x i64> @test7(<8 x i64> %a, <8 x i64> %b) nounwind {
+  %c = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
+  ret <8 x i64> %c
+}
+
+; CHECK: test8:
+; CHECK: vpermi2d
+; CHECK: ret
+define <16 x i32> @test8(<16 x i32> %a, <16 x i32> %b) nounwind {
+  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
+  ret <16 x i32> %c
+}
+
+; CHECK: test9:
+; CHECK: vpermi2ps
+; CHECK: ret
+define <16 x float> @test9(<16 x float> %a, <16 x float> %b) nounwind {
+  %c = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
+  ret <16 x float> %c
+}
+
+; CHECK: test10:
+; CHECK: vpermi2ps (
+; CHECK: ret
+define <16 x float> @test10(<16 x float> %a, <16 x float>* %b) nounwind {
+  %c = load <16 x float>* %b
+  %d = shufflevector <16 x float> %a, <16 x float> %c, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
+  ret <16 x float> %d
+}
+
+; CHECK: test11:
+; CHECK: vpermi2d (
+; CHECK: ret
+define <16 x i32> @test11(<16 x i32> %a, <16 x i32>* %b) nounwind {
+  %c = load <16 x i32>* %b
+  %d = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
+  ret <16 x i32> %d
+}
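The new tests cover both the register form (test7, test8, test9) and the memory form (test10, test11), where the `(` in the CHECK line pins the match to an instruction with a folded memory operand, confirming the load still folds after the v8i64 canonicalization. A test like this is typically driven by a RUN line at the top of the file, along these lines (assumed for illustration; the actual RUN line is outside the hunk):

    ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s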