mirror of
https://github.com/RPCS3/llvm.git
synced 2025-02-24 21:00:36 +00:00
[X86] Don't fold memory operands into insertps in the generated folding tables.
insertps behaves differently in its two forms: the register form selects from an input register based on the immediate operand, while the memory form just loads from the given address. We have custom code to change the immediate in cases where that's legal, so completely remove insertps from the generated tables. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@304540 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
0f1cadd920
commit
f4cc8004bc
@ -1926,5 +1926,19 @@ define <8 x float> @stack_fold_xorps_ymm(<8 x float> %a0, <8 x float> %a1) {
|
||||
ret <8 x float> %6
|
||||
}
|
||||
|
||||
define <4 x float> @stack_nofold_insertps(<8 x float> %a0, <8 x float> %a1) {
|
||||
; Cannot fold this without changing the immediate: the register form of
; insertps selects from an input register based on the immediate operand,
; while the memory form just loads the given address. The expected output
; below therefore reloads the spilled value into a register and uses the
; register-register form of vinsertps with the original immediate (179).
|
||||
; CHECK-LABEL: stack_nofold_insertps
|
||||
; CHECK: 32-byte Spill
|
||||
; CHECK: nop
|
||||
; CHECK: 32-byte Reload
|
||||
; CHECK: vinsertps $179, {{%xmm., %xmm., %xmm.}}
|
||||
; The inline asm produces a result in an SSE register ("=x") and clobbers
; xmm2-xmm15 and flags; presumably this register pressure is what forces the
; 32-byte spill/reload around the nop that the test expects.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
; Extract elements 0-3 of each <8 x float> argument as a <4 x float> value.
%v0 = shufflevector <8 x float> %a0, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%v1 = shufflevector <8 x float> %a1, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
; The insertps under test, with immediate 179.
%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v0, <4 x float> %v1, i8 179)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
attributes #0 = { "unsafe-fp-math"="false" }
|
||||
attributes #1 = { "unsafe-fp-math"="true" }
|
||||
|
@ -101,6 +101,11 @@ const char *const NoFoldSet[] = {
|
||||
"BTS16rr", "BTS32rr", "BTS64rr",
|
||||
"BTS16mr", "BTS32mr", "BTS64mr",
|
||||
|
||||
// insertps cannot be folded without adjusting the immediate. There's custom
|
||||
// code to handle it in X86InstrInfo.cpp, ignore it here.
|
||||
"INSERTPSrr", "INSERTPSrm",
|
||||
"VINSERTPSrr", "VINSERTPSrm", "VINSERTPSZrr", "VINSERTPSZrm",
|
||||
|
||||
// Memory folding is enabled only when optimizing for size by DAG
|
||||
// patterns. (issue detailed in D28744 review)
|
||||
"VCVTSS2SDrm", "VCVTSS2SDrr",
|
||||
|
Loading…
x
Reference in New Issue
Block a user