mirror of
https://github.com/RPCSX/llvm.git
synced 2025-02-12 07:40:58 +00:00
Add AVX SSE4.1 extractps and pinsr instructions
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@107746 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
4b76ffc1ff
commit
3c14822312
@ -4516,6 +4516,8 @@ multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
|
||||
addr:$dst)]>, OpSize;
|
||||
}
|
||||
|
||||
let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in
|
||||
defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
|
||||
defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
|
||||
|
||||
// Also match an EXTRACTPS store when the store is done as f32 instead of i32.
|
||||
@ -4525,46 +4527,76 @@ def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
|
||||
(EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
|
||||
Requires<[HasSSE41]>;
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
multiclass SS41I_insert8<bits<8> opc, string OpcodeStr> {
|
||||
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
[(set VR128:$dst,
|
||||
(X86pinsrb VR128:$src1, GR32:$src2, imm:$src3))]>, OpSize;
|
||||
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, i8mem:$src2, i32i8imm:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
[(set VR128:$dst,
|
||||
(X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
|
||||
imm:$src3))]>, OpSize;
|
||||
}
|
||||
// SS41I_insert8 - Defines a register-source (rr) and a memory-source (rm)
// record that insert a byte into a VR128 value; both patterns select the
// X86pinsrb node and both records carry the OpSize prefix.
//   opc     - opcode byte passed through to SS4AIi8.
//   asm     - mnemonic placed in front of the operand string.
//   Is2Addr - defaults to 1. When set, the operand string names only
//             $src3, $src2 and $dst; when clear, $src1 is printed as an
//             additional operand.
multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
|
||||
// Register form: the value to insert is taken from GR32:$src2.
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
|
||||
!if(Is2Addr,
|
||||
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
!strconcat(asm,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[(set VR128:$dst,
|
||||
(X86pinsrb VR128:$src1, GR32:$src2, imm:$src3))]>, OpSize;
|
||||
// Memory form: the value to insert is an extloadi8 from i8mem:$src2.
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, i8mem:$src2, i32i8imm:$src3),
|
||||
!if(Is2Addr,
|
||||
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
!strconcat(asm,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[(set VR128:$dst,
|
||||
(X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
|
||||
imm:$src3))]>, OpSize;
|
||||
}
|
||||
|
||||
defm PINSRB : SS41I_insert8<0x20, "pinsrb">;
|
||||
let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in
|
||||
defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V;
|
||||
let Constraints = "$src1 = $dst" in
|
||||
defm PINSRB : SS41I_insert8<0x20, "pinsrb">;
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
multiclass SS41I_insert32<bits<8> opc, string OpcodeStr> {
|
||||
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
[(set VR128:$dst,
|
||||
(v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
|
||||
OpSize;
|
||||
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, i32mem:$src2, i32i8imm:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
[(set VR128:$dst,
|
||||
(v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2),
|
||||
imm:$src3)))]>, OpSize;
|
||||
}
|
||||
// SS41I_insert32 - Defines a register-source (rr) and a memory-source (rm)
// record whose patterns are a v4i32 insertelt into VR128:$src1 at the
// immediate index $src3; both records carry the OpSize prefix.
//   opc     - opcode byte passed through to SS4AIi8.
//   asm     - mnemonic placed in front of the operand string.
//   Is2Addr - defaults to 1. When set, the operand string names only
//             $src3, $src2 and $dst; when clear, $src1 is printed as an
//             additional operand.
multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
|
||||
// Register form: the inserted element is GR32:$src2.
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
|
||||
!if(Is2Addr,
|
||||
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
!strconcat(asm,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[(set VR128:$dst,
|
||||
(v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
|
||||
OpSize;
|
||||
// Memory form: the inserted element is a loadi32 from i32mem:$src2.
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, i32mem:$src2, i32i8imm:$src3),
|
||||
!if(Is2Addr,
|
||||
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
!strconcat(asm,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[(set VR128:$dst,
|
||||
(v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2),
|
||||
imm:$src3)))]>, OpSize;
|
||||
}
|
||||
|
||||
defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
|
||||
let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in
|
||||
defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V;
|
||||
let Constraints = "$src1 = $dst" in
|
||||
defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
|
||||
|
||||
// SS41I_insert64_avx - Defines a register-source (rr) and a memory-source
// (rm) record whose patterns are a v2i64 insertelt into VR128:$src1 at the
// immediate index $src3. Both records carry OpSize and REX_W.
//   opc       - opcode byte passed through to SS4AIi8.
//   OpcodeStr - mnemonic placed in front of the operand string.
// Unlike a multiclass with an Is2Addr switch, the operand string here is
// fixed and always prints $src1 as a separate operand.
multiclass SS41I_insert64_avx<bits<8> opc, string OpcodeStr> {
|
||||
// Register form: the inserted element is GR64:$src2.
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
|
||||
[(set VR128:$dst,
|
||||
(v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>,
|
||||
OpSize, REX_W;
|
||||
// Memory form: the inserted element is a loadi64 from i64mem:$src2.
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, i64mem:$src2, i32i8imm:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
|
||||
[(set VR128:$dst,
|
||||
(v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2),
|
||||
imm:$src3)))]>, OpSize, REX_W;
|
||||
}
|
||||
|
||||
let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in
|
||||
defm VPINSRQ : SS41I_insert64_avx<0x22, "vpinsrq">, VEX_4V, VEX_W;
|
||||
|
||||
// insertps has a few different modes, there's the first two here below which
|
||||
// are optimized inserts that won't zero arbitrary elements in the destination
|
||||
|
@ -12070,3 +12070,35 @@
|
||||
// CHECK: encoding: [0xc4,0xe3,0x79,0x14,0x10,0x07]
|
||||
vpextrb $7, %xmm2, (%eax)
|
||||
|
||||
// CHECK: vextractps $7, %xmm2, (%eax)
|
||||
// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0x10,0x07]
|
||||
vextractps $7, %xmm2, (%eax)
|
||||
|
||||
// CHECK: vextractps $7, %xmm2, %eax
|
||||
// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0xd0,0x07]
|
||||
vextractps $7, %xmm2, %eax
|
||||
|
||||
// CHECK: vpinsrw $7, %eax, %xmm2, %xmm5
|
||||
// CHECK: encoding: [0xc5,0xe9,0xc4,0xe8,0x07]
|
||||
vpinsrw $7, %eax, %xmm2, %xmm5
|
||||
|
||||
// CHECK: vpinsrw $7, (%eax), %xmm2, %xmm5
|
||||
// CHECK: encoding: [0xc5,0xe9,0xc4,0x28,0x07]
|
||||
vpinsrw $7, (%eax), %xmm2, %xmm5
|
||||
|
||||
// CHECK: vpinsrb $7, %eax, %xmm2, %xmm5
|
||||
// CHECK: encoding: [0xc4,0xe3,0x69,0x20,0xe8,0x07]
|
||||
vpinsrb $7, %eax, %xmm2, %xmm5
|
||||
|
||||
// CHECK: vpinsrb $7, (%eax), %xmm2, %xmm5
|
||||
// CHECK: encoding: [0xc4,0xe3,0x69,0x20,0x28,0x07]
|
||||
vpinsrb $7, (%eax), %xmm2, %xmm5
|
||||
|
||||
// CHECK: vpinsrd $7, %eax, %xmm2, %xmm5
|
||||
// CHECK: encoding: [0xc4,0xe3,0x69,0x22,0xe8,0x07]
|
||||
vpinsrd $7, %eax, %xmm2, %xmm5
|
||||
|
||||
// CHECK: vpinsrd $7, (%eax), %xmm2, %xmm5
|
||||
// CHECK: encoding: [0xc4,0xe3,0x69,0x22,0x28,0x07]
|
||||
vpinsrd $7, (%eax), %xmm2, %xmm5
|
||||
|
||||
|
@ -2126,3 +2126,43 @@ pshufb CPI1_0(%rip), %xmm1
|
||||
// CHECK: encoding: [0xc4,0x63,0xf9,0x16,0x21,0x07]
|
||||
vpextrq $7, %xmm12, (%rcx)
|
||||
|
||||
// CHECK: vextractps $7, %xmm12, (%rax)
|
||||
// CHECK: encoding: [0xc4,0x63,0x79,0x17,0x20,0x07]
|
||||
vextractps $7, %xmm12, (%rax)
|
||||
|
||||
// CHECK: vextractps $7, %xmm12, %eax
|
||||
// CHECK: encoding: [0xc4,0x63,0x79,0x17,0xe0,0x07]
|
||||
vextractps $7, %xmm12, %eax
|
||||
|
||||
// CHECK: vpinsrw $7, %eax, %xmm12, %xmm10
|
||||
// CHECK: encoding: [0xc5,0x19,0xc4,0xd0,0x07]
|
||||
vpinsrw $7, %eax, %xmm12, %xmm10
|
||||
|
||||
// CHECK: vpinsrw $7, (%rax), %xmm12, %xmm10
|
||||
// CHECK: encoding: [0xc5,0x19,0xc4,0x10,0x07]
|
||||
vpinsrw $7, (%rax), %xmm12, %xmm10
|
||||
|
||||
// CHECK: vpinsrb $7, %eax, %xmm12, %xmm10
|
||||
// CHECK: encoding: [0xc4,0x63,0x19,0x20,0xd0,0x07]
|
||||
vpinsrb $7, %eax, %xmm12, %xmm10
|
||||
|
||||
// CHECK: vpinsrb $7, (%rax), %xmm12, %xmm10
|
||||
// CHECK: encoding: [0xc4,0x63,0x19,0x20,0x10,0x07]
|
||||
vpinsrb $7, (%rax), %xmm12, %xmm10
|
||||
|
||||
// CHECK: vpinsrd $7, %eax, %xmm12, %xmm10
|
||||
// CHECK: encoding: [0xc4,0x63,0x19,0x22,0xd0,0x07]
|
||||
vpinsrd $7, %eax, %xmm12, %xmm10
|
||||
|
||||
// CHECK: vpinsrd $7, (%rax), %xmm12, %xmm10
|
||||
// CHECK: encoding: [0xc4,0x63,0x19,0x22,0x10,0x07]
|
||||
vpinsrd $7, (%rax), %xmm12, %xmm10
|
||||
|
||||
// CHECK: vpinsrq $7, %rax, %xmm12, %xmm10
|
||||
// CHECK: encoding: [0xc4,0x63,0x99,0x22,0xd0,0x07]
|
||||
vpinsrq $7, %rax, %xmm12, %xmm10
|
||||
|
||||
// CHECK: vpinsrq $7, (%rax), %xmm12, %xmm10
|
||||
// CHECK: encoding: [0xc4,0x63,0x99,0x22,0x10,0x07]
|
||||
vpinsrq $7, (%rax), %xmm12, %xmm10
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user