From 3c1482231290e4c180556e090475bd09e7a26480 Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Lopes Date: Wed, 7 Jul 2010 01:01:13 +0000 Subject: [PATCH] Add AVX SSE4.1 extractps and pinsr instructions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@107746 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 102 ++++++++++++++++-------- test/MC/AsmParser/X86/x86_32-encoding.s | 32 ++++++++ test/MC/AsmParser/X86/x86_64-encoding.s | 40 ++++++++++ 3 files changed, 139 insertions(+), 35 deletions(-) diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 453b1619c3e..a1c25acb9cc 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -4516,6 +4516,8 @@ multiclass SS41I_extractf32 opc, string OpcodeStr> { addr:$dst)]>, OpSize; } +let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in + defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX; defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">; // Also match an EXTRACTPS store when the store is done as f32 instead of i32. @@ -4525,46 +4527,76 @@ def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)), (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>, Requires<[HasSSE41]>; -let Constraints = "$src1 = $dst" in { - multiclass SS41I_insert8 opc, string OpcodeStr> { - def rr : SS4AIi8, OpSize; - def rm : SS4AIi8, OpSize; - } +multiclass SS41I_insert8 opc, string asm, bit Is2Addr = 1> { + def rr : SS4AIi8, OpSize; + def rm : SS4AIi8, OpSize; } -defm PINSRB : SS41I_insert8<0x20, "pinsrb">; +let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in + defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V; +let Constraints = "$src1 = $dst" in + defm PINSRB : SS41I_insert8<0x20, "pinsrb">; -let Constraints = "$src1 = $dst" in { - multiclass SS41I_insert32 opc, string OpcodeStr> { - def rr : SS4AIi8, - OpSize; - def rm : SS4AIi8, OpSize; - } +multiclass SS41I_insert32 opc, string asm, bit Is2Addr = 1> { + def rr : SS4AIi8, + OpSize; + def rm : SS4AIi8, OpSize; } -defm PINSRD : SS41I_insert32<0x22, "pinsrd">; +let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in + defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V; +let Constraints = "$src1 = $dst" in + defm PINSRD : SS41I_insert32<0x22, "pinsrd">; + +multiclass SS41I_insert64_avx opc, string OpcodeStr> { + def rr : SS4AIi8, + OpSize, REX_W; + def rm : SS4AIi8, OpSize, REX_W; +} + +let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in + defm VPINSRQ : SS41I_insert64_avx<0x22, "vpinsrq">, VEX_4V, VEX_W; // insertps has a few different modes, there's the first two here below which // are optimized inserts that won't zero arbitrary elements in the destination diff --git a/test/MC/AsmParser/X86/x86_32-encoding.s b/test/MC/AsmParser/X86/x86_32-encoding.s index 7bfcd45630d..f0a879bb902 100644 --- a/test/MC/AsmParser/X86/x86_32-encoding.s +++ b/test/MC/AsmParser/X86/x86_32-encoding.s @@ -12070,3 +12070,35 @@ // CHECK: encoding: [0xc4,0xe3,0x79,0x14,0x10,0x07] vpextrb $7, %xmm2, (%eax) +// CHECK: vextractps $7, %xmm2, (%eax) +// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0x10,0x07] + vextractps $7, %xmm2, (%eax) + +// CHECK: vextractps $7, %xmm2, %eax +// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0xd0,0x07] + vextractps $7, %xmm2, %eax + +// CHECK: vpinsrw $7, %eax, %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0xc4,0xe8,0x07] + vpinsrw $7, %eax, %xmm2, %xmm5 + +// CHECK: vpinsrw $7, (%eax), %xmm2, %xmm5 +// CHECK: encoding: [0xc5,0xe9,0xc4,0x28,0x07] + vpinsrw $7, (%eax), %xmm2, %xmm5 + +// CHECK: vpinsrb $7, %eax, %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x69,0x20,0xe8,0x07] + vpinsrb $7, %eax, %xmm2, %xmm5 + +// CHECK: vpinsrb $7, (%eax), %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x69,0x20,0x28,0x07] + vpinsrb $7, (%eax), %xmm2, %xmm5 + +// CHECK: vpinsrd $7, %eax, %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x69,0x22,0xe8,0x07] + vpinsrd $7, %eax, %xmm2, %xmm5 + +// CHECK: vpinsrd $7, (%eax), %xmm2, %xmm5 +// CHECK: encoding: [0xc4,0xe3,0x69,0x22,0x28,0x07] + vpinsrd $7, (%eax), %xmm2, %xmm5 + diff --git a/test/MC/AsmParser/X86/x86_64-encoding.s b/test/MC/AsmParser/X86/x86_64-encoding.s index ac364bc5dbe..013e2a52716 100644 --- a/test/MC/AsmParser/X86/x86_64-encoding.s +++ b/test/MC/AsmParser/X86/x86_64-encoding.s @@ -2126,3 +2126,43 @@ pshufb CPI1_0(%rip), %xmm1 // CHECK: encoding: [0xc4,0x63,0xf9,0x16,0x21,0x07] vpextrq $7, %xmm12, (%rcx) +// CHECK: vextractps $7, %xmm12, (%rax) +// CHECK: encoding: [0xc4,0x63,0x79,0x17,0x20,0x07] + vextractps $7, %xmm12, (%rax) + +// CHECK: vextractps $7, %xmm12, %eax +// CHECK: encoding: [0xc4,0x63,0x79,0x17,0xe0,0x07] + vextractps $7, %xmm12, %eax + +// CHECK: vpinsrw $7, %eax, %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x19,0xc4,0xd0,0x07] + vpinsrw $7, %eax, %xmm12, %xmm10 + +// CHECK: vpinsrw $7, (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc5,0x19,0xc4,0x10,0x07] + vpinsrw $7, (%rax), %xmm12, %xmm10 + +// CHECK: vpinsrb $7, %eax, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x63,0x19,0x20,0xd0,0x07] + vpinsrb $7, %eax, %xmm12, %xmm10 + +// CHECK: vpinsrb $7, (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x63,0x19,0x20,0x10,0x07] + vpinsrb $7, (%rax), %xmm12, %xmm10 + +// CHECK: vpinsrd $7, %eax, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x63,0x19,0x22,0xd0,0x07] + vpinsrd $7, %eax, %xmm12, %xmm10 + +// CHECK: vpinsrd $7, (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x63,0x19,0x22,0x10,0x07] + vpinsrd $7, (%rax), %xmm12, %xmm10 + +// CHECK: vpinsrq $7, %rax, %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x63,0x99,0x22,0xd0,0x07] + vpinsrq $7, %rax, %xmm12, %xmm10 + +// CHECK: vpinsrq $7, (%rax), %xmm12, %xmm10 +// CHECK: encoding: [0xc4,0x63,0x99,0x22,0x10,0x07] + vpinsrq $7, (%rax), %xmm12, %xmm10 +