Add AVX SSE4.1 binop (some forms of packed max, min, mul, pack, cmp) instructions

llvm-svn: 107558
This commit is contained in:
Bruno Cardoso Lopes 2010-07-03 01:15:47 +00:00
parent 128a0197bb
commit bc75502f09
3 changed files with 202 additions and 0 deletions

View File

@ -4123,6 +4123,32 @@ multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
(bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}
// AVX variants of the SSE4.1 packed integer binops (pack, compare-equal,
// min, max, multiply). Each defm instantiates SS41I_binop_rm_int with a
// "v"-prefixed mnemonic, reuses the int_x86_sse41_* intrinsic, and is tagged
// VEX_4V. Everything inside the braces gets isAsmParserOnly = 1 and requires
// both the HasAVX and HasSSE41 predicates.
// NOTE(review): the trailing `0` argument is presumably the multiclass's
// two-address flag (turning off the "$src1 = $dst" tie so the three-operand
// syntax can be used) -- confirm against the SS41I_binop_rm_int parameter
// list, which is not fully visible here.
let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in {
// This brace-less `let` covers only the VPACKUSDW defm that follows it.
let isCommutable = 0 in
defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
0>, VEX_4V;
defm VPCMPEQQ : SS41I_binop_rm_int<0x29, "vpcmpeqq", int_x86_sse41_pcmpeqq,
0>, VEX_4V;
// Packed minimum: signed byte / signed dword / unsigned dword / unsigned word.
defm VPMINSB : SS41I_binop_rm_int<0x38, "vpminsb", int_x86_sse41_pminsb,
0>, VEX_4V;
defm VPMINSD : SS41I_binop_rm_int<0x39, "vpminsd", int_x86_sse41_pminsd,
0>, VEX_4V;
defm VPMINUD : SS41I_binop_rm_int<0x3B, "vpminud", int_x86_sse41_pminud,
0>, VEX_4V;
defm VPMINUW : SS41I_binop_rm_int<0x3A, "vpminuw", int_x86_sse41_pminuw,
0>, VEX_4V;
// Packed maximum: signed byte / signed dword / unsigned dword / unsigned word.
defm VPMAXSB : SS41I_binop_rm_int<0x3C, "vpmaxsb", int_x86_sse41_pmaxsb,
0>, VEX_4V;
defm VPMAXSD : SS41I_binop_rm_int<0x3D, "vpmaxsd", int_x86_sse41_pmaxsd,
0>, VEX_4V;
defm VPMAXUD : SS41I_binop_rm_int<0x3F, "vpmaxud", int_x86_sse41_pmaxud,
0>, VEX_4V;
defm VPMAXUW : SS41I_binop_rm_int<0x3E, "vpmaxuw", int_x86_sse41_pmaxuw,
0>, VEX_4V;
defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq,
0>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
let isCommutable = 0 in
defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;

View File

@ -11782,3 +11782,91 @@
// CHECK: encoding: [0xc4,0xe2,0x79,0x41,0x10]
vphminposuw (%eax), %xmm2
// AVX forms of the SSE4.1 packed integer binops (pack, cmpeq, min, max, mul).
// Each mnemonic is exercised twice: once with a register source
// (%xmm2, %xmm3 -> %xmm1) and once with a memory source ((%eax), %xmm2 -> %xmm3).
// Within a pair only the fourth encoding byte (the opcode) varies between
// mnemonics; the VEX prefix and ModRM bytes stay the same for a given form.
// CHECK: vpackusdw %xmm2, %xmm3, %xmm1
// CHECK: encoding: [0xc4,0xe2,0x61,0x2b,0xca]
vpackusdw %xmm2, %xmm3, %xmm1
// CHECK: vpackusdw (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x2b,0x18]
vpackusdw (%eax), %xmm2, %xmm3
// CHECK: vpcmpeqq %xmm2, %xmm3, %xmm1
// CHECK: encoding: [0xc4,0xe2,0x61,0x29,0xca]
vpcmpeqq %xmm2, %xmm3, %xmm1
// CHECK: vpcmpeqq (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x29,0x18]
vpcmpeqq (%eax), %xmm2, %xmm3
// CHECK: vpminsb %xmm2, %xmm3, %xmm1
// CHECK: encoding: [0xc4,0xe2,0x61,0x38,0xca]
vpminsb %xmm2, %xmm3, %xmm1
// CHECK: vpminsb (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x38,0x18]
vpminsb (%eax), %xmm2, %xmm3
// CHECK: vpminsd %xmm2, %xmm3, %xmm1
// CHECK: encoding: [0xc4,0xe2,0x61,0x39,0xca]
vpminsd %xmm2, %xmm3, %xmm1
// CHECK: vpminsd (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x39,0x18]
vpminsd (%eax), %xmm2, %xmm3
// CHECK: vpminud %xmm2, %xmm3, %xmm1
// CHECK: encoding: [0xc4,0xe2,0x61,0x3b,0xca]
vpminud %xmm2, %xmm3, %xmm1
// CHECK: vpminud (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x3b,0x18]
vpminud (%eax), %xmm2, %xmm3
// CHECK: vpminuw %xmm2, %xmm3, %xmm1
// CHECK: encoding: [0xc4,0xe2,0x61,0x3a,0xca]
vpminuw %xmm2, %xmm3, %xmm1
// CHECK: vpminuw (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x3a,0x18]
vpminuw (%eax), %xmm2, %xmm3
// CHECK: vpmaxsb %xmm2, %xmm3, %xmm1
// CHECK: encoding: [0xc4,0xe2,0x61,0x3c,0xca]
vpmaxsb %xmm2, %xmm3, %xmm1
// CHECK: vpmaxsb (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x3c,0x18]
vpmaxsb (%eax), %xmm2, %xmm3
// CHECK: vpmaxsd %xmm2, %xmm3, %xmm1
// CHECK: encoding: [0xc4,0xe2,0x61,0x3d,0xca]
vpmaxsd %xmm2, %xmm3, %xmm1
// CHECK: vpmaxsd (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x3d,0x18]
vpmaxsd (%eax), %xmm2, %xmm3
// CHECK: vpmaxud %xmm2, %xmm3, %xmm1
// CHECK: encoding: [0xc4,0xe2,0x61,0x3f,0xca]
vpmaxud %xmm2, %xmm3, %xmm1
// CHECK: vpmaxud (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x3f,0x18]
vpmaxud (%eax), %xmm2, %xmm3
// CHECK: vpmaxuw %xmm2, %xmm3, %xmm1
// CHECK: encoding: [0xc4,0xe2,0x61,0x3e,0xca]
vpmaxuw %xmm2, %xmm3, %xmm1
// CHECK: vpmaxuw (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x3e,0x18]
vpmaxuw (%eax), %xmm2, %xmm3
// CHECK: vpmuldq %xmm2, %xmm3, %xmm1
// CHECK: encoding: [0xc4,0xe2,0x61,0x28,0xca]
vpmuldq %xmm2, %xmm3, %xmm1
// CHECK: vpmuldq (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc4,0xe2,0x69,0x28,0x18]
vpmuldq (%eax), %xmm2, %xmm3

View File

@ -1830,3 +1830,91 @@ pshufb CPI1_0(%rip), %xmm1
// CHECK: encoding: [0xc4,0x62,0x79,0x41,0x20]
vphminposuw (%rax), %xmm12
// AVX forms of the SSE4.1 packed integer binops (pack, cmpeq, min, max, mul),
// using %xmm11-%xmm13 and a %rax base address.
// Each mnemonic is exercised twice: once with a register source
// (%xmm12, %xmm13 -> %xmm11) and once with a memory source
// ((%rax), %xmm12 -> %xmm13). Within a pair only the fourth encoding byte
// (the opcode) varies between mnemonics.
// CHECK: vpackusdw %xmm12, %xmm13, %xmm11
// CHECK: encoding: [0xc4,0x42,0x11,0x2b,0xdc]
vpackusdw %xmm12, %xmm13, %xmm11
// CHECK: vpackusdw (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x62,0x19,0x2b,0x28]
vpackusdw (%rax), %xmm12, %xmm13
// CHECK: vpcmpeqq %xmm12, %xmm13, %xmm11
// CHECK: encoding: [0xc4,0x42,0x11,0x29,0xdc]
vpcmpeqq %xmm12, %xmm13, %xmm11
// CHECK: vpcmpeqq (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x62,0x19,0x29,0x28]
vpcmpeqq (%rax), %xmm12, %xmm13
// CHECK: vpminsb %xmm12, %xmm13, %xmm11
// CHECK: encoding: [0xc4,0x42,0x11,0x38,0xdc]
vpminsb %xmm12, %xmm13, %xmm11
// CHECK: vpminsb (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x62,0x19,0x38,0x28]
vpminsb (%rax), %xmm12, %xmm13
// CHECK: vpminsd %xmm12, %xmm13, %xmm11
// CHECK: encoding: [0xc4,0x42,0x11,0x39,0xdc]
vpminsd %xmm12, %xmm13, %xmm11
// CHECK: vpminsd (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x62,0x19,0x39,0x28]
vpminsd (%rax), %xmm12, %xmm13
// CHECK: vpminud %xmm12, %xmm13, %xmm11
// CHECK: encoding: [0xc4,0x42,0x11,0x3b,0xdc]
vpminud %xmm12, %xmm13, %xmm11
// CHECK: vpminud (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x62,0x19,0x3b,0x28]
vpminud (%rax), %xmm12, %xmm13
// CHECK: vpminuw %xmm12, %xmm13, %xmm11
// CHECK: encoding: [0xc4,0x42,0x11,0x3a,0xdc]
vpminuw %xmm12, %xmm13, %xmm11
// CHECK: vpminuw (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x62,0x19,0x3a,0x28]
vpminuw (%rax), %xmm12, %xmm13
// CHECK: vpmaxsb %xmm12, %xmm13, %xmm11
// CHECK: encoding: [0xc4,0x42,0x11,0x3c,0xdc]
vpmaxsb %xmm12, %xmm13, %xmm11
// CHECK: vpmaxsb (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x62,0x19,0x3c,0x28]
vpmaxsb (%rax), %xmm12, %xmm13
// CHECK: vpmaxsd %xmm12, %xmm13, %xmm11
// CHECK: encoding: [0xc4,0x42,0x11,0x3d,0xdc]
vpmaxsd %xmm12, %xmm13, %xmm11
// CHECK: vpmaxsd (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x62,0x19,0x3d,0x28]
vpmaxsd (%rax), %xmm12, %xmm13
// CHECK: vpmaxud %xmm12, %xmm13, %xmm11
// CHECK: encoding: [0xc4,0x42,0x11,0x3f,0xdc]
vpmaxud %xmm12, %xmm13, %xmm11
// CHECK: vpmaxud (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x62,0x19,0x3f,0x28]
vpmaxud (%rax), %xmm12, %xmm13
// CHECK: vpmaxuw %xmm12, %xmm13, %xmm11
// CHECK: encoding: [0xc4,0x42,0x11,0x3e,0xdc]
vpmaxuw %xmm12, %xmm13, %xmm11
// CHECK: vpmaxuw (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x62,0x19,0x3e,0x28]
vpmaxuw (%rax), %xmm12, %xmm13
// CHECK: vpmuldq %xmm12, %xmm13, %xmm11
// CHECK: encoding: [0xc4,0x42,0x11,0x28,0xdc]
vpmuldq %xmm12, %xmm13, %xmm11
// CHECK: vpmuldq (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x62,0x19,0x28,0x28]
vpmuldq (%rax), %xmm12, %xmm13