Refactor aliased packed logical instructions, also add

AVX AND,OR,XOR,ANDN{P}{S,D}{rr,rm} instructions.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@106374 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Bruno Cardoso Lopes 2010-06-19 02:44:01 +00:00
parent 02ba9e19c7
commit f4f4bad696
3 changed files with 155 additions and 40 deletions

View File

@ -404,13 +404,14 @@ multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
// sse12_fp_packed - defines a register-register (rr) and a register-memory
// (rm) instruction from one opcode, asm string, operation node, register
// class, memory operand/load fragment and execution domain.
// NOTE(review): this is a diff rendering without +/- markers, so a superseded
// line and its replacement appear back to back below - confirm against the
// real file before editing.
multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
RegisterClass RC, ValueType vt,
X86MemOperand x86memop, PatFrag mem_frag,
Domain d> {
// Replacement for the line above: adds the MayLoad bit, defaulting to 0.
Domain d, bit MayLoad = 0> {
// Only the rr form is marked commutable.
let isCommutable = 1 in
def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
OpcodeStr, [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))],d>;
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
OpcodeStr, [(set RC:$dst, (OpNode RC:$src1,
(mem_frag addr:$src2)))],d>;
// Replacement rm: same definition as above, with mayLoad set from the
// MayLoad parameter so callers without a selection pattern can flag the load.
let mayLoad = MayLoad in
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
OpcodeStr, [(set RC:$dst, (OpNode RC:$src1,
(mem_frag addr:$src2)))],d>;
}
/// sse12_fp_packed_int - SSE 1 & 2 packed instructions intrinsics class
@ -666,50 +667,36 @@ def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
/// sse12_fp_alias_pack_logical - SSE 1 & 2 aliased packed FP logical ops
///
/// Operates on the scalar register classes FR32 / FR64 while using the
/// packed ("ps" / "pd") mnemonics and 128-bit memory operands (f128mem).
/// NOTE(review): this is a diff rendering without +/- markers; the
/// hand-written PSrr/PDrr/PSrm/PDrm defs appear to be the superseded text and
/// the defm lines built on sse12_fp_packed the replacement - confirm against
/// the real file before editing.
multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr,
SDNode OpNode, int NoPat = 0,
bit MayLoad = 0, bit Commutable = 1> {
def PSrr : PSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
!strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
!if(NoPat, []<dag>,
[(set FR32:$dst, (OpNode FR32:$src1, FR32:$src2))])> {
let isCommutable = Commutable;
// Replacement parameter list tail: NoPat and Commutable are no longer
// parameters; only MayLoad remains (default 0).
SDNode OpNode, bit MayLoad = 0> {
// Assembler-parser-only three-operand forms, named V<NAME>PS / V<NAME>PD and
// tagged VEX_4V; the PD variant additionally carries OpSize.
let isAsmParserOnly = 1 in {
defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), OpNode, FR32,
f32, f128mem, memopfsf32, SSEPackedSingle, MayLoad>, VEX_4V;
defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), OpNode, FR64,
f64, f128mem, memopfsf64, SSEPackedDouble, MayLoad>, OpSize,
VEX_4V;
}
def PDrr : PDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
!strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
!if(NoPat, []<dag>,
[(set FR64:$dst, (OpNode FR64:$src1, FR64:$src2))])> {
let isCommutable = Commutable;
}
// Two-operand forms PS / PD: $src1 is tied to $dst, so the asm string prints
// only $src2 and $dst.
let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr,
"ps\t{$src2, $dst|$dst, $src2}"), OpNode, FR32, f32,
f128mem, memopfsf32, SSEPackedSingle, MayLoad>, TB;
def PSrm : PSI<opc, MRMSrcMem, (outs FR32:$dst),
(ins FR32:$src1, f128mem:$src2),
!strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
!if(NoPat, []<dag>,
[(set FR32:$dst, (OpNode FR32:$src1,
(memopfsf32 addr:$src2)))])> {
let mayLoad = MayLoad;
}
def PDrm : PDI<opc, MRMSrcMem, (outs FR64:$dst),
(ins FR64:$src1, f128mem:$src2),
!strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
!if(NoPat, []<dag>,
[(set FR64:$dst, (OpNode FR64:$src1,
(memopfsf64 addr:$src2)))])> {
let mayLoad = MayLoad;
defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr,
"pd\t{$src2, $dst|$dst, $src2}"), OpNode, FR64, f64,
f128mem, memopfsf64, SSEPackedDouble, MayLoad>, TB, OpSize;
}
}
// Alias bitwise logical operations using SSE logical ops on packed FP values.
// Opcodes: 0x54 and, 0x56 or, 0x57 xor, 0x55 andn.
// NOTE(review): diff rendering without +/- markers - the block wrapped in
// `let Constraints` (six-argument FsANDN) appears to be the superseded text;
// the unwrapped defms and the final FsANDN match the new multiclass signature.
let Constraints = "$src1 = $dst" in {
defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand>;
defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for>;
defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor>;
defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand>;
defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for>;
defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor>;
let neverHasSideEffects = 1 in
defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", undef, 1, 1, 0>;
}
// andn has no selection pattern (undef node, Pattern forced empty), is not
// commutable, and passes MayLoad = 1 so its memory form is still flagged as
// a load.
let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in
defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", undef, 1>;
/// basic_sse12_fp_binop_rm - SSE 1 & 2 binops come in both scalar and
/// vector forms.

View File

@ -10244,3 +10244,67 @@ pshufb CPI1_0(%rip), %xmm1
// CHECK: encoding: [0xc5,0xe9,0x5d,0x6c,0xcb,0xfc]
vminpd -4(%ebx,%ecx,8), %xmm2, %xmm5
// Packed FP logical ops in three-operand form. Each case gives the expected
// printed instruction and encoding bytes, followed by the input instruction.
// For every mnemonic: ps and pd with a register source, then ps and pd with
// a memory source.

// and
// CHECK: vandps %xmm2, %xmm4, %xmm6
// CHECK: encoding: [0xc5,0xd8,0x54,0xf2]
vandps %xmm2, %xmm4, %xmm6
// CHECK: vandpd %xmm2, %xmm4, %xmm6
// CHECK: encoding: [0xc5,0xd9,0x54,0xf2]
vandpd %xmm2, %xmm4, %xmm6
// CHECK: vandps -4(%ebx,%ecx,8), %xmm2, %xmm5
// CHECK: encoding: [0xc5,0xe8,0x54,0x6c,0xcb,0xfc]
vandps -4(%ebx,%ecx,8), %xmm2, %xmm5
// CHECK: vandpd -4(%ebx,%ecx,8), %xmm2, %xmm5
// CHECK: encoding: [0xc5,0xe9,0x54,0x6c,0xcb,0xfc]
vandpd -4(%ebx,%ecx,8), %xmm2, %xmm5
// or
// CHECK: vorps %xmm2, %xmm4, %xmm6
// CHECK: encoding: [0xc5,0xd8,0x56,0xf2]
vorps %xmm2, %xmm4, %xmm6
// CHECK: vorpd %xmm2, %xmm4, %xmm6
// CHECK: encoding: [0xc5,0xd9,0x56,0xf2]
vorpd %xmm2, %xmm4, %xmm6
// CHECK: vorps -4(%ebx,%ecx,8), %xmm2, %xmm5
// CHECK: encoding: [0xc5,0xe8,0x56,0x6c,0xcb,0xfc]
vorps -4(%ebx,%ecx,8), %xmm2, %xmm5
// CHECK: vorpd -4(%ebx,%ecx,8), %xmm2, %xmm5
// CHECK: encoding: [0xc5,0xe9,0x56,0x6c,0xcb,0xfc]
vorpd -4(%ebx,%ecx,8), %xmm2, %xmm5
// xor
// CHECK: vxorps %xmm2, %xmm4, %xmm6
// CHECK: encoding: [0xc5,0xd8,0x57,0xf2]
vxorps %xmm2, %xmm4, %xmm6
// CHECK: vxorpd %xmm2, %xmm4, %xmm6
// CHECK: encoding: [0xc5,0xd9,0x57,0xf2]
vxorpd %xmm2, %xmm4, %xmm6
// CHECK: vxorps -4(%ebx,%ecx,8), %xmm2, %xmm5
// CHECK: encoding: [0xc5,0xe8,0x57,0x6c,0xcb,0xfc]
vxorps -4(%ebx,%ecx,8), %xmm2, %xmm5
// CHECK: vxorpd -4(%ebx,%ecx,8), %xmm2, %xmm5
// CHECK: encoding: [0xc5,0xe9,0x57,0x6c,0xcb,0xfc]
vxorpd -4(%ebx,%ecx,8), %xmm2, %xmm5
// andn
// CHECK: vandnps %xmm2, %xmm4, %xmm6
// CHECK: encoding: [0xc5,0xd8,0x55,0xf2]
vandnps %xmm2, %xmm4, %xmm6
// CHECK: vandnpd %xmm2, %xmm4, %xmm6
// CHECK: encoding: [0xc5,0xd9,0x55,0xf2]
vandnpd %xmm2, %xmm4, %xmm6
// CHECK: vandnps -4(%ebx,%ecx,8), %xmm2, %xmm5
// CHECK: encoding: [0xc5,0xe8,0x55,0x6c,0xcb,0xfc]
vandnps -4(%ebx,%ecx,8), %xmm2, %xmm5
// CHECK: vandnpd -4(%ebx,%ecx,8), %xmm2, %xmm5
// CHECK: encoding: [0xc5,0xe9,0x55,0x6c,0xcb,0xfc]
vandnpd -4(%ebx,%ecx,8), %xmm2, %xmm5

View File

@ -296,3 +296,67 @@ vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11
// CHECK: encoding: [0xc5,0x19,0x5d,0x54,0xcb,0xfc]
vminpd -4(%rbx,%rcx,8), %xmm12, %xmm10
// Packed FP logical ops in three-operand form, exercised with xmm10-xmm14 and
// 64-bit base/index registers. Each case gives the expected printed
// instruction and encoding bytes, followed by the input instruction.
// For every mnemonic: ps and pd with a register source, then ps and pd with
// a memory source.

// and
// CHECK: vandps %xmm10, %xmm14, %xmm12
// CHECK: encoding: [0xc4,0x41,0x08,0x54,0xe2]
vandps %xmm10, %xmm14, %xmm12
// CHECK: vandpd %xmm10, %xmm14, %xmm12
// CHECK: encoding: [0xc4,0x41,0x09,0x54,0xe2]
vandpd %xmm10, %xmm14, %xmm12
// CHECK: vandps -4(%rbx,%rcx,8), %xmm12, %xmm10
// CHECK: encoding: [0xc5,0x18,0x54,0x54,0xcb,0xfc]
vandps -4(%rbx,%rcx,8), %xmm12, %xmm10
// CHECK: vandpd -4(%rbx,%rcx,8), %xmm12, %xmm10
// CHECK: encoding: [0xc5,0x19,0x54,0x54,0xcb,0xfc]
vandpd -4(%rbx,%rcx,8), %xmm12, %xmm10
// or
// CHECK: vorps %xmm10, %xmm14, %xmm12
// CHECK: encoding: [0xc4,0x41,0x08,0x56,0xe2]
vorps %xmm10, %xmm14, %xmm12
// CHECK: vorpd %xmm10, %xmm14, %xmm12
// CHECK: encoding: [0xc4,0x41,0x09,0x56,0xe2]
vorpd %xmm10, %xmm14, %xmm12
// CHECK: vorps -4(%rbx,%rcx,8), %xmm12, %xmm10
// CHECK: encoding: [0xc5,0x18,0x56,0x54,0xcb,0xfc]
vorps -4(%rbx,%rcx,8), %xmm12, %xmm10
// CHECK: vorpd -4(%rbx,%rcx,8), %xmm12, %xmm10
// CHECK: encoding: [0xc5,0x19,0x56,0x54,0xcb,0xfc]
vorpd -4(%rbx,%rcx,8), %xmm12, %xmm10
// xor
// CHECK: vxorps %xmm10, %xmm14, %xmm12
// CHECK: encoding: [0xc4,0x41,0x08,0x57,0xe2]
vxorps %xmm10, %xmm14, %xmm12
// CHECK: vxorpd %xmm10, %xmm14, %xmm12
// CHECK: encoding: [0xc4,0x41,0x09,0x57,0xe2]
vxorpd %xmm10, %xmm14, %xmm12
// CHECK: vxorps -4(%rbx,%rcx,8), %xmm12, %xmm10
// CHECK: encoding: [0xc5,0x18,0x57,0x54,0xcb,0xfc]
vxorps -4(%rbx,%rcx,8), %xmm12, %xmm10
// CHECK: vxorpd -4(%rbx,%rcx,8), %xmm12, %xmm10
// CHECK: encoding: [0xc5,0x19,0x57,0x54,0xcb,0xfc]
vxorpd -4(%rbx,%rcx,8), %xmm12, %xmm10
// andn
// CHECK: vandnps %xmm10, %xmm14, %xmm12
// CHECK: encoding: [0xc4,0x41,0x08,0x55,0xe2]
vandnps %xmm10, %xmm14, %xmm12
// CHECK: vandnpd %xmm10, %xmm14, %xmm12
// CHECK: encoding: [0xc4,0x41,0x09,0x55,0xe2]
vandnpd %xmm10, %xmm14, %xmm12
// CHECK: vandnps -4(%rbx,%rcx,8), %xmm12, %xmm10
// CHECK: encoding: [0xc5,0x18,0x55,0x54,0xcb,0xfc]
vandnps -4(%rbx,%rcx,8), %xmm12, %xmm10
// CHECK: vandnpd -4(%rbx,%rcx,8), %xmm12, %xmm10
// CHECK: encoding: [0xc5,0x19,0x55,0x54,0xcb,0xfc]
vandnpd -4(%rbx,%rcx,8), %xmm12, %xmm10