[AVX-512] Add execution domain fixing for logical operations with broadcast loads. This builds on the handling of masked ops since we need to keep element size the same.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@280464 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Craig Topper 2016-09-02 05:29:09 +00:00
parent a3459cf3aa
commit 3409bf2065
3 changed files with 146 additions and 16 deletions

View File

@@ -7315,6 +7315,86 @@ static const uint16_t ReplaceableInstrsAVX512DQMasked[][4] = {
X86::VPXORQZrrk, X86::VPXORDZrrk },
{ X86::VXORPSZrrkz, X86::VXORPDZrrkz,
X86::VPXORQZrrkz, X86::VPXORDZrrkz },
// Broadcast loads can be handled the same as masked operations to avoid
// changing element size.
{ X86::VANDNPSZ128rmb, X86::VANDNPDZ128rmb,
X86::VPANDNQZ128rmb, X86::VPANDNDZ128rmb },
{ X86::VANDPSZ128rmb, X86::VANDPDZ128rmb,
X86::VPANDQZ128rmb, X86::VPANDDZ128rmb },
{ X86::VORPSZ128rmb, X86::VORPDZ128rmb,
X86::VPORQZ128rmb, X86::VPORDZ128rmb },
{ X86::VXORPSZ128rmb, X86::VXORPDZ128rmb,
X86::VPXORQZ128rmb, X86::VPXORDZ128rmb },
{ X86::VANDNPSZ256rmb, X86::VANDNPDZ256rmb,
X86::VPANDNQZ256rmb, X86::VPANDNDZ256rmb },
{ X86::VANDPSZ256rmb, X86::VANDPDZ256rmb,
X86::VPANDQZ256rmb, X86::VPANDDZ256rmb },
{ X86::VORPSZ256rmb, X86::VORPDZ256rmb,
X86::VPORQZ256rmb, X86::VPORDZ256rmb },
{ X86::VXORPSZ256rmb, X86::VXORPDZ256rmb,
X86::VPXORQZ256rmb, X86::VPXORDZ256rmb },
{ X86::VANDNPSZrmb, X86::VANDNPDZrmb,
X86::VPANDNQZrmb, X86::VPANDNDZrmb },
{ X86::VANDPSZrmb, X86::VANDPDZrmb,
X86::VPANDQZrmb, X86::VPANDDZrmb },
{ X86::VORPSZrmb, X86::VORPDZrmb,
X86::VPORQZrmb, X86::VPORDZrmb },
{ X86::VXORPSZrmb, X86::VXORPDZrmb,
X86::VPXORQZrmb, X86::VPXORDZrmb },
{ X86::VANDNPSZ128rmbk, X86::VANDNPDZ128rmbk,
X86::VPANDNQZ128rmbk, X86::VPANDNDZ128rmbk },
{ X86::VANDPSZ128rmbk, X86::VANDPDZ128rmbk,
X86::VPANDQZ128rmbk, X86::VPANDDZ128rmbk },
{ X86::VORPSZ128rmbk, X86::VORPDZ128rmbk,
X86::VPORQZ128rmbk, X86::VPORDZ128rmbk },
{ X86::VXORPSZ128rmbk, X86::VXORPDZ128rmbk,
X86::VPXORQZ128rmbk, X86::VPXORDZ128rmbk },
{ X86::VANDNPSZ256rmbk, X86::VANDNPDZ256rmbk,
X86::VPANDNQZ256rmbk, X86::VPANDNDZ256rmbk },
{ X86::VANDPSZ256rmbk, X86::VANDPDZ256rmbk,
X86::VPANDQZ256rmbk, X86::VPANDDZ256rmbk },
{ X86::VORPSZ256rmbk, X86::VORPDZ256rmbk,
X86::VPORQZ256rmbk, X86::VPORDZ256rmbk },
{ X86::VXORPSZ256rmbk, X86::VXORPDZ256rmbk,
X86::VPXORQZ256rmbk, X86::VPXORDZ256rmbk },
{ X86::VANDNPSZrmbk, X86::VANDNPDZrmbk,
X86::VPANDNQZrmbk, X86::VPANDNDZrmbk },
{ X86::VANDPSZrmbk, X86::VANDPDZrmbk,
X86::VPANDQZrmbk, X86::VPANDDZrmbk },
{ X86::VORPSZrmbk, X86::VORPDZrmbk,
X86::VPORQZrmbk, X86::VPORDZrmbk },
{ X86::VXORPSZrmbk, X86::VXORPDZrmbk,
X86::VPXORQZrmbk, X86::VPXORDZrmbk },
{ X86::VANDNPSZ128rmbkz,X86::VANDNPDZ128rmbkz,
X86::VPANDNQZ128rmbkz,X86::VPANDNDZ128rmbkz},
{ X86::VANDPSZ128rmbkz, X86::VANDPDZ128rmbkz,
X86::VPANDQZ128rmbkz, X86::VPANDDZ128rmbkz },
{ X86::VORPSZ128rmbkz, X86::VORPDZ128rmbkz,
X86::VPORQZ128rmbkz, X86::VPORDZ128rmbkz },
{ X86::VXORPSZ128rmbkz, X86::VXORPDZ128rmbkz,
X86::VPXORQZ128rmbkz, X86::VPXORDZ128rmbkz },
{ X86::VANDNPSZ256rmbkz,X86::VANDNPDZ256rmbkz,
X86::VPANDNQZ256rmbkz,X86::VPANDNDZ256rmbkz},
{ X86::VANDPSZ256rmbkz, X86::VANDPDZ256rmbkz,
X86::VPANDQZ256rmbkz, X86::VPANDDZ256rmbkz },
{ X86::VORPSZ256rmbkz, X86::VORPDZ256rmbkz,
X86::VPORQZ256rmbkz, X86::VPORDZ256rmbkz },
{ X86::VXORPSZ256rmbkz, X86::VXORPDZ256rmbkz,
X86::VPXORQZ256rmbkz, X86::VPXORDZ256rmbkz },
{ X86::VANDNPSZrmbkz, X86::VANDNPDZrmbkz,
X86::VPANDNQZrmbkz, X86::VPANDNDZrmbkz },
{ X86::VANDPSZrmbkz, X86::VANDPDZrmbkz,
X86::VPANDQZrmbkz, X86::VPANDDZrmbkz },
{ X86::VORPSZrmbkz, X86::VORPDZrmbkz,
X86::VPORQZrmbkz, X86::VPORDZrmbkz },
{ X86::VXORPSZrmbkz, X86::VXORPDZrmbkz,
X86::VPXORQZrmbkz, X86::VPXORDZrmbkz },
};
// FIXME: Some shuffle and unpack instructions have equivalents in different

View File

@@ -594,10 +594,30 @@ define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
}
define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
; CHECK-LABEL: orq_broadcast:
; CHECK: ## BB#0:
; CHECK-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
; AVX512F-LABEL: orq_broadcast:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: orq_broadcast:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: orq_broadcast:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: orq_broadcast:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: orq_broadcast:
; SKX: ## BB#0:
; SKX-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; SKX-NEXT: retq
%b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
ret <8 x i64> %b
}
@@ -634,10 +654,30 @@ entry:
}
define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
; CHECK-LABEL: andqbrst:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
; AVX512F-LABEL: andqbrst:
; AVX512F: ## BB#0: ## %entry
; AVX512F-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: andqbrst:
; AVX512VL: ## BB#0: ## %entry
; AVX512VL-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: andqbrst:
; AVX512BW: ## BB#0: ## %entry
; AVX512BW-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: andqbrst:
; AVX512DQ: ## BB#0: ## %entry
; AVX512DQ-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: andqbrst:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0
; SKX-NEXT: retq
entry:
%a = load i64, i64* %ap, align 8
%b = insertelement <8 x i64> undef, i64 %a, i32 0

View File

@@ -116,10 +116,15 @@ entry:
define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
; ALL-LABEL: orq_broadcast:
; ALL: ## BB#0:
; ALL-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; ALL-NEXT: retq
; KNL-LABEL: orq_broadcast:
; KNL: ## BB#0:
; KNL-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: orq_broadcast:
; SKX: ## BB#0:
; SKX-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; SKX-NEXT: retq
%b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
ret <8 x i64> %b
}
@@ -141,10 +146,15 @@ entry:
}
define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
; ALL-LABEL: andqbrst:
; ALL: ## BB#0: ## %entry
; ALL-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0
; ALL-NEXT: retq
; KNL-LABEL: andqbrst:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: andqbrst:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0
; SKX-NEXT: retq
entry:
%a = load i64, i64* %ap, align 8
%b = insertelement <8 x i64> undef, i64 %a, i32 0