mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-19 00:14:20 +00:00
AVX512: Implemented encoding, intrinsics and DAG lowering for VMOVDDUP instructions.
Differential Revision: http://reviews.llvm.org/D14702 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@253548 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
499f1059a5
commit
e20dfebf87
@ -1618,6 +1618,24 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
Intrinsic<[llvm_v16f32_ty],
|
||||
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_movddup_128 :
|
||||
GCCBuiltin<"__builtin_ia32_movddup128_mask">,
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_movddup_256 :
|
||||
GCCBuiltin<"__builtin_ia32_movddup256_mask">,
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_movddup_512 :
|
||||
GCCBuiltin<"__builtin_ia32_movddup512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
}
|
||||
|
||||
// Vector blend
|
||||
|
@ -309,39 +309,25 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
|
||||
CASE_MOVDUP(MOVSLDUP, r)
|
||||
Src1Name = getRegName(MI->getOperand(MI->getNumOperands() - 1).getReg());
|
||||
// FALL THROUGH.
|
||||
CASE_MOVDUP(MOVSLDUP, m) {
|
||||
MVT VT = getRegOperandVectorVT(MI, MVT::f32, 0);
|
||||
CASE_MOVDUP(MOVSLDUP, m)
|
||||
DestName = getRegName(MI->getOperand(0).getReg());
|
||||
DecodeMOVSLDUPMask(VT, ShuffleMask);
|
||||
DecodeMOVSLDUPMask(getRegOperandVectorVT(MI, MVT::f32, 0), ShuffleMask);
|
||||
break;
|
||||
}
|
||||
|
||||
CASE_MOVDUP(MOVSHDUP, r)
|
||||
Src1Name = getRegName(MI->getOperand(MI->getNumOperands() - 1).getReg());
|
||||
// FALL THROUGH.
|
||||
CASE_MOVDUP(MOVSHDUP, m) {
|
||||
MVT VT = getRegOperandVectorVT(MI, MVT::f32, 0);
|
||||
CASE_MOVDUP(MOVSHDUP, m)
|
||||
DestName = getRegName(MI->getOperand(0).getReg());
|
||||
DecodeMOVSHDUPMask(VT, ShuffleMask);
|
||||
break;
|
||||
}
|
||||
|
||||
case X86::VMOVDDUPYrr:
|
||||
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||
// FALL THROUGH.
|
||||
case X86::VMOVDDUPYrm:
|
||||
DestName = getRegName(MI->getOperand(0).getReg());
|
||||
DecodeMOVDDUPMask(MVT::v4f64, ShuffleMask);
|
||||
DecodeMOVSHDUPMask(getRegOperandVectorVT(MI, MVT::f32, 0), ShuffleMask);
|
||||
break;
|
||||
|
||||
case X86::MOVDDUPrr:
|
||||
case X86::VMOVDDUPrr:
|
||||
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||
CASE_MOVDUP(MOVDDUP, r)
|
||||
Src1Name = getRegName(MI->getOperand(MI->getNumOperands() - 1).getReg());
|
||||
// FALL THROUGH.
|
||||
case X86::MOVDDUPrm:
|
||||
case X86::VMOVDDUPrm:
|
||||
CASE_MOVDUP(MOVDDUP, m)
|
||||
DestName = getRegName(MI->getOperand(0).getReg());
|
||||
DecodeMOVDDUPMask(MVT::v2f64, ShuffleMask);
|
||||
DecodeMOVDDUPMask(getRegOperandVectorVT(MI, MVT::f64, 0), ShuffleMask);
|
||||
break;
|
||||
|
||||
case X86::PSLLDQri:
|
||||
|
@ -4225,26 +4225,6 @@ multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode> {
|
||||
|
||||
defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AVX-512 - MOVDDUP
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
multiclass avx512_movddup<string OpcodeStr, RegisterClass RC, ValueType VT,
|
||||
X86MemOperand x86memop, PatFrag memop_frag> {
|
||||
def rr : AVX512PDI<0x12, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||
[(set RC:$dst, (VT (X86Movddup RC:$src)))]>, EVEX;
|
||||
def rm : AVX512PDI<0x12, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||
[(set RC:$dst,
|
||||
(VT (X86Movddup (memop_frag addr:$src))))]>, EVEX;
|
||||
}
|
||||
|
||||
defm VMOVDDUPZ : avx512_movddup<"vmovddup", VR512, v8f64, f512mem, loadv8f64>,
|
||||
VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
|
||||
def : Pat<(X86Movddup (v8f64 (scalar_to_vector (loadf64 addr:$src)))),
|
||||
(VMOVDDUPZrm addr:$src)>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Move Low to High and High to Low packed FP Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -7128,6 +7108,52 @@ multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode>{
|
||||
|
||||
defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup>;
|
||||
defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AVX-512 - MOVDDUP
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
X86VectorVTInfo _> {
|
||||
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src), OpcodeStr, "$src", "$src",
|
||||
(_.VT (OpNode (_.VT _.RC:$src)))>, EVEX;
|
||||
let mayLoad = 1 in
|
||||
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
|
||||
(_.VT (OpNode (_.VT (scalar_to_vector
|
||||
(_.ScalarLdFrag addr:$src)))))>,
|
||||
EVEX, EVEX_CD8<_.EltSize, CD8VH>;
|
||||
}
|
||||
|
||||
multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
AVX512VLVectorVTInfo VTInfo> {
|
||||
|
||||
defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info512>, EVEX_V512;
|
||||
|
||||
let Predicates = [HasAVX512, HasVLX] in {
|
||||
defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info256>,
|
||||
EVEX_V256;
|
||||
defm Z128 : avx512_movddup_128<opc, OpcodeStr, OpNode, VTInfo.info128>,
|
||||
EVEX_V128;
|
||||
}
|
||||
}
|
||||
|
||||
multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode>{
|
||||
defm NAME: avx512_movddup_common<opc, OpcodeStr, OpNode,
|
||||
avx512vl_f64_info>, XD, VEX_W;
|
||||
let isCodeGenOnly = 1 in
|
||||
defm NAME#_I: avx512_movddup_common<opc, OpcodeStr, OpNode,
|
||||
avx512vl_i64_info>;
|
||||
}
|
||||
|
||||
defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup>;
|
||||
|
||||
def : Pat<(X86Movddup (loadv2f64 addr:$src)),
|
||||
(VMOVDDUPZ128rm addr:$src)>, Requires<[HasAVX512, HasVLX]>;
|
||||
def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
|
||||
(VMOVDDUPZ128rm addr:$src)>, Requires<[HasAVX512, HasVLX]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AVX-512 - Unpack Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -5206,21 +5206,30 @@ def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
|
||||
def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||
[(set VR256:$dst,
|
||||
(v4f64 (X86Movddup
|
||||
(scalar_to_vector (loadf64 addr:$src)))))]>,
|
||||
(v4f64 (X86Movddup (loadv4f64 addr:$src))))]>,
|
||||
Sched<[WriteLoad]>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
let Predicates = [HasAVX, NoVLX] in {
|
||||
defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
|
||||
defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX, VEX_L;
|
||||
}
|
||||
|
||||
defm MOVDDUP : sse3_replicate_dfp<"movddup">;
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
|
||||
let Predicates = [HasAVX, NoVLX] in {
|
||||
def : Pat<(X86Movddup (loadv2f64 addr:$src)),
|
||||
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
|
||||
|
||||
// 256-bit version
|
||||
def : Pat<(X86Movddup (loadv4i64 addr:$src)),
|
||||
(VMOVDDUPYrm addr:$src)>;
|
||||
def : Pat<(X86Movddup (v4i64 VR256:$src)),
|
||||
(VMOVDDUPYrr VR256:$src)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
def : Pat<(X86Movddup (bc_v2f64 (loadv4f32 addr:$src))),
|
||||
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
|
||||
def : Pat<(X86Movddup (bc_v2f64 (loadv2i64 addr:$src))),
|
||||
@ -5228,16 +5237,6 @@ let Predicates = [HasAVX] in {
|
||||
def : Pat<(X86Movddup (bc_v2f64
|
||||
(v2i64 (scalar_to_vector (loadi64 addr:$src))))),
|
||||
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
|
||||
|
||||
// 256-bit version
|
||||
def : Pat<(X86Movddup (loadv4f64 addr:$src)),
|
||||
(VMOVDDUPYrm addr:$src)>;
|
||||
def : Pat<(X86Movddup (loadv4i64 addr:$src)),
|
||||
(VMOVDDUPYrm addr:$src)>;
|
||||
def : Pat<(X86Movddup (v4i64 (scalar_to_vector (loadi64 addr:$src)))),
|
||||
(VMOVDDUPYrm addr:$src)>;
|
||||
def : Pat<(X86Movddup (v4i64 VR256:$src)),
|
||||
(VMOVDDUPYrr VR256:$src)>;
|
||||
}
|
||||
|
||||
let Predicates = [UseAVX, OptForSize] in {
|
||||
|
@ -798,6 +798,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86ISD::FMIN, X86ISD::FMIN_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK_RM,
|
||||
X86ISD::FMIN, X86ISD::FMIN_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_movddup_128, INTR_TYPE_1OP_MASK,
|
||||
X86ISD::MOVDDUP, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_movddup_256, INTR_TYPE_1OP_MASK,
|
||||
X86ISD::MOVDDUP, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_movddup_512, INTR_TYPE_1OP_MASK,
|
||||
X86ISD::MOVDDUP, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_movshdup_128, INTR_TYPE_1OP_MASK,
|
||||
X86ISD::MOVSHDUP, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_movshdup_256, INTR_TYPE_1OP_MASK,
|
||||
|
@ -406,3 +406,26 @@ define void @store_h_double(<2 x double> %x, i64* %p) {
|
||||
ret void
|
||||
}
|
||||
|
||||
define <2 x double> @test39(double* %ptr) nounwind {
|
||||
%a = load double, double* %ptr
|
||||
%v = insertelement <2 x double> undef, double %a, i32 0
|
||||
%shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
|
||||
ret <2 x double> %shuffle
|
||||
}
|
||||
|
||||
define <2 x double> @test40(<2 x double>* %ptr) nounwind {
|
||||
%v = load <2 x double>, <2 x double>* %ptr
|
||||
%shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
|
||||
ret <2 x double> %shuffle
|
||||
}
|
||||
|
||||
define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
|
||||
%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
|
||||
ret <2 x double> %shuffle
|
||||
}
|
||||
|
||||
define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) {
|
||||
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
|
||||
ret <4 x double> %shuffle
|
||||
}
|
||||
|
||||
|
@ -4750,3 +4750,27 @@ define <16 x float>@test_int_x86_avx512_mask_movshdup_512(<16 x float> %x0, <16
|
||||
ret <16 x float> %res4
|
||||
}
|
||||
|
||||
declare <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double>, <8 x double>, i8)
|
||||
|
||||
define <8 x double>@test_int_x86_avx512_mask_movddup_512(<8 x double> %x0, <8 x double> %x1, i8 %x2) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_movddup_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: movzbl %dil, %eax
|
||||
; CHECK-NEXT: kmovw %eax, %k1
|
||||
; CHECK-NEXT: vmovddup %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: ## zmm1 = zmm0[0,0,2,2,4,4,6,6]
|
||||
; CHECK-NEXT: vmovddup %zmm0, %zmm2 {%k1} {z}
|
||||
; CHECK-NEXT: ## zmm2 = zmm0[0,0,2,2,4,4,6,6]
|
||||
; CHECK-NEXT: vmovddup %zmm0, %zmm0
|
||||
; CHECK-NEXT: ## zmm0 = zmm0[0,0,2,2,4,4,6,6]
|
||||
; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
|
||||
; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> %x1, i8 %x2)
|
||||
%res1 = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> %x1, i8 -1)
|
||||
%res2 = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> zeroinitializer, i8 %x2)
|
||||
%res3 = fadd <8 x double> %res, %res1
|
||||
%res4 = fadd <8 x double> %res2, %res3
|
||||
ret <8 x double> %res4
|
||||
}
|
||||
|
||||
|
@ -5483,4 +5483,50 @@ define <8 x float>@test_int_x86_avx512_mask_movshdup_256(<8 x float> %x0, <8 x f
|
||||
%res4 = fadd <8 x float> %res2, %res3
|
||||
ret <8 x float> %res4
|
||||
}
|
||||
declare <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double>, <2 x double>, i8)
|
||||
|
||||
define <2 x double>@test_int_x86_avx512_mask_movddup_128(<2 x double> %x0, <2 x double> %x1, i8 %x2) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_movddup_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: movzbl %dil, %eax
|
||||
; CHECK-NEXT: kmovw %eax, %k1
|
||||
; CHECK-NEXT: vmovddup %xmm0, %xmm1 {%k1}
|
||||
; CHECK-NEXT: ## xmm1 = xmm0[0,0]
|
||||
; CHECK-NEXT: vmovddup %xmm0, %xmm2 {%k1} {z}
|
||||
; CHECK-NEXT: ## xmm2 = xmm0[0,0]
|
||||
; CHECK-NEXT: vmovddup %xmm0, %xmm0
|
||||
; CHECK-NEXT: ## xmm0 = xmm0[0,0]
|
||||
; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
|
||||
; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 %x2)
|
||||
%res1 = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 -1)
|
||||
%res2 = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> zeroinitializer, i8 %x2)
|
||||
%res3 = fadd <2 x double> %res, %res1
|
||||
%res4 = fadd <2 x double> %res2, %res3
|
||||
ret <2 x double> %res4
|
||||
}
|
||||
|
||||
declare <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double>, <4 x double>, i8)
|
||||
|
||||
define <4 x double>@test_int_x86_avx512_mask_movddup_256(<4 x double> %x0, <4 x double> %x1, i8 %x2) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_movddup_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: movzbl %dil, %eax
|
||||
; CHECK-NEXT: kmovw %eax, %k1
|
||||
; CHECK-NEXT: vmovddup %ymm0, %ymm1 {%k1}
|
||||
; CHECK-NEXT: ## ymm1 = ymm0[0,0,2,2]
|
||||
; CHECK-NEXT: vmovddup %ymm0, %ymm2 {%k1} {z}
|
||||
; CHECK-NEXT: ## ymm2 = ymm0[0,0,2,2]
|
||||
; CHECK-NEXT: vmovddup %ymm0, %ymm0
|
||||
; CHECK-NEXT: ## ymm0 = ymm0[0,0,2,2]
|
||||
; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
|
||||
; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 %x2)
|
||||
%res1 = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 -1)
|
||||
%res2 = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> zeroinitializer, i8 %x2)
|
||||
%res3 = fadd <4 x double> %res, %res1
|
||||
%res4 = fadd <4 x double> %res2, %res3
|
||||
ret <4 x double> %res4
|
||||
}
|
||||
|
@ -1361,27 +1361,48 @@ define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
|
||||
; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: insert_dup_mem_v2f64:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: insert_dup_mem_v2f64:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: insert_dup_mem_v2f64:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vmovsd (%rdi), %xmm0
|
||||
; AVX512VL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; AVX512VL-NEXT: retq
|
||||
; AVX-LABEL: insert_dup_mem_v2f64:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
|
||||
; AVX-NEXT: retq
|
||||
%a = load double, double* %ptr
|
||||
%v = insertelement <2 x double> undef, double %a, i32 0
|
||||
%shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
|
||||
ret <2 x double> %shuffle
|
||||
}
|
||||
|
||||
define <2 x double> @insert_dup_mem128_v2f64(<2 x double>* %ptr) nounwind {
|
||||
; SSE2-LABEL: insert_dup_mem128_v2f64:
|
||||
; SSE2: # BB#0:
|
||||
; SSE2-NEXT: movaps (%rdi), %xmm0
|
||||
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE3-LABEL: insert_dup_mem128_v2f64:
|
||||
; SSE3: # BB#0:
|
||||
; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
|
||||
; SSE3-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: insert_dup_mem128_v2f64:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: insert_dup_mem128_v2f64:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: insert_dup_mem128_v2f64:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
|
||||
; AVX-NEXT: retq
|
||||
%v = load <2 x double>, <2 x double>* %ptr
|
||||
%shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
|
||||
ret <2 x double> %shuffle
|
||||
}
|
||||
|
||||
|
||||
define <2 x i64> @insert_dup_mem_v2i64(i64* %ptr) {
|
||||
; SSE-LABEL: insert_dup_mem_v2i64:
|
||||
; SSE: # BB#0:
|
||||
|
@ -188,6 +188,16 @@ define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) {
|
||||
ret <4 x double> %shuffle
|
||||
}
|
||||
|
||||
define <4 x double> @shuffle_v4f64mem_0022(<4 x double>* %ptr, <4 x double> %b) {
|
||||
; ALL-LABEL: shuffle_v4f64mem_0022:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vmovddup {{.*#+}} ymm0 = mem[0,0,2,2]
|
||||
; ALL-NEXT: retq
|
||||
%a = load <4 x double>, <4 x double>* %ptr
|
||||
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
|
||||
ret <4 x double> %shuffle
|
||||
}
|
||||
|
||||
define <4 x double> @shuffle_v4f64_1032(<4 x double> %a, <4 x double> %b) {
|
||||
; ALL-LABEL: shuffle_v4f64_1032:
|
||||
; ALL: # BB#0:
|
||||
|
@ -18561,6 +18561,42 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
|
||||
// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x17,0x8a,0xf8,0xfb,0xff,0xff]
|
||||
vmovhpd %xmm25, -1032(%rdx)
|
||||
|
||||
// CHECK: vmovddup %zmm29, %zmm5
|
||||
// CHECK: encoding: [0x62,0x91,0xff,0x48,0x12,0xed]
|
||||
vmovddup %zmm29, %zmm5
|
||||
|
||||
// CHECK: vmovddup %zmm29, %zmm5 {%k4}
|
||||
// CHECK: encoding: [0x62,0x91,0xff,0x4c,0x12,0xed]
|
||||
vmovddup %zmm29, %zmm5 {%k4}
|
||||
|
||||
// CHECK: vmovddup %zmm29, %zmm5 {%k4} {z}
|
||||
// CHECK: encoding: [0x62,0x91,0xff,0xcc,0x12,0xed]
|
||||
vmovddup %zmm29, %zmm5 {%k4} {z}
|
||||
|
||||
// CHECK: vmovddup (%rcx), %zmm5
|
||||
// CHECK: encoding: [0x62,0xf1,0xff,0x48,0x12,0x29]
|
||||
vmovddup (%rcx), %zmm5
|
||||
|
||||
// CHECK: vmovddup 291(%rax,%r14,8), %zmm5
|
||||
// CHECK: encoding: [0x62,0xb1,0xff,0x48,0x12,0xac,0xf0,0x23,0x01,0x00,0x00]
|
||||
vmovddup 291(%rax,%r14,8), %zmm5
|
||||
|
||||
// CHECK: vmovddup 8128(%rdx), %zmm5
|
||||
// CHECK: encoding: [0x62,0xf1,0xff,0x48,0x12,0x6a,0x7f]
|
||||
vmovddup 8128(%rdx), %zmm5
|
||||
|
||||
// CHECK: vmovddup 8192(%rdx), %zmm5
|
||||
// CHECK: encoding: [0x62,0xf1,0xff,0x48,0x12,0xaa,0x00,0x20,0x00,0x00]
|
||||
vmovddup 8192(%rdx), %zmm5
|
||||
|
||||
// CHECK: vmovddup -8192(%rdx), %zmm5
|
||||
// CHECK: encoding: [0x62,0xf1,0xff,0x48,0x12,0x6a,0x80]
|
||||
vmovddup -8192(%rdx), %zmm5
|
||||
|
||||
// CHECK: vmovddup -8256(%rdx), %zmm5
|
||||
// CHECK: encoding: [0x62,0xf1,0xff,0x48,0x12,0xaa,0xc0,0xdf,0xff,0xff]
|
||||
vmovddup -8256(%rdx), %zmm5
|
||||
|
||||
// CHECK: vmovsd.s %xmm15, %xmm22, %xmm21
|
||||
// CHECK: encoding: [0x62,0x31,0xcf,0x00,0x11,0xfd]
|
||||
vmovsd.s %xmm15, %xmm22, %xmm21
|
||||
|
@ -22123,6 +22123,78 @@ vaddpd {rz-sae}, %zmm2, %zmm1, %zmm1
|
||||
// CHECK: encoding: [0x62,0x61,0x7e,0x28,0x12,0x82,0xe0,0xef,0xff,0xff]
|
||||
vmovsldup -4128(%rdx), %ymm24
|
||||
|
||||
// CHECK: vmovddup %xmm23, %xmm17
|
||||
// CHECK: encoding: [0x62,0xa1,0xff,0x08,0x12,0xcf]
|
||||
vmovddup %xmm23, %xmm17
|
||||
|
||||
// CHECK: vmovddup %xmm23, %xmm17 {%k6}
|
||||
// CHECK: encoding: [0x62,0xa1,0xff,0x0e,0x12,0xcf]
|
||||
vmovddup %xmm23, %xmm17 {%k6}
|
||||
|
||||
// CHECK: vmovddup %xmm23, %xmm17 {%k6} {z}
|
||||
// CHECK: encoding: [0x62,0xa1,0xff,0x8e,0x12,0xcf]
|
||||
vmovddup %xmm23, %xmm17 {%k6} {z}
|
||||
|
||||
// CHECK: vmovddup (%rcx), %xmm17
|
||||
// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x12,0x09]
|
||||
vmovddup (%rcx), %xmm17
|
||||
|
||||
// CHECK: vmovddup 291(%rax,%r14,8), %xmm17
|
||||
// CHECK: encoding: [0x62,0xa1,0xff,0x08,0x12,0x8c,0xf0,0x23,0x01,0x00,0x00]
|
||||
vmovddup 291(%rax,%r14,8), %xmm17
|
||||
|
||||
// CHECK: vmovddup 1016(%rdx), %xmm17
|
||||
// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x12,0x4a,0x7f]
|
||||
vmovddup 1016(%rdx), %xmm17
|
||||
|
||||
// CHECK: vmovddup 1024(%rdx), %xmm17
|
||||
// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x12,0x8a,0x00,0x04,0x00,0x00]
|
||||
vmovddup 1024(%rdx), %xmm17
|
||||
|
||||
// CHECK: vmovddup -1024(%rdx), %xmm17
|
||||
// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x12,0x4a,0x80]
|
||||
vmovddup -1024(%rdx), %xmm17
|
||||
|
||||
// CHECK: vmovddup -1032(%rdx), %xmm17
|
||||
// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x12,0x8a,0xf8,0xfb,0xff,0xff]
|
||||
vmovddup -1032(%rdx), %xmm17
|
||||
|
||||
// CHECK: vmovddup %ymm25, %ymm18
|
||||
// CHECK: encoding: [0x62,0x81,0xff,0x28,0x12,0xd1]
|
||||
vmovddup %ymm25, %ymm18
|
||||
|
||||
// CHECK: vmovddup %ymm25, %ymm18 {%k4}
|
||||
// CHECK: encoding: [0x62,0x81,0xff,0x2c,0x12,0xd1]
|
||||
vmovddup %ymm25, %ymm18 {%k4}
|
||||
|
||||
// CHECK: vmovddup %ymm25, %ymm18 {%k4} {z}
|
||||
// CHECK: encoding: [0x62,0x81,0xff,0xac,0x12,0xd1]
|
||||
vmovddup %ymm25, %ymm18 {%k4} {z}
|
||||
|
||||
// CHECK: vmovddup (%rcx), %ymm18
|
||||
// CHECK: encoding: [0x62,0xe1,0xff,0x28,0x12,0x11]
|
||||
vmovddup (%rcx), %ymm18
|
||||
|
||||
// CHECK: vmovddup 291(%rax,%r14,8), %ymm18
|
||||
// CHECK: encoding: [0x62,0xa1,0xff,0x28,0x12,0x94,0xf0,0x23,0x01,0x00,0x00]
|
||||
vmovddup 291(%rax,%r14,8), %ymm18
|
||||
|
||||
// CHECK: vmovddup 4064(%rdx), %ymm18
|
||||
// CHECK: encoding: [0x62,0xe1,0xff,0x28,0x12,0x52,0x7f]
|
||||
vmovddup 4064(%rdx), %ymm18
|
||||
|
||||
// CHECK: vmovddup 4096(%rdx), %ymm18
|
||||
// CHECK: encoding: [0x62,0xe1,0xff,0x28,0x12,0x92,0x00,0x10,0x00,0x00]
|
||||
vmovddup 4096(%rdx), %ymm18
|
||||
|
||||
// CHECK: vmovddup -4096(%rdx), %ymm18
|
||||
// CHECK: encoding: [0x62,0xe1,0xff,0x28,0x12,0x52,0x80]
|
||||
vmovddup -4096(%rdx), %ymm18
|
||||
|
||||
// CHECK: vmovddup -4128(%rdx), %ymm18
|
||||
// CHECK: encoding: [0x62,0xe1,0xff,0x28,0x12,0x92,0xe0,0xef,0xff,0xff]
|
||||
vmovddup -4128(%rdx), %ymm18
|
||||
|
||||
// CHECK: vmovapd.s %xmm27, %xmm26
|
||||
// CHECK: encoding: [0x62,0x01,0xfd,0x08,0x29,0xda]
|
||||
vmovapd.s %xmm27, %xmm26
|
||||
|
Loading…
x
Reference in New Issue
Block a user