[SelectionDAG][X86] Fix incorrect offset generated for VMASKMOV

When creating high MachineMemOperand for MSTORE/MLOAD we supply
it with the original PointerInfo, while the pointer itself had been incremented.
The patch adds the proper offset to the PointerInfo.

llvm-svn: 325135
This commit is contained in:
Alexander Ivchenko 2018-02-14 15:55:24 +00:00
parent 81e6c94efb
commit 833b4ee927
4 changed files with 26 additions and 23 deletions

View File

@ -6835,12 +6835,12 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
MST->isCompressingStore());
unsigned HiOffset = LoMemVT.getStoreSize();
MMO = DAG.getMachineFunction().
getMachineMemOperand(MST->getPointerInfo(),
MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
SecondHalfAlignment, MST->getAAInfo(),
MST->getRanges());
MMO = DAG.getMachineFunction().getMachineMemOperand(
MST->getPointerInfo().getWithOffset(HiOffset),
MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment,
MST->getAAInfo(), MST->getRanges());
Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
MST->isTruncatingStore(),
@ -6985,11 +6985,12 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
MLD->isExpandingLoad());
unsigned HiOffset = LoMemVT.getStoreSize();
MMO = DAG.getMachineFunction().
getMachineMemOperand(MLD->getPointerInfo(),
MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
MMO = DAG.getMachineFunction().getMachineMemOperand(
MLD->getPointerInfo().getWithOffset(HiOffset),
MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment,
MLD->getAAInfo(), MLD->getRanges());
Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
ISD::NON_EXTLOAD, MLD->isExpandingLoad());

View File

@ -1210,16 +1210,16 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,
MLD->isExpandingLoad());
unsigned HiOffset = LoMemVT.getStoreSize();
MMO = DAG.getMachineFunction().
getMachineMemOperand(MLD->getPointerInfo(),
MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
MMO = DAG.getMachineFunction().getMachineMemOperand(
MLD->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOLoad,
HiMemVT.getStoreSize(), SecondHalfAlignment, MLD->getAAInfo(),
MLD->getRanges());
Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
ExtType, MLD->isExpandingLoad());
// Build a factor node to remember that this load is independent of the
// other one.
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
@ -1928,10 +1928,12 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
N->isCompressingStore());
MMO = DAG.getMachineFunction().
getMachineMemOperand(N->getPointerInfo(),
MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
SecondHalfAlignment, N->getAAInfo(), N->getRanges());
unsigned HiOffset = LoMemVT.getStoreSize();
MMO = DAG.getMachineFunction().getMachineMemOperand(
N->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOStore,
HiMemVT.getStoreSize(), SecondHalfAlignment, N->getAAInfo(),
N->getRanges());
Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
N->isTruncatingStore(), N->isCompressingStore());

View File

@ -388,11 +388,11 @@ define void @test18(double* %base, <16 x double> %V, <16 x i1> %mask) {
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL-NEXT: kshiftrw $8, %k1, %k2
; KNL-NEXT: vcompresspd %zmm0, (%rdi) {%k1}
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: movzbl %al, %eax
; KNL-NEXT: popcntl %eax, %eax
; KNL-NEXT: vcompresspd %zmm1, (%rdi,%rax,8) {%k2}
; KNL-NEXT: vcompresspd %zmm0, (%rdi) {%k1}
; KNL-NEXT: retq
call void @llvm.masked.compressstore.v16f64(<16 x double> %V, double* %base, <16 x i1> %mask)
ret void

View File

@ -9,8 +9,8 @@ define void @test_v16f() local_unnamed_addr {
; CHECK: bb.0.bb:
; CHECK: [[AVX_SET0_:%[0-9]+]]:vr256 = AVX_SET0
; CHECK: [[VMASKMOVPSYrm:%[0-9]+]]:vr256 = VMASKMOVPSYrm [[AVX_SET0_]], %stack.0.stack_input_vec, 1, $noreg, 0, $noreg :: (load 32 from %ir.stack_input_vec, align 4)
; CHECK: [[VMASKMOVPSYrm1:%[0-9]+]]:vr256 = VMASKMOVPSYrm [[AVX_SET0_]], %stack.0.stack_input_vec, 1, $noreg, 32, $noreg :: (load 32 from %ir.stack_input_vec, align 4)
; CHECK: VMASKMOVPSYmr %stack.1.stack_output_vec, 1, $noreg, 32, $noreg, [[AVX_SET0_]], killed [[VMASKMOVPSYrm1]] :: (store 32 into %ir.stack_output_vec, align 4)
; CHECK: [[VMASKMOVPSYrm1:%[0-9]+]]:vr256 = VMASKMOVPSYrm [[AVX_SET0_]], %stack.0.stack_input_vec, 1, $noreg, 32, $noreg :: (load 32 from %ir.stack_input_vec + 32, align 4)
; CHECK: VMASKMOVPSYmr %stack.1.stack_output_vec, 1, $noreg, 32, $noreg, [[AVX_SET0_]], killed [[VMASKMOVPSYrm1]] :: (store 32 into %ir.stack_output_vec + 32, align 4)
; CHECK: VMASKMOVPSYmr %stack.1.stack_output_vec, 1, $noreg, 0, $noreg, [[AVX_SET0_]], killed [[VMASKMOVPSYrm]] :: (store 32 into %ir.stack_output_vec, align 4)
; CHECK: RET 0
bb:
@ -29,8 +29,8 @@ define void @test_v8d() local_unnamed_addr {
; CHECK: bb.0.bb:
; CHECK: [[AVX_SET0_:%[0-9]+]]:vr256 = AVX_SET0
; CHECK: [[VMASKMOVPDYrm:%[0-9]+]]:vr256 = VMASKMOVPDYrm [[AVX_SET0_]], %stack.0.stack_input_vec, 1, $noreg, 0, $noreg :: (load 32 from %ir.stack_input_vec, align 4)
; CHECK: [[VMASKMOVPDYrm1:%[0-9]+]]:vr256 = VMASKMOVPDYrm [[AVX_SET0_]], %stack.0.stack_input_vec, 1, $noreg, 32, $noreg :: (load 32 from %ir.stack_input_vec, align 4)
; CHECK: VMASKMOVPDYmr %stack.1.stack_output_vec, 1, $noreg, 32, $noreg, [[AVX_SET0_]], killed [[VMASKMOVPDYrm1]] :: (store 32 into %ir.stack_output_vec, align 4)
; CHECK: [[VMASKMOVPDYrm1:%[0-9]+]]:vr256 = VMASKMOVPDYrm [[AVX_SET0_]], %stack.0.stack_input_vec, 1, $noreg, 32, $noreg :: (load 32 from %ir.stack_input_vec + 32, align 4)
; CHECK: VMASKMOVPDYmr %stack.1.stack_output_vec, 1, $noreg, 32, $noreg, [[AVX_SET0_]], killed [[VMASKMOVPDYrm1]] :: (store 32 into %ir.stack_output_vec + 32, align 4)
; CHECK: VMASKMOVPDYmr %stack.1.stack_output_vec, 1, $noreg, 0, $noreg, [[AVX_SET0_]], killed [[VMASKMOVPDYrm]] :: (store 32 into %ir.stack_output_vec, align 4)
; CHECK: RET 0
bb: