[SelectionDAG] set dereferenceable flag when expanding memcpy/memmove

When SelectionDAG expands memcpy (or memmove) call into a sequence of load and store instructions, it disregards dereferenceable flag even the source pointer is known to be dereferenceable.
This results in an assertion failure if SelectionDAG commonizes a load instruction generated for memcpy with another load instruction for the source pointer.
This patch makes SelectionDAG to set the dereferenceable flag for the load instructions properly to avoid the assertion failure.

Differential Revision: https://reviews.llvm.org/D34467




git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306209 cdac9f57-aa62-4fd3-8940-286f4534e8a0
This commit is contained in:
Hiroshi Inoue 2017-06-24 15:17:38 +00:00
parent 7f3bf01d63
commit 059bd0e36c
4 changed files with 122 additions and 8 deletions

View File

@ -59,6 +59,11 @@ struct MachinePointerInfo {
return MachinePointerInfo(V.get<const PseudoSourceValue*>(), Offset+O);
}
/// Return true if memory region [V, V+Offset+Size) is known to be
/// dereferenceable.
bool isDereferenceable(unsigned Size, LLVMContext &C,
const DataLayout &DL) const;
/// Return the LLVM IR address space number that this pointer points into.
unsigned getAddrSpace() const;

View File

@ -21,6 +21,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@ -558,6 +559,23 @@ unsigned MachinePointerInfo::getAddrSpace() const {
return cast<PointerType>(V.get<const Value*>()->getType())->getAddressSpace();
}
/// isDereferenceable - Return true if V is always dereferenceable for
/// Offset + Size byte.
bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C,
const DataLayout &DL) const {
if (!V.is<const Value*>())
return false;
const Value *BasePtr = V.get<const Value*>();
if (BasePtr == nullptr)
return false;
return isDereferenceableAndAlignedPointer(BasePtr, 1,
APInt(DL.getPointerSize(),
Offset + Size),
DL);
}
/// getConstantPool - Return a MachinePointerInfo record that refers to the
/// constant pool.
MachinePointerInfo MachinePointerInfo::getConstantPool(MachineFunction &MF) {

View File

@ -4897,6 +4897,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
// TODO: In the AlwaysInline case, if the size is big then generate a loop
// rather than maybe a humongous number of loads and stores.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const DataLayout &DL = DAG.getDataLayout();
LLVMContext &C = *DAG.getContext();
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
@ -4923,15 +4925,15 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
return SDValue();
if (DstAlignCanChange) {
Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty);
Type *Ty = MemOps[0].getTypeForEVT(C);
unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty);
// Don't promote to an alignment that would require dynamic stack
// realignment.
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!TRI->needsStackRealignment(MF))
while (NewAlign > Align &&
DAG.getDataLayout().exceedsNaturalStackAlignment(NewAlign))
DL.exceedsNaturalStackAlignment(NewAlign))
NewAlign /= 2;
if (NewAlign > Align) {
@ -4991,12 +4993,19 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
// thing to do is generate a LoadExt/StoreTrunc pair. These simplify
// to Load/Store if NVT==VT.
// FIXME does the case above also need this?
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
EVT NVT = TLI.getTypeToTransformTo(C, VT);
assert(NVT.bitsGE(VT));
bool isDereferenceable =
SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL);
MachineMemOperand::Flags SrcMMOFlags = MMOFlags;
if (isDereferenceable)
SrcMMOFlags |= MachineMemOperand::MODereferenceable;
Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
DAG.getMemBasePlusOffset(Src, SrcOff, dl),
SrcPtrInfo.getWithOffset(SrcOff), VT,
MinAlign(SrcAlign, SrcOff), MMOFlags);
MinAlign(SrcAlign, SrcOff), SrcMMOFlags);
OutChains.push_back(Value.getValue(1));
Store = DAG.getTruncStore(
Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
@ -5024,6 +5033,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
// Expand memmove to a series of load and store ops if the size operand falls
// below a certain threshold.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const DataLayout &DL = DAG.getDataLayout();
LLVMContext &C = *DAG.getContext();
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
@ -5046,8 +5057,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
return SDValue();
if (DstAlignCanChange) {
Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty);
Type *Ty = MemOps[0].getTypeForEVT(C);
unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty);
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign)
@ -5068,9 +5079,15 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
unsigned VTSize = VT.getSizeInBits() / 8;
SDValue Value;
bool isDereferenceable =
SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL);
MachineMemOperand::Flags SrcMMOFlags = MMOFlags;
if (isDereferenceable)
SrcMMOFlags |= MachineMemOperand::MODereferenceable;
Value =
DAG.getLoad(VT, dl, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl),
SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, MMOFlags);
SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, SrcMMOFlags);
LoadValues.push_back(Value);
LoadChains.push_back(Value.getValue(1));
SrcOff += VTSize;

View File

@ -0,0 +1,74 @@
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
; This code causes an assertion failure if dereferenceable flag is not properly set in the load generated for memcpy
; CHECK-LABEL: @func
; CHECK: lxvd2x [[VREG:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
; CHECK-NOT: lxvd2x
; CHECK: stxvd2x [[VREG:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
; CHECK: stxvd2x [[VREG:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
; CHECK: blr
define void @func(i1 %flag) {
entry:
%pairs = alloca [4 x <2 x i64>], align 8
%pair1 = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* %pairs, i64 0, i64 1
%pair2 = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* %pairs, i64 0, i64 2
%pvec1 = bitcast <2 x i64>* %pair1 to <2 x i64>*
%pvec2 = bitcast <2 x i64>* %pair2 to <2 x i64>*
%dst = bitcast [4 x <2 x i64>]* %pairs to i8*
%src = bitcast <2 x i64>* %pair2 to i8*
br i1 %flag, label %end, label %dummy
end:
; copy third element into first element by memcpy
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %dst, i8* %src, i64 16, i32 8, i1 false)
; copy third element into second element by LD/ST
%vec2 = load <2 x i64>, <2 x i64>* %pvec2, align 8
store <2 x i64> %vec2, <2 x i64>* %pvec1, align 8
ret void
dummy:
; to make use of %src in another BB
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %src, i8* %src, i64 0, i32 0, i1 false)
br label %end
}
; CHECK-LABEL: @func2
; CHECK: lxvd2x [[VREG:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
; CHECK-NOT: lxvd2x
; CHECK: stxvd2x [[VREG:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
; CHECK: stxvd2x [[VREG:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
; CHECK: blr
define void @func2(i1 %flag) {
entry:
%pairs = alloca [4 x <2 x i64>], align 8
%pair1 = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* %pairs, i64 0, i64 1
%pair2 = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* %pairs, i64 0, i64 2
%pvec1 = bitcast <2 x i64>* %pair1 to <2 x i64>*
%pvec2 = bitcast <2 x i64>* %pair2 to <2 x i64>*
%dst = bitcast [4 x <2 x i64>]* %pairs to i8*
%src = bitcast <2 x i64>* %pair2 to i8*
br i1 %flag, label %end, label %dummy
end:
; copy third element into first element by memcpy
call void @llvm.memmove.p0i8.p0i8.i64(i8* nonnull %dst, i8* %src, i64 16, i32 8, i1 false)
; copy third element into second element by LD/ST
%vec2 = load <2 x i64>, <2 x i64>* %pvec2, align 8
store <2 x i64> %vec2, <2 x i64>* %pvec1, align 8
ret void
dummy:
; to make use of %src in another BB
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %src, i8* %src, i64 0, i32 0, i1 false)
br label %end
}
; Function Attrs: argmemonly nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #1
declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #1
attributes #1 = { argmemonly nounwind }