mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-27 21:50:40 +00:00
[SelectionDAG] set dereferenceable flag when expanding memcpy/memmove
When SelectionDAG expands a memcpy (or memmove) call into a sequence of load and store instructions, it disregards the dereferenceable flag even if the source pointer is known to be dereferenceable. This results in an assertion failure if SelectionDAG commonizes a load instruction generated for memcpy with another load instruction for the source pointer. This patch makes SelectionDAG set the dereferenceable flag on the load instructions properly to avoid the assertion failure. Differential Revision: https://reviews.llvm.org/D34467 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306209 cdac9f57-aa62-4fd3-8940-286f4534e8a0
This commit is contained in:
parent
7f3bf01d63
commit
059bd0e36c
@ -59,6 +59,11 @@ struct MachinePointerInfo {
|
||||
return MachinePointerInfo(V.get<const PseudoSourceValue*>(), Offset+O);
|
||||
}
|
||||
|
||||
/// Return true if memory region [V, V+Offset+Size) is known to be
|
||||
/// dereferenceable.
|
||||
bool isDereferenceable(unsigned Size, LLVMContext &C,
|
||||
const DataLayout &DL) const;
|
||||
|
||||
/// Return the LLVM IR address space number that this pointer points into.
|
||||
unsigned getAddrSpace() const;
|
||||
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "llvm/ADT/SmallString.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/Analysis/Loads.h"
|
||||
#include "llvm/Analysis/MemoryLocation.h"
|
||||
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
@ -558,6 +559,23 @@ unsigned MachinePointerInfo::getAddrSpace() const {
|
||||
return cast<PointerType>(V.get<const Value*>()->getType())->getAddressSpace();
|
||||
}
|
||||
|
||||
/// isDereferenceable - Return true if V is always dereferenceable for
|
||||
/// Offset + Size byte.
|
||||
bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C,
|
||||
const DataLayout &DL) const {
|
||||
if (!V.is<const Value*>())
|
||||
return false;
|
||||
|
||||
const Value *BasePtr = V.get<const Value*>();
|
||||
if (BasePtr == nullptr)
|
||||
return false;
|
||||
|
||||
return isDereferenceableAndAlignedPointer(BasePtr, 1,
|
||||
APInt(DL.getPointerSize(),
|
||||
Offset + Size),
|
||||
DL);
|
||||
}
|
||||
|
||||
/// getConstantPool - Return a MachinePointerInfo record that refers to the
|
||||
/// constant pool.
|
||||
MachinePointerInfo MachinePointerInfo::getConstantPool(MachineFunction &MF) {
|
||||
|
@ -4897,6 +4897,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
|
||||
// TODO: In the AlwaysInline case, if the size is big then generate a loop
|
||||
// rather than maybe a humongous number of loads and stores.
|
||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
const DataLayout &DL = DAG.getDataLayout();
|
||||
LLVMContext &C = *DAG.getContext();
|
||||
std::vector<EVT> MemOps;
|
||||
bool DstAlignCanChange = false;
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
@ -4923,15 +4925,15 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
|
||||
return SDValue();
|
||||
|
||||
if (DstAlignCanChange) {
|
||||
Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
|
||||
unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty);
|
||||
Type *Ty = MemOps[0].getTypeForEVT(C);
|
||||
unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty);
|
||||
|
||||
// Don't promote to an alignment that would require dynamic stack
|
||||
// realignment.
|
||||
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
|
||||
if (!TRI->needsStackRealignment(MF))
|
||||
while (NewAlign > Align &&
|
||||
DAG.getDataLayout().exceedsNaturalStackAlignment(NewAlign))
|
||||
DL.exceedsNaturalStackAlignment(NewAlign))
|
||||
NewAlign /= 2;
|
||||
|
||||
if (NewAlign > Align) {
|
||||
@ -4991,12 +4993,19 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
|
||||
// thing to do is generate a LoadExt/StoreTrunc pair. These simplify
|
||||
// to Load/Store if NVT==VT.
|
||||
// FIXME does the case above also need this?
|
||||
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
|
||||
EVT NVT = TLI.getTypeToTransformTo(C, VT);
|
||||
assert(NVT.bitsGE(VT));
|
||||
|
||||
bool isDereferenceable =
|
||||
SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL);
|
||||
MachineMemOperand::Flags SrcMMOFlags = MMOFlags;
|
||||
if (isDereferenceable)
|
||||
SrcMMOFlags |= MachineMemOperand::MODereferenceable;
|
||||
|
||||
Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
|
||||
DAG.getMemBasePlusOffset(Src, SrcOff, dl),
|
||||
SrcPtrInfo.getWithOffset(SrcOff), VT,
|
||||
MinAlign(SrcAlign, SrcOff), MMOFlags);
|
||||
MinAlign(SrcAlign, SrcOff), SrcMMOFlags);
|
||||
OutChains.push_back(Value.getValue(1));
|
||||
Store = DAG.getTruncStore(
|
||||
Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
|
||||
@ -5024,6 +5033,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
|
||||
// Expand memmove to a series of load and store ops if the size operand falls
|
||||
// below a certain threshold.
|
||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
const DataLayout &DL = DAG.getDataLayout();
|
||||
LLVMContext &C = *DAG.getContext();
|
||||
std::vector<EVT> MemOps;
|
||||
bool DstAlignCanChange = false;
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
@ -5046,8 +5057,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
|
||||
return SDValue();
|
||||
|
||||
if (DstAlignCanChange) {
|
||||
Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
|
||||
unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty);
|
||||
Type *Ty = MemOps[0].getTypeForEVT(C);
|
||||
unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty);
|
||||
if (NewAlign > Align) {
|
||||
// Give the stack frame object a larger alignment if needed.
|
||||
if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign)
|
||||
@ -5068,9 +5079,15 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
|
||||
unsigned VTSize = VT.getSizeInBits() / 8;
|
||||
SDValue Value;
|
||||
|
||||
bool isDereferenceable =
|
||||
SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL);
|
||||
MachineMemOperand::Flags SrcMMOFlags = MMOFlags;
|
||||
if (isDereferenceable)
|
||||
SrcMMOFlags |= MachineMemOperand::MODereferenceable;
|
||||
|
||||
Value =
|
||||
DAG.getLoad(VT, dl, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl),
|
||||
SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, MMOFlags);
|
||||
SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, SrcMMOFlags);
|
||||
LoadValues.push_back(Value);
|
||||
LoadChains.push_back(Value.getValue(1));
|
||||
SrcOff += VTSize;
|
||||
|
74
test/CodeGen/PowerPC/memcpy_dereferenceable.ll
Normal file
74
test/CodeGen/PowerPC/memcpy_dereferenceable.ll
Normal file
@ -0,0 +1,74 @@
|
||||
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
|
||||
|
||||
; This code causes an assertion failure if dereferenceable flag is not properly set in the load generated for memcpy
|
||||
|
||||
; CHECK-LABEL: @func
|
||||
; CHECK: lxvd2x [[VREG:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK-NOT: lxvd2x
|
||||
; CHECK: stxvd2x [[VREG:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK: stxvd2x [[VREG:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK: blr
|
||||
|
||||
define void @func(i1 %flag) {
|
||||
entry:
|
||||
%pairs = alloca [4 x <2 x i64>], align 8
|
||||
%pair1 = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* %pairs, i64 0, i64 1
|
||||
%pair2 = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* %pairs, i64 0, i64 2
|
||||
%pvec1 = bitcast <2 x i64>* %pair1 to <2 x i64>*
|
||||
%pvec2 = bitcast <2 x i64>* %pair2 to <2 x i64>*
|
||||
%dst = bitcast [4 x <2 x i64>]* %pairs to i8*
|
||||
%src = bitcast <2 x i64>* %pair2 to i8*
|
||||
br i1 %flag, label %end, label %dummy
|
||||
|
||||
end:
|
||||
; copy third element into first element by memcpy
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %dst, i8* %src, i64 16, i32 8, i1 false)
|
||||
; copy third element into second element by LD/ST
|
||||
%vec2 = load <2 x i64>, <2 x i64>* %pvec2, align 8
|
||||
store <2 x i64> %vec2, <2 x i64>* %pvec1, align 8
|
||||
ret void
|
||||
|
||||
dummy:
|
||||
; to make use of %src in another BB
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %src, i8* %src, i64 0, i32 0, i1 false)
|
||||
br label %end
|
||||
}
|
||||
|
||||
|
||||
; CHECK-LABEL: @func2
|
||||
; CHECK: lxvd2x [[VREG:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK-NOT: lxvd2x
|
||||
; CHECK: stxvd2x [[VREG:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK: stxvd2x [[VREG:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK: blr
|
||||
|
||||
define void @func2(i1 %flag) {
|
||||
entry:
|
||||
%pairs = alloca [4 x <2 x i64>], align 8
|
||||
%pair1 = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* %pairs, i64 0, i64 1
|
||||
%pair2 = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* %pairs, i64 0, i64 2
|
||||
%pvec1 = bitcast <2 x i64>* %pair1 to <2 x i64>*
|
||||
%pvec2 = bitcast <2 x i64>* %pair2 to <2 x i64>*
|
||||
%dst = bitcast [4 x <2 x i64>]* %pairs to i8*
|
||||
%src = bitcast <2 x i64>* %pair2 to i8*
|
||||
br i1 %flag, label %end, label %dummy
|
||||
|
||||
end:
|
||||
; copy third element into first element by memmove
|
||||
call void @llvm.memmove.p0i8.p0i8.i64(i8* nonnull %dst, i8* %src, i64 16, i32 8, i1 false)
|
||||
; copy third element into second element by LD/ST
|
||||
%vec2 = load <2 x i64>, <2 x i64>* %pvec2, align 8
|
||||
store <2 x i64> %vec2, <2 x i64>* %pvec1, align 8
|
||||
ret void
|
||||
|
||||
dummy:
|
||||
; to make use of %src in another BB
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %src, i8* %src, i64 0, i32 0, i1 false)
|
||||
br label %end
|
||||
}
|
||||
|
||||
; Function Attrs: argmemonly nounwind
|
||||
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #1
|
||||
declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #1
|
||||
|
||||
attributes #1 = { argmemonly nounwind }
|
Loading…
Reference in New Issue
Block a user