start using irbuilder to make mem intrinsics in a few passes.
llvm-svn: 122572
parent a46fd80777
commit 9007b56712
lib/Transforms/Scalar/MemCpyOpt.cpp

@@ -25,6 +25,7 @@
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/IRBuilder.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetData.h"
 #include <list>
@@ -332,8 +333,6 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
     }
   }
 
-  LLVMContext &Context = SI->getContext();
-
   // There are two cases that are interesting for this code to handle: memcpy
   // and memset. Right now we only handle memset.
 
@@ -345,7 +344,6 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
     return false;
 
   AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-  Module *M = SI->getParent()->getParent()->getParent();
 
   // Okay, so we now have a single store that can be splatable. Scan to find
   // all subsequent stores of the same value to offset from the same pointer.
@@ -431,28 +429,10 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
       Alignment = TD->getABITypeAlignment(EltType);
     }
 
-    // Cast the start ptr to be i8* as memset requires.
-    const PointerType* StartPTy = cast<PointerType>(StartPtr->getType());
-    const PointerType *i8Ptr = Type::getInt8PtrTy(Context,
-                                                  StartPTy->getAddressSpace());
-    if (StartPTy!= i8Ptr)
-      StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getName(),
-                                 InsertPt);
-
-    Value *Ops[] = {
-      StartPtr, ByteVal,  // Start, value
-      // size
-      ConstantInt::get(Type::getInt64Ty(Context), Range.End-Range.Start),
-      // align
-      ConstantInt::get(Type::getInt32Ty(Context), Alignment),
-      // volatile
-      ConstantInt::getFalse(Context),
-    };
-    const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() };
-
-    Function *MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);
-
-    Value *C = CallInst::Create(MemSetF, Ops, Ops+5, "", InsertPt);
+    IRBuilder<> Builder(InsertPt);
+    Value *C =
+      Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
+
     DEBUG(dbgs() << "Replace stores:\n";
           for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
             dbgs() << *Range.TheStores[i] << '\n';
@@ -663,20 +643,11 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
   // If the dest of the second might alias the source of the first, then the
   // source and dest might overlap. We still want to eliminate the intermediate
   // value, but we have to generate a memmove instead of memcpy.
-  Intrinsic::ID ResultFn = Intrinsic::memcpy;
-  if (AA.alias(AA.getLocationForDest(M), AA.getLocationForSource(MDep)) !=
-      AliasAnalysis::NoAlias)
-    ResultFn = Intrinsic::memmove;
+  bool UseMemMove = false;
+  if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(MDep)))
+    UseMemMove = true;
 
   // If all checks passed, then we can transform M.
-  const Type *ArgTys[3] = {
-    M->getRawDest()->getType(),
-    MDep->getRawSource()->getType(),
-    M->getLength()->getType()
-  };
-  Function *MemCpyFun =
-    Intrinsic::getDeclaration(MDep->getParent()->getParent()->getParent(),
-                              ResultFn, ArgTys, 3);
 
   // Make sure to use the lesser of the alignment of the source and the dest
   // since we're changing where we're reading from, but don't want to increase
@@ -684,14 +655,14 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
   // TODO: Is this worth it if we're creating a less aligned memcpy? For
   // example we could be moving from movaps -> movq on x86.
   unsigned Align = std::min(MDep->getAlignment(), M->getAlignment());
-  Value *Args[5] = {
-    M->getRawDest(),
-    MDep->getRawSource(),
-    M->getLength(),
-    ConstantInt::get(Type::getInt32Ty(MemCpyFun->getContext()), Align),
-    M->getVolatileCst()
-  };
-  CallInst::Create(MemCpyFun, Args, Args+5, "", M);
+
+  IRBuilder<> Builder(M);
+  if (UseMemMove)
+    Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(),
+                          Align, M->isVolatile());
+  else
+    Builder.CreateMemCpy(M->getRawDest(), MDep->getRawSource(), M->getLength(),
+                         Align, M->isVolatile());
 
   // Remove the instruction we're replacing.
   MD->removeInstruction(M);
@@ -722,17 +693,9 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource()))
     if (GV->isConstant() && GV->hasDefinitiveInitializer())
       if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) {
-        Value *Ops[] = {
-          M->getRawDest(), ByteVal,               // Start, value
-          CopySize,                               // Size
-          M->getAlignmentCst(),                   // Alignment
-          ConstantInt::getFalse(M->getContext()), // volatile
-        };
-        const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() };
-        Module *Mod = M->getParent()->getParent()->getParent();
-        Function *MemSetF = Intrinsic::getDeclaration(Mod, Intrinsic::memset,
-                                                      Tys, 2);
-        CallInst::Create(MemSetF, Ops, Ops+5, "", M);
+        IRBuilder<> Builder(M);
+        Builder.CreateMemSet(M->getRawDest(), ByteVal, CopySize,
+                             M->getAlignment(), false);
         MD->removeInstruction(M);
         M->eraseFromParent();
         ++NumCpyToSet;
@@ -765,9 +728,7 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
   AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
 
   // See if the pointers alias.
-  if (AA.alias(AA.getLocationForDest(M),
-               AA.getLocationForSource(M)) !=
-      AliasAnalysis::NoAlias)
+  if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(M)))
     return false;
 
   DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n");
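Note: every MemCpyOpt hunk above drops the same boilerplate, namely looking up the overloaded intrinsic with Intrinsic::getDeclaration, spelling out the five operands by hand, and calling CallInst::Create. A minimal sketch of the new pattern follows (the helper name emitMemSetBefore is hypothetical; it assumes this era's tree, where IRBuilder still lives in llvm/Support/IRBuilder.h):

#include "llvm/Instructions.h"
#include "llvm/Support/IRBuilder.h"
using namespace llvm;

// Hypothetical helper: emit memset(Ptr, Val, Len) with the given alignment
// immediately before InsertPt, the way processStore() now does.
static Value *emitMemSetBefore(Instruction *InsertPt, Value *Ptr, Value *Val,
                               uint64_t Len, unsigned Align) {
  IRBuilder<> Builder(InsertPt);  // new instructions go right before InsertPt
  // One call stands in for the old BitCastInst to i8*, the five-element Ops
  // array, the Intrinsic::getDeclaration lookup, and CallInst::Create.
  return Builder.CreateMemSet(Ptr, Val, Len, Align, /*isVolatile=*/false);
}

The win is locality: the intrinsic's operand layout (the i8* cast, the size type, the align and volatile operands) is encoded once inside IRBuilder instead of being repeated in every pass.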
lib/Transforms/Scalar/ScalarReplAggregates.cpp

@@ -425,7 +425,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
       continue;
     }
 
-    IRBuilder<> Builder(User->getParent(), User);
+    IRBuilder<> Builder(User);
 
     if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
       // The load is a bit extract from NewAI shifted right by Offset bits.
@@ -1353,8 +1353,6 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
   }
 
   // Process each element of the aggregate.
-  Value *TheFn = MI->getCalledValue();
-  const Type *BytePtrTy = MI->getRawDest()->getType();
   bool SROADest = MI->getRawDest() == Inst;
 
   Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));
@@ -1448,55 +1446,24 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
       // Otherwise, if we're storing a byte variable, use a memset call for
       // this element.
     }
 
-    // Cast the element pointer to BytePtrTy.
-    if (EltPtr->getType() != BytePtrTy)
-      EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getName(), MI);
-
-    // Cast the other pointer (if we have one) to BytePtrTy.
-    if (OtherElt && OtherElt->getType() != BytePtrTy) {
-      // Preserve address space of OtherElt
-      const PointerType* OtherPTy = cast<PointerType>(OtherElt->getType());
-      const PointerType* PTy = cast<PointerType>(BytePtrTy);
-      if (OtherPTy->getElementType() != PTy->getElementType()) {
-        Type *NewOtherPTy = PointerType::get(PTy->getElementType(),
-                                             OtherPTy->getAddressSpace());
-        OtherElt = new BitCastInst(OtherElt, NewOtherPTy,
-                                   OtherElt->getName(), MI);
-      }
-    }
-
     unsigned EltSize = TD->getTypeAllocSize(EltTy);
 
+    IRBuilder<> Builder(MI);
+
     // Finally, insert the meminst for this element.
-    if (isa<MemTransferInst>(MI)) {
-      Value *Ops[] = {
-        SROADest ? EltPtr : OtherElt, // Dest ptr
-        SROADest ? OtherElt : EltPtr, // Src ptr
-        ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size
-        // Align
-        ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign),
-        MI->getVolatileCst()
-      };
-      // In case we fold the address space overloaded memcpy of A to B
-      // with memcpy of B to C, change the function to be a memcpy of A to C.
-      const Type *Tys[] = { Ops[0]->getType(), Ops[1]->getType(),
-                            Ops[2]->getType() };
-      Module *M = MI->getParent()->getParent()->getParent();
-      TheFn = Intrinsic::getDeclaration(M, MI->getIntrinsicID(), Tys, 3);
-      CallInst::Create(TheFn, Ops, Ops + 5, "", MI);
+    if (isa<MemSetInst>(MI)) {
+      Builder.CreateMemSet(EltPtr, MI->getArgOperand(1), EltSize,
+                           MI->isVolatile());
     } else {
-      assert(isa<MemSetInst>(MI));
-      Value *Ops[] = {
-        EltPtr, MI->getArgOperand(1), // Dest, Value,
-        ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size
-        Zero, // Align
-        ConstantInt::getFalse(MI->getContext()) // isVolatile
-      };
-      const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() };
-      Module *M = MI->getParent()->getParent()->getParent();
-      TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);
-      CallInst::Create(TheFn, Ops, Ops + 5, "", MI);
+      assert(isa<MemTransferInst>(MI));
+      Value *Dst = SROADest ? EltPtr : OtherElt; // Dest ptr
+      Value *Src = SROADest ? OtherElt : EltPtr; // Src ptr
+
+      if (isa<MemCpyInst>(MI))
+        Builder.CreateMemCpy(Dst, Src, EltSize, OtherEltAlign,MI->isVolatile());
+      else
+        Builder.CreateMemMove(Dst, Src, EltSize,OtherEltAlign,MI->isVolatile());
     }
   }
   DeadInsts.push_back(MI);
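The rewritten SROA code also switches from a cached callee (the old TheFn) to dispatching on the intrinsic's dynamic type. A condensed sketch of that dispatch (hypothetical helper emitEltMemInst; EltPtr, OtherElt, EltSize, OtherEltAlign, and SROADest stand in for SROA's per-element state, and the era's headers are assumed):

#include "llvm/IntrinsicInst.h"
#include "llvm/Support/IRBuilder.h"
using namespace llvm;

// Hypothetical helper: re-emit one element-sized copy of the aggregate-wide
// mem intrinsic MI, as RewriteMemIntrinUserOfAlloca() now does via IRBuilder.
static void emitEltMemInst(MemIntrinsic *MI, Value *EltPtr, Value *OtherElt,
                           uint64_t EltSize, unsigned OtherEltAlign,
                           bool SROADest) {
  IRBuilder<> Builder(MI);  // insert the replacement right before MI
  if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
    // memset of one element: same byte value, element-sized length,
    // conservative alignment of 1 for this sketch.
    Builder.CreateMemSet(EltPtr, MSI->getValue(), EltSize, /*Align=*/1,
                         MSI->isVolatile());
    return;
  }
  // memcpy/memmove: pick source and dest depending on which side of the
  // transfer the scalarized alloca is on, exactly as the hunk above does.
  Value *Dst = SROADest ? EltPtr : OtherElt;
  Value *Src = SROADest ? OtherElt : EltPtr;
  if (isa<MemCpyInst>(MI))
    Builder.CreateMemCpy(Dst, Src, EltSize, OtherEltAlign, MI->isVolatile());
  else
    Builder.CreateMemMove(Dst, Src, EltSize, OtherEltAlign, MI->isVolatile());
}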
test/Transforms/MemCpyOpt/align.ll

@@ -4,7 +4,7 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 ; The resulting memset is only 4-byte aligned, despite containing
 ; a 16-byte alignmed store in the middle.
 
-; CHECK: call void @llvm.memset.p0i8.i64(i8* %a01, i8 0, i64 16, i32 4, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 16, i32 4, i1 false)
 
 define void @foo(i32* %p) {
   %a0 = getelementptr i32* %p, i64 0
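The test update is the visible consequence of going through IRBuilder: the builder picks its own name for the i8* cast it inserts, so the CHECK line can presumably no longer pin the memset's pointer operand to %a01. FileCheck treats {{...}} as an inline regular expression, so {{.*}} matches whatever name the operand ends up with while still checking the value, length, alignment, and volatile operands exactly.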