mirror of
https://github.com/RPCS3/llvm.git
synced 2025-02-19 18:26:28 +00:00
Follow-up fix to r165928: handle memset rewriting for widened integers,
and generally clean up the memset handling. It had rotted a bit as the
other rewriting logic got polished more.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@165930 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
bb1078ea13
commit
94fc64c42f
@ -2591,10 +2591,11 @@ private:
|
|||||||
|
|
||||||
// If this doesn't map cleanly onto the alloca type, and that type isn't
|
// If this doesn't map cleanly onto the alloca type, and that type isn't
|
||||||
// a single value type, just emit a memset.
|
// a single value type, just emit a memset.
|
||||||
if (!VecTy && (BeginOffset != NewAllocaBeginOffset ||
|
if (!VecTy && !IntTy &&
|
||||||
EndOffset != NewAllocaEndOffset ||
|
(BeginOffset != NewAllocaBeginOffset ||
|
||||||
!AllocaTy->isSingleValueType() ||
|
EndOffset != NewAllocaEndOffset ||
|
||||||
!TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) {
|
!AllocaTy->isSingleValueType() ||
|
||||||
|
!TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) {
|
||||||
Type *SizeTy = II.getLength()->getType();
|
Type *SizeTy = II.getLength()->getType();
|
||||||
Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset);
|
Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset);
|
||||||
CallInst *New
|
CallInst *New
|
||||||
@ -2612,32 +2613,24 @@ private:
|
|||||||
// a sensible representation for the alloca type. This is essentially
|
// a sensible representation for the alloca type. This is essentially
|
||||||
// splatting the byte to a sufficiently wide integer, bitcasting to the
|
// splatting the byte to a sufficiently wide integer, bitcasting to the
|
||||||
// desired scalar type, and splatting it across any desired vector type.
|
// desired scalar type, and splatting it across any desired vector type.
|
||||||
|
uint64_t Size = EndOffset - BeginOffset;
|
||||||
Value *V = II.getValue();
|
Value *V = II.getValue();
|
||||||
IntegerType *VTy = cast<IntegerType>(V->getType());
|
IntegerType *VTy = cast<IntegerType>(V->getType());
|
||||||
Type *IntTy = Type::getIntNTy(VTy->getContext(),
|
Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8);
|
||||||
TD.getTypeSizeInBits(ScalarTy));
|
if (Size*8 > VTy->getBitWidth())
|
||||||
if (TD.getTypeSizeInBits(ScalarTy) > VTy->getBitWidth())
|
V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, getName(".zext")),
|
||||||
V = IRB.CreateMul(IRB.CreateZExt(V, IntTy, getName(".zext")),
|
|
||||||
ConstantExpr::getUDiv(
|
ConstantExpr::getUDiv(
|
||||||
Constant::getAllOnesValue(IntTy),
|
Constant::getAllOnesValue(SplatIntTy),
|
||||||
ConstantExpr::getZExt(
|
ConstantExpr::getZExt(
|
||||||
Constant::getAllOnesValue(V->getType()),
|
Constant::getAllOnesValue(V->getType()),
|
||||||
IntTy)),
|
SplatIntTy)),
|
||||||
getName(".isplat"));
|
getName(".isplat"));
|
||||||
if (V->getType() != ScalarTy) {
|
|
||||||
if (ScalarTy->isPointerTy())
|
|
||||||
V = IRB.CreateIntToPtr(V, ScalarTy);
|
|
||||||
else if (ScalarTy->isPrimitiveType() || ScalarTy->isVectorTy())
|
|
||||||
V = IRB.CreateBitCast(V, ScalarTy);
|
|
||||||
else if (ScalarTy->isIntegerTy())
|
|
||||||
llvm_unreachable("Computed different integer types with equal widths");
|
|
||||||
else
|
|
||||||
llvm_unreachable("Invalid scalar type");
|
|
||||||
}
|
|
||||||
|
|
||||||
// If this is an element-wide memset of a vectorizable alloca, insert it.
|
// If this is an element-wide memset of a vectorizable alloca, insert it.
|
||||||
if (VecTy && (BeginOffset > NewAllocaBeginOffset ||
|
if (VecTy && (BeginOffset > NewAllocaBeginOffset ||
|
||||||
EndOffset < NewAllocaEndOffset)) {
|
EndOffset < NewAllocaEndOffset)) {
|
||||||
|
if (V->getType() != ScalarTy)
|
||||||
|
V = convertValue(TD, IRB, V, ScalarTy);
|
||||||
StoreInst *Store = IRB.CreateAlignedStore(
|
StoreInst *Store = IRB.CreateAlignedStore(
|
||||||
IRB.CreateInsertElement(IRB.CreateAlignedLoad(&NewAI,
|
IRB.CreateInsertElement(IRB.CreateAlignedLoad(&NewAI,
|
||||||
NewAI.getAlignment(),
|
NewAI.getAlignment(),
|
||||||
@ -2650,18 +2643,20 @@ private:
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Splat to a vector if needed.
|
// If this is a memset on an alloca where we can widen stores, insert the
|
||||||
if (VectorType *VecTy = dyn_cast<VectorType>(AllocaTy)) {
|
// set integer.
|
||||||
VectorType *SplatSourceTy = VectorType::get(V->getType(), 1);
|
if (IntTy && (BeginOffset > NewAllocaBeginOffset ||
|
||||||
V = IRB.CreateShuffleVector(
|
EndOffset < NewAllocaEndOffset)) {
|
||||||
IRB.CreateInsertElement(UndefValue::get(SplatSourceTy), V,
|
assert(!II.isVolatile());
|
||||||
IRB.getInt32(0), getName(".vsplat.insert")),
|
StoreInst *Store = insertInteger(IRB, V, BeginOffset);
|
||||||
UndefValue::get(SplatSourceTy),
|
(void)Store;
|
||||||
ConstantVector::getSplat(VecTy->getNumElements(), IRB.getInt32(0)),
|
DEBUG(dbgs() << " to: " << *Store << "\n");
|
||||||
getName(".vsplat.shuffle"));
|
return true;
|
||||||
assert(V->getType() == VecTy);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (V->getType() != AllocaTy)
|
||||||
|
V = convertValue(TD, IRB, V, AllocaTy);
|
||||||
|
|
||||||
Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
|
Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
|
||||||
II.isVolatile());
|
II.isVolatile());
|
||||||
(void)New;
|
(void)New;
|
||||||
|
@ -1034,11 +1034,24 @@ entry:
|
|||||||
%X.sroa.0.i = alloca double, align 8
|
%X.sroa.0.i = alloca double, align 8
|
||||||
%0 = bitcast double* %X.sroa.0.i to i8*
|
%0 = bitcast double* %X.sroa.0.i to i8*
|
||||||
call void @llvm.lifetime.start(i64 -1, i8* %0)
|
call void @llvm.lifetime.start(i64 -1, i8* %0)
|
||||||
|
|
||||||
|
; Store to the low 32-bits...
|
||||||
%X.sroa.0.0.cast2.i = bitcast double* %X.sroa.0.i to i32*
|
%X.sroa.0.0.cast2.i = bitcast double* %X.sroa.0.i to i32*
|
||||||
store i32 0, i32* %X.sroa.0.0.cast2.i, align 8
|
store i32 0, i32* %X.sroa.0.0.cast2.i, align 8
|
||||||
|
|
||||||
|
; Also use a memset to the middle 32-bits for fun.
|
||||||
|
%X.sroa.0.2.raw_idx2.i = getelementptr inbounds i8* %0, i32 2
|
||||||
|
call void @llvm.memset.p0i8.i64(i8* %X.sroa.0.2.raw_idx2.i, i8 0, i64 4, i32 1, i1 false)
|
||||||
|
|
||||||
|
; Or a memset of the whole thing.
|
||||||
|
call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 8, i32 1, i1 false)
|
||||||
|
|
||||||
|
; Store to the high 32-bits...
|
||||||
%X.sroa.0.4.raw_idx4.i = getelementptr inbounds i8* %0, i32 4
|
%X.sroa.0.4.raw_idx4.i = getelementptr inbounds i8* %0, i32 4
|
||||||
%X.sroa.0.4.cast5.i = bitcast i8* %X.sroa.0.4.raw_idx4.i to i32*
|
%X.sroa.0.4.cast5.i = bitcast i8* %X.sroa.0.4.raw_idx4.i to i32*
|
||||||
store i32 1072693248, i32* %X.sroa.0.4.cast5.i, align 4
|
store i32 1072693248, i32* %X.sroa.0.4.cast5.i, align 4
|
||||||
|
|
||||||
|
; Do the actual math...
|
||||||
%X.sroa.0.0.load1.i = load double* %X.sroa.0.i, align 8
|
%X.sroa.0.0.load1.i = load double* %X.sroa.0.i, align 8
|
||||||
%accum.real.i = load double* %d, align 8
|
%accum.real.i = load double* %d, align 8
|
||||||
%add.r.i = fadd double %accum.real.i, %X.sroa.0.0.load1.i
|
%add.r.i = fadd double %accum.real.i, %X.sroa.0.0.load1.i
|
||||||
|
Loading…
x
Reference in New Issue
Block a user