mirror of
https://github.com/RPCSX/llvm.git
synced 2025-02-28 08:59:28 +00:00
Avoid using f64 to lower memcpy from constant string. It's cheaper to use i32 store of immediates.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@100751 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
8ef5caa80a
commit
c3b0c341e7
@ -638,11 +638,13 @@ public:
|
||||
/// probably because the source does not need to be loaded. If
|
||||
/// 'NonScalarIntSafe' is true, that means it's safe to return a
|
||||
/// non-scalar-integer type, e.g. empty string source, constant, or loaded
|
||||
/// from memory. It returns EVT::Other if SelectionDAG should be responsible
|
||||
/// for determining it.
|
||||
/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
|
||||
/// constant so it does not need to be loaded.
|
||||
/// It returns EVT::Other if SelectionDAG should be responsible for
|
||||
/// determining the type.
|
||||
virtual EVT getOptimalMemOpType(uint64_t Size,
|
||||
unsigned DstAlign, unsigned SrcAlign,
|
||||
bool NonScalarIntSafe,
|
||||
bool NonScalarIntSafe, bool MemcpyStrSrc,
|
||||
SelectionDAG &DAG) const {
|
||||
return MVT::Other;
|
||||
}
|
||||
|
@ -3210,6 +3210,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
|
||||
unsigned Limit, uint64_t Size,
|
||||
unsigned DstAlign, unsigned SrcAlign,
|
||||
bool NonScalarIntSafe,
|
||||
bool MemcpyStrSrc,
|
||||
SelectionDAG &DAG,
|
||||
const TargetLowering &TLI) {
|
||||
assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
|
||||
@ -3218,9 +3219,11 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
|
||||
// the value, i.e. memset or memcpy from constant string. Otherwise, it's
|
||||
// the inferred alignment of the source. 'DstAlign', on the other hand, is the
|
||||
// specified alignment of the memory operation. If it is zero, that means
|
||||
// it's possible to change the alignment of the destination.
|
||||
// it's possible to change the alignment of the destination. 'MemcpyStrSrc'
|
||||
// indicates whether the memcpy source is constant so it does not need to be
|
||||
// loaded.
|
||||
EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
|
||||
NonScalarIntSafe, DAG);
|
||||
NonScalarIntSafe, MemcpyStrSrc, DAG);
|
||||
|
||||
if (VT == MVT::Other) {
|
||||
if (DstAlign >= TLI.getTargetData()->getPointerPrefAlignment() ||
|
||||
@ -3286,9 +3289,6 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
|
||||
// below a certain threshold.
|
||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
std::vector<EVT> MemOps;
|
||||
uint64_t Limit = -1ULL;
|
||||
if (!AlwaysInline)
|
||||
Limit = TLI.getMaxStoresPerMemcpy();
|
||||
bool DstAlignCanChange = false;
|
||||
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
|
||||
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
|
||||
@ -3300,9 +3300,13 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
|
||||
std::string Str;
|
||||
bool CopyFromStr = isMemSrcFromString(Src, Str);
|
||||
bool isZeroStr = CopyFromStr && Str.empty();
|
||||
uint64_t Limit = -1ULL;
|
||||
if (!AlwaysInline)
|
||||
Limit = TLI.getMaxStoresPerMemcpy();
|
||||
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
|
||||
(DstAlignCanChange ? 0 : Align),
|
||||
(isZeroStr ? 0 : SrcAlign), true, DAG, TLI))
|
||||
(isZeroStr ? 0 : SrcAlign),
|
||||
true, CopyFromStr, DAG, TLI))
|
||||
return SDValue();
|
||||
|
||||
if (DstAlignCanChange) {
|
||||
@ -3390,7 +3394,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
|
||||
|
||||
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
|
||||
(DstAlignCanChange ? 0 : Align),
|
||||
SrcAlign, true, DAG, TLI))
|
||||
SrcAlign, true, false, DAG, TLI))
|
||||
return SDValue();
|
||||
|
||||
if (DstAlignCanChange) {
|
||||
@ -3462,7 +3466,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
|
||||
isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
|
||||
if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(),
|
||||
Size, (DstAlignCanChange ? 0 : Align), 0,
|
||||
NonScalarIntSafe, DAG, TLI))
|
||||
NonScalarIntSafe, false, DAG, TLI))
|
||||
return SDValue();
|
||||
|
||||
if (DstAlignCanChange) {
|
||||
|
@ -5547,11 +5547,14 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
|
||||
/// probably because the source does not need to be loaded. If
|
||||
/// 'NonScalarIntSafe' is true, that means it's safe to return a
|
||||
/// non-scalar-integer type, e.g. empty string source, constant, or loaded
|
||||
/// from memory. It returns EVT::Other if SelectionDAG should be responsible
|
||||
/// for determining it.
|
||||
/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
|
||||
/// constant so it does not need to be loaded.
|
||||
/// It returns EVT::Other if SelectionDAG should be responsible for
|
||||
/// determining the type.
|
||||
EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
|
||||
unsigned DstAlign, unsigned SrcAlign,
|
||||
bool NonScalarIntSafe,
|
||||
bool MemcpyStrSrc,
|
||||
SelectionDAG &DAG) const {
|
||||
if (this->PPCSubTarget.isPPC64()) {
|
||||
return MVT::i64;
|
||||
|
@ -355,12 +355,14 @@ namespace llvm {
|
||||
/// probably because the source does not need to be loaded. If
|
||||
/// 'NonScalarIntSafe' is true, that means it's safe to return a
|
||||
/// non-scalar-integer type, e.g. empty string source, constant, or loaded
|
||||
/// from memory. It returns EVT::Other if SelectionDAG should be responsible
|
||||
/// for determining it.
|
||||
/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
|
||||
/// constant so it does not need to be loaded.
|
||||
/// It returns EVT::Other if SelectionDAG should be responsible for
|
||||
/// determining the type.
|
||||
virtual EVT
|
||||
getOptimalMemOpType(uint64_t Size,
|
||||
unsigned DstAlign, unsigned SrcAlign,
|
||||
bool NonScalarIntSafe, SelectionDAG &DAG) const;
|
||||
getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
|
||||
bool NonScalarIntSafe, bool MemcpyStrSrc,
|
||||
SelectionDAG &DAG) const;
|
||||
|
||||
/// getFunctionAlignment - Return the Log2 alignment of this function.
|
||||
virtual unsigned getFunctionAlignment(const Function *F) const;
|
||||
|
@ -1067,18 +1067,22 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
|
||||
}
|
||||
|
||||
/// getOptimalMemOpType - Returns the target specific optimal type for load
|
||||
/// and store operations as a result of memset, memcpy, and memmove lowering.
|
||||
/// If DstAlign is zero that means the destination alignment can
|
||||
/// satisfy any constraint. Similarly if SrcAlign is zero it means there
|
||||
/// isn't a need to check it against alignment requirement, probably because
|
||||
/// the source does not need to be loaded. If 'NonScalarIntSafe' is true, that
|
||||
/// means it's safe to return a non-scalar-integer type, e.g. constant string
|
||||
/// source or loaded from memory. It returns EVT::Other if SelectionDAG should
|
||||
/// be responsible for determining it.
|
||||
/// and store operations as a result of memset, memcpy, and memmove
|
||||
/// lowering. If DstAlign is zero that means the destination
|
||||
/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
|
||||
/// means there isn't a need to check it against alignment requirement,
|
||||
/// probably because the source does not need to be loaded. If
|
||||
/// 'NonScalarIntSafe' is true, that means it's safe to return a
|
||||
/// non-scalar-integer type, e.g. empty string source, constant, or loaded
|
||||
/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
|
||||
/// constant so it does not need to be loaded.
|
||||
/// It returns EVT::Other if SelectionDAG should be responsible for
|
||||
/// determining the type.
|
||||
EVT
|
||||
X86TargetLowering::getOptimalMemOpType(uint64_t Size,
|
||||
unsigned DstAlign, unsigned SrcAlign,
|
||||
bool NonScalarIntSafe,
|
||||
bool MemcpyStrSrc,
|
||||
SelectionDAG &DAG) const {
|
||||
// FIXME: This turns off use of xmm stores for memset/memcpy on targets like
|
||||
// linux. This is because the stack realignment code can't handle certain
|
||||
@ -1095,11 +1099,14 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
|
||||
return MVT::v4i32;
|
||||
if (Subtarget->hasSSE1())
|
||||
return MVT::v4f32;
|
||||
} else if (Size >= 8 &&
|
||||
} else if (!MemcpyStrSrc && Size >= 8 &&
|
||||
!Subtarget->is64Bit() &&
|
||||
Subtarget->getStackAlignment() >= 8 &&
|
||||
Subtarget->hasSSE2())
|
||||
Subtarget->hasSSE2()) {
|
||||
// Do not use f64 to lower memcpy if source is string constant. It's
|
||||
// better to use i32 to avoid the loads.
|
||||
return MVT::f64;
|
||||
}
|
||||
}
|
||||
if (Subtarget->is64Bit() && Size >= 8)
|
||||
return MVT::i64;
|
||||
@ -6721,7 +6728,7 @@ X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
|
||||
Count, InFlag);
|
||||
InFlag = Chain.getValue(1);
|
||||
Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
|
||||
X86::EDI,
|
||||
X86::EDI,
|
||||
Dst, InFlag);
|
||||
InFlag = Chain.getValue(1);
|
||||
Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI :
|
||||
|
@ -424,12 +424,14 @@ namespace llvm {
|
||||
/// probably because the source does not need to be loaded. If
|
||||
/// 'NonScalarIntSafe' is true, that means it's safe to return a
|
||||
/// non-scalar-integer type, e.g. empty string source, constant, or loaded
|
||||
/// from memory. It returns EVT::Other if SelectionDAG should be responsible
|
||||
/// for determining it.
|
||||
/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
|
||||
/// constant so it does not need to be loaded.
|
||||
/// It returns EVT::Other if SelectionDAG should be responsible for
|
||||
/// determining the type.
|
||||
virtual EVT
|
||||
getOptimalMemOpType(uint64_t Size,
|
||||
unsigned DstAlign, unsigned SrcAlign,
|
||||
bool NonScalarIntSafe, SelectionDAG &DAG) const;
|
||||
getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
|
||||
bool NonScalarIntSafe, bool MemcpyStrSrc,
|
||||
SelectionDAG &DAG) const;
|
||||
|
||||
/// allowsUnalignedMemoryAccesses - Returns true if the target allows
|
||||
/// unaligned memory accesses of the specified type.
|
||||
|
@ -3,20 +3,20 @@
|
||||
; RUN: llc < %s -mattr=-sse -mtriple=i686-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=NOSSE
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=X86-64
|
||||
|
||||
%struct.ParmT = type { [25 x i8], i8, i8* }
|
||||
@.str12 = internal constant [25 x i8] c"image\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00" ; <[25 x i8]*> [#uses=1]
|
||||
@.str = internal constant [25 x i8] c"image\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00"
|
||||
@.str2 = internal constant [30 x i8] c"xxxxxxxxxxxxxxxxxxxxxxxxxxxxx\00", align 4
|
||||
|
||||
define void @t1(i32 %argc, i8** %argv) nounwind {
|
||||
entry:
|
||||
; SSE2: t1:
|
||||
; SSE2: movaps _.str12, %xmm0
|
||||
; SSE2: movaps _.str, %xmm0
|
||||
; SSE2: movaps %xmm0
|
||||
; SSE2: movb $0
|
||||
; SSE2: movl $0
|
||||
; SSE2: movl $0
|
||||
|
||||
; SSE1: t1:
|
||||
; SSE1: movaps _.str12, %xmm0
|
||||
; SSE1: movaps _.str, %xmm0
|
||||
; SSE1: movaps %xmm0
|
||||
; SSE1: movb $0
|
||||
; SSE1: movl $0
|
||||
@ -32,14 +32,14 @@ entry:
|
||||
; NOSSE: movl $1734438249
|
||||
|
||||
; X86-64: t1:
|
||||
; X86-64: movaps _.str12(%rip), %xmm0
|
||||
; X86-64: movaps _.str(%rip), %xmm0
|
||||
; X86-64: movaps %xmm0
|
||||
; X86-64: movb $0
|
||||
; X86-64: movq $0
|
||||
%parms.i = alloca [13 x %struct.ParmT] ; <[13 x %struct.ParmT]*> [#uses=1]
|
||||
%parms1.i = getelementptr [13 x %struct.ParmT]* %parms.i, i32 0, i32 0, i32 0, i32 0 ; <i8*> [#uses=1]
|
||||
call void @llvm.memcpy.i32( i8* %parms1.i, i8* getelementptr ([25 x i8]* @.str12, i32 0, i32 0), i32 25, i32 1 ) nounwind
|
||||
unreachable
|
||||
%tmp1 = alloca [25 x i8]
|
||||
%tmp2 = bitcast [25 x i8]* %tmp1 to i8*
|
||||
call void @llvm.memcpy.i32( i8* %tmp2, i8* getelementptr ([25 x i8]* @.str, i32 0, i32 0), i32 25, i32 1 ) nounwind
|
||||
unreachable
|
||||
}
|
||||
|
||||
;rdar://7774704
|
||||
@ -119,4 +119,49 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @t4() nounwind {
|
||||
entry:
|
||||
; SSE2: t4:
|
||||
; SSE2: movw $120
|
||||
; SSE2: movl $2021161080
|
||||
; SSE2: movl $2021161080
|
||||
; SSE2: movl $2021161080
|
||||
; SSE2: movl $2021161080
|
||||
; SSE2: movl $2021161080
|
||||
; SSE2: movl $2021161080
|
||||
; SSE2: movl $2021161080
|
||||
|
||||
; SSE1: t4:
|
||||
; SSE1: movw $120
|
||||
; SSE1: movl $2021161080
|
||||
; SSE1: movl $2021161080
|
||||
; SSE1: movl $2021161080
|
||||
; SSE1: movl $2021161080
|
||||
; SSE1: movl $2021161080
|
||||
; SSE1: movl $2021161080
|
||||
; SSE1: movl $2021161080
|
||||
|
||||
; NOSSE: t4:
|
||||
; NOSSE: movw $120
|
||||
; NOSSE: movl $2021161080
|
||||
; NOSSE: movl $2021161080
|
||||
; NOSSE: movl $2021161080
|
||||
; NOSSE: movl $2021161080
|
||||
; NOSSE: movl $2021161080
|
||||
; NOSSE: movl $2021161080
|
||||
; NOSSE: movl $2021161080
|
||||
|
||||
; X86-64: t4:
|
||||
; X86-64: movabsq $8680820740569200760, %rax
|
||||
; X86-64: movq %rax
|
||||
; X86-64: movq %rax
|
||||
; X86-64: movq %rax
|
||||
; X86-64: movw $120
|
||||
; X86-64: movl $2021161080
|
||||
%tmp1 = alloca [30 x i8]
|
||||
%tmp2 = bitcast [30 x i8]* %tmp1 to i8*
|
||||
call void @llvm.memcpy.i32(i8* %tmp2, i8* getelementptr inbounds ([30 x i8]* @.str2, i32 0, i32 0), i32 30, i32 1)
|
||||
unreachable
|
||||
}
|
||||
|
||||
declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
|
||||
|
@ -13,9 +13,7 @@ entry:
|
||||
bb:
|
||||
%String2Loc9 = getelementptr inbounds [31 x i8]* %String2Loc, i64 0, i64 0
|
||||
call void @llvm.memcpy.i64(i8* %String2Loc9, i8* getelementptr inbounds ([31 x i8]* @.str3, i64 0, i64 0), i64 31, i32 1)
|
||||
; I386: movsd _.str3+16
|
||||
; I386: movsd _.str3+8
|
||||
; I386: movsd _.str3
|
||||
; I386: call {{_?}}memcpy
|
||||
|
||||
; CORE2: movabsq
|
||||
; CORE2: movabsq
|
||||
|
Loading…
x
Reference in New Issue
Block a user