mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-14 15:39:06 +00:00
Avoid using lossy loads / stores for memcpy / memset expansion, e.g.
f64 load / store on non-SSE2 x86 targets. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169944 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
d0a0d221da
commit
61f4dfe369
@ -716,6 +716,15 @@ public:
|
||||
return MVT::Other;
|
||||
}
|
||||
|
||||
/// isLegalMemOpType - Returns true if it's legal to use load / store of the
|
||||
/// specified type to expand memcpy / memset inline. This is mostly true
|
||||
/// for legal types except for some special cases. For example, on X86
|
||||
/// targets without SSE2, f64 load / store are done with fldl / fstpl, which
|
||||
/// also do type conversion.
|
||||
virtual bool isLegalMemOpType(MVT VT) const {
  // Default policy: only integer types are accepted for inline memcpy /
  // memset expansion. Targets override this hook to admit further types.
  return VT.isInteger();
}
|
||||
|
||||
/// usesUnderscoreSetJmp - Determine if we should use _setjmp or setjmp
|
||||
/// to implement llvm.setjmp.
|
||||
bool usesUnderscoreSetJmp() const {
|
||||
|
@ -3474,27 +3474,33 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
|
||||
unsigned VTSize = VT.getSizeInBits() / 8;
|
||||
while (VTSize > Size) {
|
||||
// For now, only use non-vector load / store's for the left-over pieces.
|
||||
EVT NewVT;
|
||||
EVT NewVT = VT;
|
||||
unsigned NewVTSize;
|
||||
|
||||
bool Found = false;
|
||||
if (VT.isVector() || VT.isFloatingPoint()) {
|
||||
NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
|
||||
while (!TLI.isOperationLegalOrCustom(ISD::STORE, NewVT)) {
|
||||
if (NewVT == MVT::i64 &&
|
||||
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64)) {
|
||||
// i64 is usually not legal on 32-bit targets, but f64 may be.
|
||||
NewVT = MVT::f64;
|
||||
break;
|
||||
}
|
||||
NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
|
||||
if (TLI.isOperationLegalOrCustom(ISD::STORE, NewVT) &&
|
||||
TLI.isLegalMemOpType(NewVT.getSimpleVT()))
|
||||
Found = true;
|
||||
else if (NewVT == MVT::i64 &&
|
||||
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
|
||||
TLI.isLegalMemOpType(MVT::f64)) {
|
||||
// i64 is usually not legal on 32-bit targets, but f64 may be.
|
||||
NewVT = MVT::f64;
|
||||
Found = true;
|
||||
}
|
||||
NewVTSize = NewVT.getSizeInBits() / 8;
|
||||
} else {
|
||||
// This can result in a type that is not legal on the target, e.g.
|
||||
// 1 or 2 bytes on PPC.
|
||||
NewVT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
|
||||
NewVTSize = VTSize >> 1;
|
||||
}
|
||||
|
||||
if (!Found) {
|
||||
do {
|
||||
NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
|
||||
if (NewVT == MVT::i8)
|
||||
break;
|
||||
} while (!TLI.isLegalMemOpType(NewVT.getSimpleVT()));
|
||||
}
|
||||
NewVTSize = NewVT.getSizeInBits() / 8;
|
||||
|
||||
// If the new VT cannot cover all of the remaining bits, then consider
|
||||
// issuing a (or a pair of) unaligned and overlapping load / store.
|
||||
// FIXME: Only does this for 64-bit or more since we don't have proper
|
||||
|
@ -9481,6 +9481,10 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
|
||||
return MVT::Other;
|
||||
}
|
||||
|
||||
/// isLegalMemOpType - ARM override: besides every integer type, f64 and
/// v2f64 are accepted as load / store types for inline memcpy / memset
/// expansion.
bool ARMTargetLowering::isLegalMemOpType(MVT VT) const {
  // Integer types are always acceptable.
  if (VT.isInteger())
    return true;

  // Otherwise only these two non-integer types are allowed.
  return VT == MVT::f64 || VT == MVT::v2f64;
}
|
||||
|
||||
bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
|
||||
if (Val.getOpcode() != ISD::LOAD)
|
||||
return false;
|
||||
|
@ -296,6 +296,13 @@ namespace llvm {
|
||||
bool MemcpyStrSrc,
|
||||
MachineFunction &MF) const;
|
||||
|
||||
/// isLegalMemOpType - Returns true if it's legal to use load / store of the
|
||||
/// specified type to expand memcpy / memset inline. This is mostly true
|
||||
/// for legal types except for some special cases. For example, on X86
|
||||
/// targets without SSE2, f64 load / store are done with fldl / fstpl, which
|
||||
/// also do type conversion.
|
||||
virtual bool isLegalMemOpType(MVT VT) const;
|
||||
|
||||
using TargetLowering::isZExtFree;
|
||||
virtual bool isZExtFree(SDValue Val, EVT VT2) const;
|
||||
|
||||
|
@ -1412,6 +1412,14 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
|
||||
return MVT::i32;
|
||||
}
|
||||
|
||||
/// isLegalMemOpType - X86 override: f32 and f64 are accepted for inline
/// memcpy / memset expansion only when the matching X86ScalarSSEf32 /
/// X86ScalarSSEf64 flag is set; every other type must be an integer type.
bool X86TargetLowering::isLegalMemOpType(MVT VT) const {
  // Floating-point types are gated on the per-type scalar SSE flags.
  if (VT == MVT::f32)
    return X86ScalarSSEf32;
  if (VT == MVT::f64)
    return X86ScalarSSEf64;

  // Anything else is legal only if it is an integer type.
  return VT.isInteger();
}
|
||||
|
||||
bool
|
||||
X86TargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
|
||||
if (Fast)
|
||||
|
@ -506,6 +506,13 @@ namespace llvm {
|
||||
bool IsZeroVal, bool MemcpyStrSrc,
|
||||
MachineFunction &MF) const;
|
||||
|
||||
/// isLegalMemOpType - Returns true if it's legal to use load / store of the
|
||||
/// specified type to expand memcpy / memset inline. This is mostly true
|
||||
/// for legal types except for some special cases. For example, on X86
|
||||
/// targets without SSE2, f64 load / store are done with fldl / fstpl, which
|
||||
/// also do type conversion.
|
||||
virtual bool isLegalMemOpType(MVT VT) const;
|
||||
|
||||
/// allowsUnalignedMemoryAccesses - Returns true if the target allows
|
||||
/// unaligned memory accesses of the specified type. Returns whether it
|
||||
/// is "fast" by reference in the second argument.
|
||||
|
@ -17,11 +17,11 @@ entry:
|
||||
; SSE2: movb $0, 24(%esp)
|
||||
|
||||
; SSE1: t1:
|
||||
; SSE1: fldl _.str+16
|
||||
; SSE1: fstpl 16(%esp)
|
||||
; SSE1: movaps _.str, %xmm0
|
||||
; SSE1: movaps %xmm0
|
||||
; SSE1: movb $0, 24(%esp)
|
||||
; SSE1: movl $0, 20(%esp)
|
||||
; SSE1: movl $0, 16(%esp)
|
||||
|
||||
; NOSSE: t1:
|
||||
; NOSSE: movb $0
|
||||
|
Loading…
Reference in New Issue
Block a user