mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-22 11:39:35 +00:00
Call libc memcpy/memset if array size is bigger than the threshold.
Copying a 100MB array (after a warmup) shows that the glibc 2.6.1 implementation on x86-64 (Core 2) is 30% faster (from 0.270917s to 0.188079s). llvm-svn: 41479
This commit is contained in:
parent
3dffac0c59
commit
3d52fe3ef3
@ -3753,10 +3753,10 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
|
||||
if (Align == 0) Align = 1;
|
||||
|
||||
ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
|
||||
// If not DWORD aligned, call memset if size is less than the threshold.
|
||||
// If not DWORD aligned or size is more than the threshold, call memset.
|
||||
// It knows how to align to the right boundary first.
|
||||
if ((Align & 3) != 0 ||
|
||||
(I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
|
||||
(I && I->getValue() > Subtarget->getMinRepStrSizeThreshold())) {
|
||||
MVT::ValueType IntPtr = getPointerTy();
|
||||
const Type *IntPtrTy = getTargetData()->getIntPtrType();
|
||||
TargetLowering::ArgListTy Args;
|
||||
@ -3909,10 +3909,10 @@ SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
|
||||
if (Align == 0) Align = 1;
|
||||
|
||||
ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
|
||||
// If not DWORD aligned, call memcpy if size is less than the threshold.
|
||||
// If not DWORD aligned or size is more than the threshold, call memcpy.
|
||||
// It knows how to align to the right boundary first.
|
||||
if ((Align & 3) != 0 ||
|
||||
(I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
|
||||
(I && I->getValue() > Subtarget->getMinRepStrSizeThreshold())) {
|
||||
MVT::ValueType IntPtr = getPointerTy();
|
||||
TargetLowering::ArgListTy Args;
|
||||
TargetLowering::ArgListEntry Entry;
|
||||
|
@ -1,24 +1,26 @@
|
||||
; RUN: llvm-upgrade < %s | llvm-as | llc -march=x86 | grep movs
|
||||
declare void %llvm.memcpy.i32(sbyte* %A, sbyte* %B, uint %amt, uint %align)
|
||||
; RUN: llvm-as < %s | llc -march=x86 | grep movs | count 1
|
||||
; RUN: llvm-as < %s | llc -march=x86 | grep memcpy | count 2
|
||||
|
||||
%A = global [1000 x int] zeroinitializer
|
||||
%B = global [1000 x int] zeroinitializer
|
||||
@A = global [32 x i32] zeroinitializer
|
||||
@B = global [32 x i32] zeroinitializer
|
||||
|
||||
declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
|
||||
|
||||
void %main() {
|
||||
define void @main() {
|
||||
; dword copy
|
||||
call void %llvm.memcpy.i32(sbyte* cast (int* getelementptr ([1000 x int]* %A, long 0, long 0) to sbyte*),
|
||||
sbyte* cast (int* getelementptr ([1000 x int]* %B, long 0, long 0) to sbyte*),
|
||||
uint 4000, uint 4)
|
||||
call void @llvm.memcpy.i32(i8* bitcast ([32 x i32]* @A to i8*),
|
||||
i8* bitcast ([32 x i32]* @B to i8*),
|
||||
i32 128, i32 4 )
|
||||
|
||||
; word copy
|
||||
call void %llvm.memcpy.i32(sbyte* cast (int* getelementptr ([1000 x int]* %A, long 0, long 0) to sbyte*),
|
||||
sbyte* cast (int* getelementptr ([1000 x int]* %B, long 0, long 0) to sbyte*),
|
||||
uint 4000, uint 2)
|
||||
call void @llvm.memcpy.i32( i8* bitcast ([32 x i32]* @A to i8*),
|
||||
i8* bitcast ([32 x i32]* @B to i8*),
|
||||
i32 128, i32 2 )
|
||||
|
||||
; byte copy
|
||||
call void %llvm.memcpy.i32(sbyte* cast (int* getelementptr ([1000 x int]* %A, long 0, long 0) to sbyte*),
|
||||
sbyte* cast (int* getelementptr ([1000 x int]* %B, long 0, long 0) to sbyte*),
|
||||
uint 4000, uint 1)
|
||||
call void @llvm.memcpy.i32( i8* bitcast ([32 x i32]* @A to i8*),
|
||||
i8* bitcast ([32 x i32]* @B to i8*),
|
||||
i32 128, i32 1 )
|
||||
|
||||
ret void
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user