mirror of
https://github.com/RPCSX/llvm.git
synced 2026-01-31 01:05:23 +01:00
This patch does an inline expansion of memcmp. It changes the memcmp library call into an inline expansion when the size is known at compile time and is under a target specified threshold. This expansion is implemented in CodeGenPrepare and expands into straight line code. The target specifies a maximum load size and the expansion works by using this size to load the two sources, compare, and exit early if a difference is found. It also has a special case when the memcmp result is used in a compare to zero equality. Differential Revision: https://reviews.llvm.org/D28637 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@304313 91177308-0d34-0410-b5e6-96231b3b80d8
195 lines
8.9 KiB
LLVM
195 lines
8.9 KiB
LLVM
; Verifies the IR that CodeGenPrepare produces for memcmp calls on 64-bit
; PowerPC. The little-endian invocation is matched with the default check
; prefix; the big-endian invocation is matched with the -BE check prefix.
; RUN: llc -o - -mtriple=powerpc64le-unknown-gnu-linux -stop-after codegenprepare %s | FileCheck %s
; RUN: llc -o - -mtriple=powerpc64-unknown-gnu-linux -stop-after codegenprepare %s | FileCheck %s --check-prefix=CHECK-BE

; test1: memcmp with a constant length of 16 bytes.
; The directives below expect the call to be replaced by two i64
; load-and-compare steps; the second step addresses element 1 of each buffer
; through getelementptr. The little-endian directives expect both loaded values
; to go through llvm.bswap.i64 before the subtract, while the big-endian
; directives expect the loaded values to be subtracted directly. In res_block
; an unsigned less-than compare selects -1 or 1.
define signext i32 @test1(i32* nocapture readonly %buffer1, i32* nocapture readonly %buffer2) {
  ; Little-endian expectations, anchored to this function's definition.
  ; CHECK-LABEL: @test1(
  ; CHECK: [[LOAD1:%[0-9]+]] = load i64, i64*
  ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64*
  ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD1]])
  ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD2]])
  ; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i64 [[BSWAP1]], [[BSWAP2]]
  ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0
  ; CHECK-NEXT: br i1 [[ICMP]], label %res_block, label

  ; CHECK-LABEL: res_block:{{.*}}
  ; CHECK: [[ICMP2:%[0-9]+]] = icmp ult i64
  ; CHECK-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1
  ; CHECK-NEXT: br label %endblock

  ; CHECK: [[GEP1:%[0-9]+]] = getelementptr i64, i64* {{.*}}, i64 1
  ; CHECK-NEXT: [[GEP2:%[0-9]+]] = getelementptr i64, i64* {{.*}}, i64 1
  ; CHECK-NEXT: [[LOAD1:%[0-9]+]] = load i64, i64* [[GEP1]]
  ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* [[GEP2]]
  ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD1]])
  ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD2]])
  ; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i64 [[BSWAP1]], [[BSWAP2]]
  ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0
  ; CHECK-NEXT: br i1 [[ICMP]], label %res_block, label %endblock


  ; Big-endian expectations: same shape, without the byte swaps.
  ; CHECK-BE-LABEL: @test1(
  ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i64, i64*
  ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64*
  ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i64 [[LOAD1]], [[LOAD2]]
  ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0
  ; CHECK-BE-NEXT: br i1 [[ICMP]], label %res_block, label

  ; CHECK-BE-LABEL: res_block:{{.*}}
  ; CHECK-BE: [[ICMP2:%[0-9]+]] = icmp ult i64
  ; CHECK-BE-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1
  ; CHECK-BE-NEXT: br label %endblock

  ; CHECK-BE: [[GEP1:%[0-9]+]] = getelementptr i64, i64* {{.*}}, i64 1
  ; CHECK-BE-NEXT: [[GEP2:%[0-9]+]] = getelementptr i64, i64* {{.*}}, i64 1
  ; CHECK-BE-NEXT: [[LOAD1:%[0-9]+]] = load i64, i64* [[GEP1]]
  ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* [[GEP2]]
  ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i64 [[LOAD1]], [[LOAD2]]
  ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0
  ; CHECK-BE-NEXT: br i1 [[ICMP]], label %res_block, label %endblock

entry:
  %0 = bitcast i32* %buffer1 to i8*
  %1 = bitcast i32* %buffer2 to i8*
  %call = tail call signext i32 @memcmp(i8* %0, i8* %1, i64 16)
  ret i32 %call
}

; External memcmp declaration used by every test function in this file: two
; i8* operands and an i64 length, returning a sign-extended i32.
declare signext i32 @memcmp(i8* nocapture, i8* nocapture, i64) local_unnamed_addr #1

; test2: memcmp with a constant length of 4 bytes.
; The directives below expect a single i32 load from each buffer, zero-extended
; to i64, subtracted and compared against zero, branching to res_block or
; endblock. The little-endian directives additionally expect llvm.bswap.i32 on
; each loaded value before the zero-extension.
define signext i32 @test2(i32* nocapture readonly %buffer1, i32* nocapture readonly %buffer2) {
  ; Little-endian expectations, anchored to this function's definition.
  ; CHECK-LABEL: @test2(
  ; CHECK: [[LOAD1:%[0-9]+]] = load i32, i32*
  ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32*
  ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD1]])
  ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD2]])
  ; CHECK-NEXT: [[ZEXT1:%[0-9]+]] = zext i32 [[BSWAP1]] to i64
  ; CHECK-NEXT: [[ZEXT2:%[0-9]+]] = zext i32 [[BSWAP2]] to i64
  ; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i64 [[ZEXT1]], [[ZEXT2]]
  ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0
  ; CHECK-NEXT: br i1 [[ICMP]], label %res_block, label %endblock

  ; CHECK-LABEL: res_block:{{.*}}
  ; CHECK: [[ICMP2:%[0-9]+]] = icmp ult i64
  ; CHECK-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1
  ; CHECK-NEXT: br label %endblock

  ; Big-endian expectations: same shape, without the byte swaps.
  ; CHECK-BE-LABEL: @test2(
  ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i32, i32*
  ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32*
  ; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i32 [[LOAD1]] to i64
  ; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i32 [[LOAD2]] to i64
  ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i64 [[ZEXT1]], [[ZEXT2]]
  ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0
  ; CHECK-BE-NEXT: br i1 [[ICMP]], label %res_block, label %endblock

  ; CHECK-BE-LABEL: res_block:{{.*}}
  ; CHECK-BE: [[ICMP2:%[0-9]+]] = icmp ult i64
  ; CHECK-BE-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1
  ; CHECK-BE-NEXT: br label %endblock

entry:
  %0 = bitcast i32* %buffer1 to i8*
  %1 = bitcast i32* %buffer2 to i8*
  %call = tail call signext i32 @memcmp(i8* %0, i8* %1, i64 4)
  ret i32 %call
}

; test3: memcmp with a constant length of 15 bytes.
; The directives below expect four successive load-and-compare steps of
; decreasing width: i64, then i32, then i16, then i8. The first three steps
; zero-extend to i64 where needed, subtract, and branch to res_block when the
; difference is non-zero. The final i8 step zero-extends to i32, subtracts, and
; branches straight to endblock. The little-endian directives expect a bswap of
; matching width on the i64, i32 and i16 loads; no bswap is expected on the i8
; loads for either byte order.
define signext i32 @test3(i32* nocapture readonly %buffer1, i32* nocapture readonly %buffer2) {
  ; Little-endian expectations, anchored to this function's definition.
  ; CHECK-LABEL: @test3(
  ; CHECK: [[LOAD1:%[0-9]+]] = load i64, i64*
  ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64*
  ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD1]])
  ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD2]])
  ; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i64 [[BSWAP1]], [[BSWAP2]]
  ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0
  ; CHECK-NEXT: br i1 [[ICMP]], label %res_block, label

  ; CHECK-LABEL: res_block:{{.*}}
  ; CHECK: [[ICMP2:%[0-9]+]] = icmp ult i64
  ; CHECK-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1
  ; CHECK-NEXT: br label %endblock

  ; CHECK: [[LOAD1:%[0-9]+]] = load i32, i32*
  ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32*
  ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD1]])
  ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD2]])
  ; CHECK-NEXT: [[ZEXT1:%[0-9]+]] = zext i32 [[BSWAP1]] to i64
  ; CHECK-NEXT: [[ZEXT2:%[0-9]+]] = zext i32 [[BSWAP2]] to i64
  ; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i64 [[ZEXT1]], [[ZEXT2]]
  ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0
  ; CHECK-NEXT: br i1 [[ICMP]], label %res_block, label

  ; CHECK: [[LOAD1:%[0-9]+]] = load i16, i16*
  ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i16, i16*
  ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i16 @llvm.bswap.i16(i16 [[LOAD1]])
  ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i16 @llvm.bswap.i16(i16 [[LOAD2]])
  ; CHECK-NEXT: [[ZEXT1:%[0-9]+]] = zext i16 [[BSWAP1]] to i64
  ; CHECK-NEXT: [[ZEXT2:%[0-9]+]] = zext i16 [[BSWAP2]] to i64
  ; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i64 [[ZEXT1]], [[ZEXT2]]
  ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0
  ; CHECK-NEXT: br i1 [[ICMP]], label %res_block, label

  ; CHECK: [[LOAD1:%[0-9]+]] = load i8, i8*
  ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i8, i8*
  ; CHECK-NEXT: [[ZEXT1:%[0-9]+]] = zext i8 [[LOAD1]] to i32
  ; CHECK-NEXT: [[ZEXT2:%[0-9]+]] = zext i8 [[LOAD2]] to i32
  ; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i32 [[ZEXT1]], [[ZEXT2]]
  ; CHECK-NEXT: br label %endblock

  ; Big-endian expectations: same shape, without the byte swaps.
  ; CHECK-BE-LABEL: @test3(
  ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i64, i64*
  ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64*
  ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i64 [[LOAD1]], [[LOAD2]]
  ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0
  ; CHECK-BE-NEXT: br i1 [[ICMP]], label %res_block, label

  ; CHECK-BE-LABEL: res_block:{{.*}}
  ; CHECK-BE: [[ICMP2:%[0-9]+]] = icmp ult i64
  ; CHECK-BE-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1
  ; CHECK-BE-NEXT: br label %endblock

  ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i32, i32*
  ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32*
  ; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i32 [[LOAD1]] to i64
  ; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i32 [[LOAD2]] to i64
  ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i64 [[ZEXT1]], [[ZEXT2]]
  ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0
  ; CHECK-BE-NEXT: br i1 [[ICMP]], label %res_block, label

  ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i16, i16*
  ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i16, i16*
  ; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i16 [[LOAD1]] to i64
  ; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i16 [[LOAD2]] to i64
  ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i64 [[ZEXT1]], [[ZEXT2]]
  ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0
  ; CHECK-BE-NEXT: br i1 [[ICMP]], label %res_block, label

  ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i8, i8*
  ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i8, i8*
  ; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i8 [[LOAD1]] to i32
  ; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i8 [[LOAD2]] to i32
  ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i32 [[ZEXT1]], [[ZEXT2]]
  ; CHECK-BE-NEXT: br label %endblock

entry:
  %0 = bitcast i32* %buffer1 to i8*
  %1 = bitcast i32* %buffer2 to i8*
  %call = tail call signext i32 @memcmp(i8* %0, i8* %1, i64 15)
  ret i32 %call
}

; test4: memcmp with a constant length of 65 bytes.
; The directives below expect the library call to remain in the output for both
; byte orders, i.e. no inline expansion at this size.
; NOTE(review): presumably 65 exceeds the target's expansion threshold; confirm
; against the PowerPC target settings.
define signext i32 @test4(i32* nocapture readonly %buffer1, i32* nocapture readonly %buffer2) {
  ; Anchored to this function's definition so the match cannot come from a
  ; different function's memcmp call.
  ; CHECK-LABEL: @test4(
  ; CHECK: call = tail call signext i32 @memcmp
  ; CHECK-BE-LABEL: @test4(
  ; CHECK-BE: call = tail call signext i32 @memcmp

entry:
  %0 = bitcast i32* %buffer1 to i8*
  %1 = bitcast i32* %buffer2 to i8*
  %call = tail call signext i32 @memcmp(i8* %0, i8* %1, i64 65)
  ret i32 %call
}

; test5: memcmp whose length is not a compile-time constant; it is the
; sign-extension of the %SIZE argument.
; The directives below expect the library call to remain in the output for both
; byte orders, i.e. no inline expansion.
define signext i32 @test5(i32* nocapture readonly %buffer1, i32* nocapture readonly %buffer2, i32 signext %SIZE) {
  ; Anchored to this function's definition so the match cannot come from a
  ; different function's memcmp call.
  ; CHECK-LABEL: @test5(
  ; CHECK: call = tail call signext i32 @memcmp
  ; CHECK-BE-LABEL: @test5(
  ; CHECK-BE: call = tail call signext i32 @memcmp

entry:
  %0 = bitcast i32* %buffer1 to i8*
  %1 = bitcast i32* %buffer2 to i8*
  %conv = sext i32 %SIZE to i64
  %call = tail call signext i32 @memcmp(i8* %0, i8* %1, i64 %conv)
  ret i32 %call
}