mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-26 13:10:34 +00:00
11729224bf
This is a preliminary patch for fast instruction selection on PowerPC. Code generation can differ between DAG isel and fast isel. Existing tests that specify -O0 were written to expect DAG isel. Make this explicit by adding -fast-isel=false to the tests. In some cases specifying -fast-isel=false produces different code even when there isn't a fast instruction selector specified. This is because TM.Options.EnableFastISel = 1 at -O0 whether or not a FastISel object exists. Thus disabling fast isel can actually produce less conservative code. Because of this, some of the expected code generation in the -O0 tests needs to be adjusted. In particular, handling of function arguments is less conservative with -fast-isel=false (see isOnlyUsedInEntryBlock() in SelectionDAGBuilder.cpp). This results in fewer stack accesses and, in some cases, reduced stack size as uselessly loaded values are no longer stored back to spill locations in the stack. No functional change with this patch; test case adjustments only. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@183939 91177308-0d34-0410-b5e6-96231b3b80d8
216 lines
9.8 KiB
LLVM
216 lines
9.8 KiB
LLVM
; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim -fast-isel=false < %s | FileCheck %s
|
|
|
|
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
|
|
target triple = "powerpc64-unknown-linux-gnu"
|
|
|
|
%struct.s1 = type { i8 }
|
|
%struct.s2 = type { i16 }
|
|
%struct.s4 = type { i32 }
|
|
%struct.t1 = type { i8 }
|
|
%struct.t3 = type <{ i16, i8 }>
|
|
%struct.t5 = type <{ i32, i8 }>
|
|
%struct.t6 = type <{ i32, i16 }>
|
|
%struct.t7 = type <{ i32, i16, i8 }>
|
|
%struct.s3 = type { i16, i8 }
|
|
%struct.s5 = type { i32, i8 }
|
|
%struct.s6 = type { i32, i16 }
|
|
%struct.s7 = type { i32, i16, i8 }
|
|
%struct.t2 = type <{ i16 }>
|
|
%struct.t4 = type <{ i32 }>
|
|
|
|
@caller1.p1 = private unnamed_addr constant %struct.s1 { i8 1 }, align 1
|
|
@caller1.p2 = private unnamed_addr constant %struct.s2 { i16 2 }, align 2
|
|
@caller1.p3 = private unnamed_addr constant { i16, i8, i8 } { i16 4, i8 8, i8 undef }, align 2
|
|
@caller1.p4 = private unnamed_addr constant %struct.s4 { i32 16 }, align 4
|
|
@caller1.p5 = private unnamed_addr constant { i32, i8, [3 x i8] } { i32 32, i8 64, [3 x i8] undef }, align 4
|
|
@caller1.p6 = private unnamed_addr constant { i32, i16, [2 x i8] } { i32 128, i16 256, [2 x i8] undef }, align 4
|
|
@caller1.p7 = private unnamed_addr constant { i32, i16, i8, i8 } { i32 512, i16 1024, i8 -3, i8 undef }, align 4
|
|
@caller2.p1 = private unnamed_addr constant %struct.t1 { i8 1 }, align 1
|
|
@caller2.p2 = private unnamed_addr constant { i16 } { i16 2 }, align 1
|
|
@caller2.p3 = private unnamed_addr constant %struct.t3 <{ i16 4, i8 8 }>, align 1
|
|
@caller2.p4 = private unnamed_addr constant { i32 } { i32 16 }, align 1
|
|
@caller2.p5 = private unnamed_addr constant %struct.t5 <{ i32 32, i8 64 }>, align 1
|
|
@caller2.p6 = private unnamed_addr constant %struct.t6 <{ i32 128, i16 256 }>, align 1
|
|
@caller2.p7 = private unnamed_addr constant %struct.t7 <{ i32 512, i16 1024, i8 -3 }>, align 1
|
|
|
|
define i32 @caller1() nounwind {
|
|
entry:
|
|
%p1 = alloca %struct.s1, align 1
|
|
%p2 = alloca %struct.s2, align 2
|
|
%p3 = alloca %struct.s3, align 2
|
|
%p4 = alloca %struct.s4, align 4
|
|
%p5 = alloca %struct.s5, align 4
|
|
%p6 = alloca %struct.s6, align 4
|
|
%p7 = alloca %struct.s7, align 4
|
|
%0 = bitcast %struct.s1* %p1 to i8*
|
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* getelementptr inbounds (%struct.s1* @caller1.p1, i32 0, i32 0), i64 1, i32 1, i1 false)
|
|
%1 = bitcast %struct.s2* %p2 to i8*
|
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s2* @caller1.p2 to i8*), i64 2, i32 2, i1 false)
|
|
%2 = bitcast %struct.s3* %p3 to i8*
|
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* bitcast ({ i16, i8, i8 }* @caller1.p3 to i8*), i64 4, i32 2, i1 false)
|
|
%3 = bitcast %struct.s4* %p4 to i8*
|
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %3, i8* bitcast (%struct.s4* @caller1.p4 to i8*), i64 4, i32 4, i1 false)
|
|
%4 = bitcast %struct.s5* %p5 to i8*
|
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %4, i8* bitcast ({ i32, i8, [3 x i8] }* @caller1.p5 to i8*), i64 8, i32 4, i1 false)
|
|
%5 = bitcast %struct.s6* %p6 to i8*
|
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %5, i8* bitcast ({ i32, i16, [2 x i8] }* @caller1.p6 to i8*), i64 8, i32 4, i1 false)
|
|
%6 = bitcast %struct.s7* %p7 to i8*
|
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %6, i8* bitcast ({ i32, i16, i8, i8 }* @caller1.p7 to i8*), i64 8, i32 4, i1 false)
|
|
%call = call i32 @callee1(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, %struct.s1* byval %p1, %struct.s2* byval %p2, %struct.s3* byval %p3, %struct.s4* byval %p4, %struct.s5* byval %p5, %struct.s6* byval %p6, %struct.s7* byval %p7)
|
|
ret i32 %call
|
|
|
|
; CHECK: stb {{[0-9]+}}, 119(1)
|
|
; CHECK: sth {{[0-9]+}}, 126(1)
|
|
; CHECK: stw {{[0-9]+}}, 132(1)
|
|
; CHECK: stw {{[0-9]+}}, 140(1)
|
|
; CHECK: std {{[0-9]+}}, 144(1)
|
|
; CHECK: std {{[0-9]+}}, 152(1)
|
|
; CHECK: std {{[0-9]+}}, 160(1)
|
|
}
|
|
|
|
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
|
|
|
|
define internal i32 @callee1(i32 %z1, i32 %z2, i32 %z3, i32 %z4, i32 %z5, i32 %z6, i32 %z7, i32 %z8, %struct.s1* byval %v1, %struct.s2* byval %v2, %struct.s3* byval %v3, %struct.s4* byval %v4, %struct.s5* byval %v5, %struct.s6* byval %v6, %struct.s7* byval %v7) nounwind {
|
|
entry:
|
|
%z1.addr = alloca i32, align 4
|
|
%z2.addr = alloca i32, align 4
|
|
%z3.addr = alloca i32, align 4
|
|
%z4.addr = alloca i32, align 4
|
|
%z5.addr = alloca i32, align 4
|
|
%z6.addr = alloca i32, align 4
|
|
%z7.addr = alloca i32, align 4
|
|
%z8.addr = alloca i32, align 4
|
|
store i32 %z1, i32* %z1.addr, align 4
|
|
store i32 %z2, i32* %z2.addr, align 4
|
|
store i32 %z3, i32* %z3.addr, align 4
|
|
store i32 %z4, i32* %z4.addr, align 4
|
|
store i32 %z5, i32* %z5.addr, align 4
|
|
store i32 %z6, i32* %z6.addr, align 4
|
|
store i32 %z7, i32* %z7.addr, align 4
|
|
store i32 %z8, i32* %z8.addr, align 4
|
|
%a = getelementptr inbounds %struct.s1* %v1, i32 0, i32 0
|
|
%0 = load i8* %a, align 1
|
|
%conv = zext i8 %0 to i32
|
|
%a1 = getelementptr inbounds %struct.s2* %v2, i32 0, i32 0
|
|
%1 = load i16* %a1, align 2
|
|
%conv2 = sext i16 %1 to i32
|
|
%add = add nsw i32 %conv, %conv2
|
|
%a3 = getelementptr inbounds %struct.s3* %v3, i32 0, i32 0
|
|
%2 = load i16* %a3, align 2
|
|
%conv4 = sext i16 %2 to i32
|
|
%add5 = add nsw i32 %add, %conv4
|
|
%a6 = getelementptr inbounds %struct.s4* %v4, i32 0, i32 0
|
|
%3 = load i32* %a6, align 4
|
|
%add7 = add nsw i32 %add5, %3
|
|
%a8 = getelementptr inbounds %struct.s5* %v5, i32 0, i32 0
|
|
%4 = load i32* %a8, align 4
|
|
%add9 = add nsw i32 %add7, %4
|
|
%a10 = getelementptr inbounds %struct.s6* %v6, i32 0, i32 0
|
|
%5 = load i32* %a10, align 4
|
|
%add11 = add nsw i32 %add9, %5
|
|
%a12 = getelementptr inbounds %struct.s7* %v7, i32 0, i32 0
|
|
%6 = load i32* %a12, align 4
|
|
%add13 = add nsw i32 %add11, %6
|
|
ret i32 %add13
|
|
|
|
; CHECK: lha {{[0-9]+}}, 126(1)
|
|
; CHECK: lha {{[0-9]+}}, 132(1)
|
|
; CHECK: lbz {{[0-9]+}}, 119(1)
|
|
; CHECK: lwz {{[0-9]+}}, 140(1)
|
|
; CHECK: lwz {{[0-9]+}}, 144(1)
|
|
; CHECK: lwz {{[0-9]+}}, 152(1)
|
|
; CHECK: lwz {{[0-9]+}}, 160(1)
|
|
}
|
|
|
|
define i32 @caller2() nounwind {
|
|
entry:
|
|
%p1 = alloca %struct.t1, align 1
|
|
%p2 = alloca %struct.t2, align 1
|
|
%p3 = alloca %struct.t3, align 1
|
|
%p4 = alloca %struct.t4, align 1
|
|
%p5 = alloca %struct.t5, align 1
|
|
%p6 = alloca %struct.t6, align 1
|
|
%p7 = alloca %struct.t7, align 1
|
|
%0 = bitcast %struct.t1* %p1 to i8*
|
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* getelementptr inbounds (%struct.t1* @caller2.p1, i32 0, i32 0), i64 1, i32 1, i1 false)
|
|
%1 = bitcast %struct.t2* %p2 to i8*
|
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ({ i16 }* @caller2.p2 to i8*), i64 2, i32 1, i1 false)
|
|
%2 = bitcast %struct.t3* %p3 to i8*
|
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* bitcast (%struct.t3* @caller2.p3 to i8*), i64 3, i32 1, i1 false)
|
|
%3 = bitcast %struct.t4* %p4 to i8*
|
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %3, i8* bitcast ({ i32 }* @caller2.p4 to i8*), i64 4, i32 1, i1 false)
|
|
%4 = bitcast %struct.t5* %p5 to i8*
|
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %4, i8* bitcast (%struct.t5* @caller2.p5 to i8*), i64 5, i32 1, i1 false)
|
|
%5 = bitcast %struct.t6* %p6 to i8*
|
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %5, i8* bitcast (%struct.t6* @caller2.p6 to i8*), i64 6, i32 1, i1 false)
|
|
%6 = bitcast %struct.t7* %p7 to i8*
|
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %6, i8* bitcast (%struct.t7* @caller2.p7 to i8*), i64 7, i32 1, i1 false)
|
|
%call = call i32 @callee2(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, %struct.t1* byval %p1, %struct.t2* byval %p2, %struct.t3* byval %p3, %struct.t4* byval %p4, %struct.t5* byval %p5, %struct.t6* byval %p6, %struct.t7* byval %p7)
|
|
ret i32 %call
|
|
|
|
; CHECK: stb {{[0-9]+}}, 119(1)
|
|
; CHECK: sth {{[0-9]+}}, 126(1)
|
|
; CHECK: stb {{[0-9]+}}, 135(1)
|
|
; CHECK: sth {{[0-9]+}}, 133(1)
|
|
; CHECK: stw {{[0-9]+}}, 140(1)
|
|
; CHECK: stb {{[0-9]+}}, 151(1)
|
|
; CHECK: stw {{[0-9]+}}, 147(1)
|
|
; CHECK: sth {{[0-9]+}}, 158(1)
|
|
; CHECK: stw {{[0-9]+}}, 154(1)
|
|
; CHECK: stb {{[0-9]+}}, 167(1)
|
|
; CHECK: sth {{[0-9]+}}, 165(1)
|
|
; CHECK: stw {{[0-9]+}}, 161(1)
|
|
}
|
|
|
|
define internal i32 @callee2(i32 %z1, i32 %z2, i32 %z3, i32 %z4, i32 %z5, i32 %z6, i32 %z7, i32 %z8, %struct.t1* byval %v1, %struct.t2* byval %v2, %struct.t3* byval %v3, %struct.t4* byval %v4, %struct.t5* byval %v5, %struct.t6* byval %v6, %struct.t7* byval %v7) nounwind {
|
|
entry:
|
|
%z1.addr = alloca i32, align 4
|
|
%z2.addr = alloca i32, align 4
|
|
%z3.addr = alloca i32, align 4
|
|
%z4.addr = alloca i32, align 4
|
|
%z5.addr = alloca i32, align 4
|
|
%z6.addr = alloca i32, align 4
|
|
%z7.addr = alloca i32, align 4
|
|
%z8.addr = alloca i32, align 4
|
|
store i32 %z1, i32* %z1.addr, align 4
|
|
store i32 %z2, i32* %z2.addr, align 4
|
|
store i32 %z3, i32* %z3.addr, align 4
|
|
store i32 %z4, i32* %z4.addr, align 4
|
|
store i32 %z5, i32* %z5.addr, align 4
|
|
store i32 %z6, i32* %z6.addr, align 4
|
|
store i32 %z7, i32* %z7.addr, align 4
|
|
store i32 %z8, i32* %z8.addr, align 4
|
|
%a = getelementptr inbounds %struct.t1* %v1, i32 0, i32 0
|
|
%0 = load i8* %a, align 1
|
|
%conv = zext i8 %0 to i32
|
|
%a1 = getelementptr inbounds %struct.t2* %v2, i32 0, i32 0
|
|
%1 = load i16* %a1, align 1
|
|
%conv2 = sext i16 %1 to i32
|
|
%add = add nsw i32 %conv, %conv2
|
|
%a3 = getelementptr inbounds %struct.t3* %v3, i32 0, i32 0
|
|
%2 = load i16* %a3, align 1
|
|
%conv4 = sext i16 %2 to i32
|
|
%add5 = add nsw i32 %add, %conv4
|
|
%a6 = getelementptr inbounds %struct.t4* %v4, i32 0, i32 0
|
|
%3 = load i32* %a6, align 1
|
|
%add7 = add nsw i32 %add5, %3
|
|
%a8 = getelementptr inbounds %struct.t5* %v5, i32 0, i32 0
|
|
%4 = load i32* %a8, align 1
|
|
%add9 = add nsw i32 %add7, %4
|
|
%a10 = getelementptr inbounds %struct.t6* %v6, i32 0, i32 0
|
|
%5 = load i32* %a10, align 1
|
|
%add11 = add nsw i32 %add9, %5
|
|
%a12 = getelementptr inbounds %struct.t7* %v7, i32 0, i32 0
|
|
%6 = load i32* %a12, align 1
|
|
%add13 = add nsw i32 %add11, %6
|
|
ret i32 %add13
|
|
|
|
; CHECK: lha {{[0-9]+}}, 126(1)
|
|
; CHECK: lha {{[0-9]+}}, 133(1)
|
|
; CHECK: lbz {{[0-9]+}}, 119(1)
|
|
; CHECK: lwz {{[0-9]+}}, 140(1)
|
|
; CHECK: lwz {{[0-9]+}}, 147(1)
|
|
; CHECK: lwz {{[0-9]+}}, 154(1)
|
|
; CHECK: lwz {{[0-9]+}}, 161(1)
|
|
}
|