Mirror of https://github.com/RPCS3/llvm.git
AArch64: use 32-bit MOV rather than UBFX to truncate registers.
It's potentially more efficient on Cyclone, and from the optimization guides & schedulers it looks like it has no effect on Cortex-A53 or A57. In general you'd expect a MOV to be about the most efficient instruction with its semantics, even though the official "UXTW" alias is really a UBFX.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@243576 91177308-0d34-0410-b5e6-96231b3b80d8
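To make the change concrete, here is a minimal sketch (not part of the commit; the function name and exact register assignment are assumed) of the lowering it affects:

; zext.ll -- build with something like: llc -mtriple=arm64-apple-ios zext.ll
define i64 @illustrate_zext(i32 %x) {
  %ext = zext i32 %x to i64
  ret i64 %ext
}

; Before this patch the backend selected a bitfield extract:
;   ubfx x0, x0, #0, #32
; After it, a 32-bit register move; a 32-bit write architecturally zeroes
; the upper half of the X register, so the result is the same:
;   mov w0, w0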
parent 81474d36a0
commit 3614662adb
@@ -5148,10 +5148,10 @@ def : Pat<(i64 (zext def32:$src)), (SUBREG_TO_REG (i64 0), GPR32:$src, sub_32)>;
 def : Pat<(i64 (anyext GPR32:$src)),
           (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>;
 
-// When we need to explicitly zero-extend, we use an unsigned bitfield move
-// instruction (UBFM) on the enclosing super-reg.
+// When we need to explicitly zero-extend, we use a 32-bit MOV instruction and
+// then assert the extension has happened.
 def : Pat<(i64 (zext GPR32:$src)),
-          (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>;
+          (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
 
 // To sign extend, we use a signed bitfield move instruction (SBFM) on the
 // containing super-reg.
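The replacement pattern reads as follows: ORRWrs WZR, $src, 0 is an OR of $src with the zero register, i.e. exactly the canonical "mov wD, wS", and SUBREG_TO_REG (i32 0) wraps the result to assert to later passes that the upper 32 bits of the containing X register are zero. For contrast, a hedged sketch (function name assumed) of the sign-extending path, which as the comment above notes still uses a bitfield move:

; Illustrative only: i32 -> i64 sign extension still selects SBFM on the
; 64-bit super-register, normally printed via its "sxtw" alias.
define i64 @illustrate_sext(i32 %x) {
  %ext = sext i32 %x to i64
  ret i64 %ext
}
; expected selection: sxtw x0, w0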
@@ -252,11 +252,11 @@ entry:
 ; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
 ; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
 ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
-; CHECK: ubfx x9, x0, #0, #32
+; CHECK: mov w9, w0
+; CHECK: mov x10, sp
 ; CHECK: lsl x9, x9, #2
 ; CHECK: add x9, x9, #15
 ; CHECK: and x9, x9, #0x7fffffff0
-; CHECK: mov x10, sp
 ; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
 ; CHECK: mov sp, x[[VLASPTMP]]
 ; Check correct access to local variable, through frame pointer
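This test tracks the VLA size computation: the 32-bit element count arrives in w0, and the mov w9, w0 now doubles as its zero-extension before the 64-bit arithmetic. The count is scaled by 4 (lsl #2) and rounded up to a 16-byte multiple via (bytes + 15) & mask; the mask 0x7fffffff0 clears the low four bits and spans only bits 4-34 because a zero-extended 32-bit count scaled by 4, plus 15, fits in 35 bits. A hedged sketch (function and value names assumed, not the actual test source) of IR that produces this sequence:

define void @illustrate_vla(i32 %n) {
  %vla = alloca i32, i32 %n, align 16   ; VLA of %n i32s, 16-byte aligned
  store volatile i32 0, i32* %vla       ; keep the allocation alive
  ret void
}
; bytes   = %n * 4               -> lsl x9, x9, #2
; rounded = (bytes + 15) & ~15   -> add x9, x9, #15; and x9, x9, #0x7fffffff0
; new sp  = old sp - rounded     -> sub x[[VLASPTMP]], x10, x9; mov sp, x[[VLASPTMP]]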
@@ -299,11 +299,11 @@ entry:
 ; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
 ; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
 ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
-; CHECK: ubfx x9, x0, #0, #32
+; CHECK: mov w9, w0
+; CHECK: mov x10, sp
 ; CHECK: lsl x9, x9, #2
 ; CHECK: add x9, x9, #15
 ; CHECK: and x9, x9, #0x7fffffff0
-; CHECK: mov x10, sp
 ; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
 ; CHECK: mov sp, x[[VLASPTMP]]
 ; Check correct access to local variable, through frame pointer
@@ -361,11 +361,11 @@ entry:
 ; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
 ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
 ; and set-up of base pointer (x19).
-; CHECK: ubfx x9, x0, #0, #32
+; CHECK: mov w9, w0
+; CHECK: mov x10, sp
 ; CHECK: lsl x9, x9, #2
 ; CHECK: add x9, x9, #15
 ; CHECK: and x9, x9, #0x7fffffff0
-; CHECK: mov x10, sp
 ; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
 ; CHECK: mov sp, x[[VLASPTMP]]
 ; Check correct access to local variable, through base pointer
@@ -414,11 +414,11 @@ entry:
 ; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
 ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
 ; and set-up of base pointer (x19).
-; CHECK: ubfx x9, x0, #0, #32
+; CHECK: mov w9, w0
+; CHECK: mov x10, sp
 ; CHECK: lsl x9, x9, #2
 ; CHECK: add x9, x9, #15
 ; CHECK: and x9, x9, #0x7fffffff0
-; CHECK: mov x10, sp
 ; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
 ; CHECK: mov sp, x[[VLASPTMP]]
 ; Check correct access to local variable, through base pointer
@@ -465,11 +465,11 @@ entry:
 ; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
 ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
 ; and set-up of base pointer (x19).
-; CHECK: ubfx x9, x0, #0, #32
+; CHECK: mov w9, w0
+; CHECK: mov x10, sp
 ; CHECK: lsl x9, x9, #2
 ; CHECK: add x9, x9, #15
 ; CHECK: and x9, x9, #0x7fffffff0
-; CHECK: mov x10, sp
 ; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
 ; CHECK: mov sp, x[[VLASPTMP]]
 ; Check correct access to local variable, through base pointer
@@ -78,8 +78,8 @@ define void @test_extension(i1 %bool, i8 %char, i16 %short, i32 %int) {
 
   %ext_int = zext i32 %int to i64
   store volatile i64 %ext_int, i64* @var64
-; CHECK: ubfx [[EXT:x[0-9]+]], x3, #0, #32
-; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64]
+; CHECK: mov w[[EXT:[0-9]+]], w3
+; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64]
 
   ret void
 }
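One detail worth noting in the updated CHECK lines: the mov captures a w register number, but the str then uses the matching x register. That is legal precisely because of the SUBREG_TO_REG pattern above, which promises the whole 64-bit register holds the zero-extended value. A sketch of the same shape (global and function names assumed):

@var64 = global i64 0

define void @illustrate_zext_store(i32 %int) {
  %ext = zext i32 %int to i64
  store volatile i64 %ext, i64* @var64  ; 64-bit store reads the full X reg
  ret void
}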
@@ -4,8 +4,8 @@
 define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
   %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
   ret i32 %cnt
-; CHECK: ubfx x{{[0-9]+}}
-; CHECK: fmov d0, x{{[0-9]+}}
+; CHECK: mov w[[IN64:[0-9]+]], w0
+; CHECK: fmov d0, x[[IN64]]
 ; CHECK: cnt.8b v0, v0
 ; CHECK: uaddlv.8b h0, v0
 ; CHECK: fmov w0, s0
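The zero-extension matters here because the scalar value is moved whole into d0, and CNT counts set bits in every byte of the 64-bit register; stale high bits would be added into the result by UADDLV's horizontal sum. A hedged walk-through (the input value and register numbers are an assumed example, not compiler output):

declare i32 @llvm.ctpop.i32(i32)

define i32 @ctpop_walkthrough(i32 %x) nounwind readnone {
  %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
  ret i32 %cnt
}
; For %x = 0xF0F00001:
;   mov w8, w0         ; x8 = 0x00000000F0F00001, upper half zeroed
;   fmov d0, x8        ; d0 bytes (LSB first): 01 00 F0 F0 00 00 00 00
;   cnt.8b v0, v0      ; per-byte popcounts:    1  0  4  4  0  0  0  0
;   uaddlv.8b h0, v0   ; horizontal sum = 9
;   fmov w0, s0        ; returns 9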
@@ -60,7 +60,7 @@ define void @test_extendw(i32 %var) {
 
   %uxt64 = zext i32 %var to i64
   store volatile i64 %uxt64, i64* @var64
-; CHECK: ubfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #32
+; CHECK: mov {{w[0-9]+}}, w0
   ret void
 }
 