mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-24 04:09:45 +00:00
AMDGPU: Fix sext_inreg for i1 in i16
This produces worse code when i16 is legal, mostly due to combines getting confused by conversions inserted for uniform 16-bit operations. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291717 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
8c7e9845cf
commit
fac51240d9
@ -870,6 +870,11 @@ def : Pat <
|
||||
(S_BFE_I64 i64:$src, (i32 0x10000)) // 0 | 1 << 16
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(i16 (sext_inreg i16:$src, i1)),
|
||||
(S_BFE_I32 $src, (i32 0x00010000)) // 0 | 1 << 16
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(i16 (sext_inreg i16:$src, i8)),
|
||||
(S_BFE_I32 $src, (i32 0x80000)) // 0 | 8 << 16
|
||||
|
@ -2,6 +2,8 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
; FIXME: i16 promotion pass ruins the scalar cases when legal.
|
||||
|
||||
; FUNC-LABEL: {{^}}sext_in_reg_i1_i32:
|
||||
; GCN: s_load_dword [[ARG:s[0-9]+]],
|
||||
; GCN: s_bfe_i32 [[SEXTRACT:s[0-9]+]], [[ARG]], 0x10000
|
||||
@ -659,6 +661,137 @@ define void @v_sext_in_reg_i32_to_i64_move_use(i64 addrspace(1)* %out, i64 addrs
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}s_sext_in_reg_i1_i16:
|
||||
; GCN: s_load_dword [[VAL:s[0-9]+]]
|
||||
|
||||
; SI: s_bfe_i32 [[BFE:s[0-9]+]], [[VAL]], 0x10000
|
||||
; SI: v_mov_b32_e32 [[VBFE:v[0-9]+]], [[BFE]]
|
||||
; SI: buffer_store_short [[VBFE]]
|
||||
|
||||
; VI: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 15
|
||||
; VI: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
|
||||
; VI: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 15
|
||||
define void @s_sext_in_reg_i1_i16(i16 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
|
||||
%ld = load i32, i32 addrspace(2)* %ptr
|
||||
%in = trunc i32 %ld to i16
|
||||
%shl = shl i16 %in, 15
|
||||
%sext = ashr i16 %shl, 15
|
||||
store i16 %sext, i16 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}s_sext_in_reg_i2_i16:
|
||||
; GCN: s_load_dword [[VAL:s[0-9]+]]
|
||||
|
||||
; SI: s_bfe_i32 [[BFE:s[0-9]+]], [[VAL]], 0x20000
|
||||
; SI: v_mov_b32_e32 [[VBFE:v[0-9]+]], [[BFE]]
|
||||
; SI: buffer_store_short [[VBFE]]
|
||||
|
||||
; VI: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14
|
||||
; VI: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
|
||||
; VI: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14
|
||||
define void @s_sext_in_reg_i2_i16(i16 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
|
||||
%ld = load i32, i32 addrspace(2)* %ptr
|
||||
%in = trunc i32 %ld to i16
|
||||
%shl = shl i16 %in, 14
|
||||
%sext = ashr i16 %shl, 14
|
||||
store i16 %sext, i16 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_sext_in_reg_i1_i16:
|
||||
; GCN: {{buffer|flat}}_load_ushort [[VAL:v[0-9]+]]
|
||||
; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[VAL]], 0, 1{{$}}
|
||||
|
||||
; GCN: ds_write_b16 v{{[0-9]+}}, [[BFE]]
|
||||
define void @v_sext_in_reg_i1_i16(i16 addrspace(3)* %out, i16 addrspace(1)* %ptr) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x()
|
||||
%gep = getelementptr i16, i16 addrspace(1)* %ptr, i32 %tid
|
||||
%out.gep = getelementptr i16, i16 addrspace(3)* %out, i32 %tid
|
||||
|
||||
%in = load i16, i16 addrspace(1)* %gep
|
||||
%shl = shl i16 %in, 15
|
||||
%sext = ashr i16 %shl, 15
|
||||
store i16 %sext, i16 addrspace(3)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_sext_in_reg_i1_i16_nonload:
|
||||
; GCN: {{buffer|flat}}_load_ushort [[VAL0:v[0-9]+]]
|
||||
; GCN: {{buffer|flat}}_load_ushort [[VAL1:v[0-9]+]]
|
||||
|
||||
; SI: v_lshlrev_b32_e32 [[REG:v[0-9]+]], [[VAL1]], [[VAL0]]
|
||||
; VI: v_lshlrev_b16_e32 [[REG:v[0-9]+]], [[VAL1]], [[VAL0]]
|
||||
|
||||
; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[REG]], 0, 1{{$}}
|
||||
; GCN: ds_write_b16 v{{[0-9]+}}, [[BFE]]
|
||||
define void @v_sext_in_reg_i1_i16_nonload(i16 addrspace(3)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr, i16 %s.val) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x()
|
||||
%a.gep = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
|
||||
%b.gep = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
|
||||
%out.gep = getelementptr i16, i16 addrspace(3)* %out, i32 %tid
|
||||
%a = load volatile i16, i16 addrspace(1)* %a.gep, align 2
|
||||
%b = load volatile i16, i16 addrspace(1)* %b.gep, align 2
|
||||
|
||||
%c = shl i16 %a, %b
|
||||
%shl = shl i16 %c, 15
|
||||
%ashr = ashr i16 %shl, 15
|
||||
|
||||
store i16 %ashr, i16 addrspace(3)* %out.gep, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}s_sext_in_reg_i2_i16_arg:
|
||||
; GCN: s_load_dword [[VAL:s[0-9]+]]
|
||||
|
||||
; SI: s_bfe_i32 [[BFE:s[0-9]+]], [[VAL]], 0x20000
|
||||
; SI: v_mov_b32_e32 [[VBFE:v[0-9]+]], [[BFE]]
|
||||
; SI: buffer_store_short [[VBFE]]
|
||||
|
||||
; VI: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14{{$}}
|
||||
; VI: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
|
||||
; VI: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14{{$}}
|
||||
define void @s_sext_in_reg_i2_i16_arg(i16 addrspace(1)* %out, i16 %in) #0 {
|
||||
%shl = shl i16 %in, 14
|
||||
%sext = ashr i16 %shl, 14
|
||||
store i16 %sext, i16 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}s_sext_in_reg_i8_i16_arg:
|
||||
; GCN: s_load_dword [[VAL:s[0-9]+]]
|
||||
|
||||
; SI: s_sext_i32_i8 [[SSEXT:s[0-9]+]], [[VAL]]
|
||||
; SI: v_mov_b32_e32 [[VSEXT:v[0-9]+]], [[SSEXT]]
|
||||
; SI: buffer_store_short [[VBFE]]
|
||||
|
||||
; VI: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 8{{$}}
|
||||
; VI: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
|
||||
; VI: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 8{{$}}
|
||||
define void @s_sext_in_reg_i8_i16_arg(i16 addrspace(1)* %out, i16 %in) #0 {
|
||||
%shl = shl i16 %in, 8
|
||||
%sext = ashr i16 %shl, 8
|
||||
store i16 %sext, i16 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}s_sext_in_reg_i15_i16_arg:
|
||||
; GCN: s_load_dword [[VAL:s[0-9]+]]
|
||||
|
||||
; SI: s_bfe_i32 [[BFE:s[0-9]+]], [[VAL]], 0xf0000
|
||||
; SI: v_mov_b32_e32 [[VBFE:v[0-9]+]], [[BFE]]
|
||||
; SI: buffer_store_short [[VBFE]]
|
||||
|
||||
; VI: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 1{{$}}
|
||||
; VI: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
|
||||
; VI: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 1{{$}}
|
||||
define void @s_sext_in_reg_i15_i16_arg(i16 addrspace(1)* %out, i16 %in) #0 {
|
||||
%shl = shl i16 %in, 1
|
||||
%sext = ashr i16 %shl, 1
|
||||
store i16 %sext, i16 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
Loading…
Reference in New Issue
Block a user